package main

import (
	"database/sql"
	"encoding/json"
	"intimate"
	"log"
	"regexp"
	"time"

	"github.com/tebeka/selenium"
)

// sstore is the source store instance, the implementation that stores source
// data. For the table layout see sql/intimate_source.sql.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// estore is the extractor store connection instance.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// twitchUserRe captures the user name from a Twitch channel URL. It is
// compiled once at package scope instead of once per scraped article.
var twitchUserRe = regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

// UserList is the task that collects the channel links of a Twitch category.
type UserList struct {
}

// Execute runs the task. Until shutdown is requested it repeatedly takes a
// source record from sstore, opens its URL sorted by viewer count, scrolls the
// listing, and stores one streamer per <article> card through estore.
//
// Execute panics when sstore.Pop returns an error.
func (cl *UserList) Execute() {
	// Maintenance note kept from the original author:
	// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
	// Channel links are matched with:
	// //article//a[@data-a-target='preview-card-title-link']

	wd := intimate.GetChromeDriver(3030)
	// Quit through a closure so that the driver held by wd at exit time is the
	// one shut down. A plain `defer wd.Quit()` binds the receiver when the
	// defer statement runs and would target a driver that the counter callback
	// below may already have replaced.
	defer func() {
		if err := wd.Quit(); err != nil {
			log.Println(err)
		}
	}()

	ps := intimate.NewPerfectShutdown()

	// The callback restarts the browser: it quits the current driver and
	// stores a fresh one through the pointer passed as the extra argument.
	// Presumably it fires once the counter reaches the limit of 100 — confirm
	// against intimate.Counter.
	counter := intimate.NewCounter()
	counter.SetMaxLimit(100)
	counter.SetMaxToDo(func(olist ...interface{}) error {
		owd := olist[0].(*selenium.WebDriver)
		if err := (*owd).Quit(); err != nil {
			log.Println(err)
		}
		*owd = intimate.GetChromeDriver(3030)
		return nil
	}, &wd)

	for !ps.IsClose() {
		sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
		if err != nil {
			panic(err)
		}

		weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
		if err = wd.Get(weburl); err != nil {
			log.Println(err)
			sstore.UpdateError(sourceChannel, err)
			time.Sleep(time.Second * 10)
			continue
		}

		// Wait for the listing to render. A missing element is reported as
		// "not ready yet" (false, nil) so the driver keeps polling; returning
		// the lookup error would end the wait on the very first miss.
		err = wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
			if _, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]"); err != nil {
				return false, nil
			}
			return true, nil
		}, time.Second*10)
		if err != nil {
			// Not fatal: the sort-button lookup below decides whether the
			// page is usable.
			log.Println(err)
		}

		btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
		if err != nil {
			log.Println(err)
			continue
		}
		if err = btn.Click(); err != nil {
			log.Println(err)
		}

		scrollChannelList(wd, ps.IsClose)

		articles, err := wd.FindElements(selenium.ByXPATH, "//article")
		if err != nil {
			log.Println(err)
			continue
		}

		for _, article := range articles {
			streamer, ok := streamerFromArticle(article)
			if !ok {
				continue
			}
			if estore.InsertStreamer(streamer) {
				// log.Println("streamer update tags", streamer.Uid, tags)
				estore.Update(streamer, "Tags", streamer.Tags)
			}
		}

		log.Println("streamer find", len(articles))
		if len(articles) == 0 {
			// NOTE(review): Operator 5 presumably marks a source that yielded
			// no streamers — confirm against the intimate package.
			sourceChannel.Operator = 5
			sstore.UpdateOperator(sourceChannel)
		}
		counter.AddWithReset(1)
	}
}

// scrollChannelList presses the End key repeatedly so the page loads more
// cards. It stops after 200 rounds, when closed reports true, when the element
// lookup fails, or when the matched element count is unchanged for two
// consecutive rounds.
//
// NOTE(review): the XPath ends in [last()], which limits the match to at most
// one node, so the count can only be 0 or 1 and the loop ends after a few
// rounds — confirm this is the intended growth signal.
func scrollChannelList(wd selenium.WebDriver, closed func() bool) {
	const stableRounds = 2

	seen := 0
	remaining := stableRounds
	for i := 0; i < 200 && !closed(); i++ {
		elements, err := wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
		if err != nil {
			log.Println(err)
			break
		}

		// Key events are best effort; a failed key press only means this
		// round loads nothing new.
		time.Sleep(time.Millisecond * 200)
		wd.KeyDown(selenium.EndKey)
		time.Sleep(time.Millisecond * 200)
		wd.KeyUp(selenium.EndKey)
		time.Sleep(time.Millisecond * 2000)

		if len(elements) == seen {
			remaining--
			if remaining <= 0 {
				break
			}
		} else {
			remaining = stableRounds
		}
		seen = len(elements)
	}
}

// streamerFromArticle builds a Streamer from one <article> card. It reads the
// title link's href, collects the data-a-target attribute of each tag button,
// and derives the user id from the href. It logs the reason and returns false
// when the card cannot be turned into a streamer.
func streamerFromArticle(article selenium.WebElement) (*intimate.Streamer, bool) {
	link, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
	if err != nil {
		log.Println(err)
		return nil, false
	}

	href, err := link.GetAttribute("href")
	if err != nil {
		log.Println(err)
		return nil, false
	}

	btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
	if err != nil {
		log.Println(err)
		return nil, false
	}

	var tags []string
	for _, btn := range btns {
		// Buttons without the attribute are skipped.
		if tag, err := btn.GetAttribute("data-a-target"); err == nil {
			tags = append(tags, tag)
		}
	}

	matches := twitchUserRe.FindStringSubmatch(href)
	if len(matches) != 2 {
		log.Println(href)
		return nil, false
	}

	streamer := &intimate.Streamer{}
	streamer.UserId = matches[1]

	// A tag encoding failure is logged but does not discard the streamer.
	if jtags, err := json.Marshal(tags); err != nil {
		log.Println(err)
	} else {
		streamer.Tags = jtags
	}

	streamer.Platform = intimate.Ptwitch
	streamer.LiveUrl = sql.NullString{String: href, Valid: true}

	updateURL := map[string]string{"live": href}
	data, err := json.Marshal(updateURL)
	if err != nil {
		log.Println(err)
		return nil, false
	}
	streamer.UpdateUrl = data
	streamer.Operator = 0

	return streamer, true
}