package main

import (
	"intimate"
	"log"
	"time"

	"github.com/tebeka/selenium"
)

// // sstore is the source store instance, the implementation that stores the
// // raw source data. See sql/intimate_source.sql for the table definition.
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// // estore is the extractor store connection instance.
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// This task collects the links of all channels shown on the directory page.

const (
	// twitchDirectoryURL is the directory page the crawl starts from.
	twitchDirectoryURL = "https://www.twitch.tv/directory?sort=VIEWER_COUNT"

	// twitchCardXPath matches the anchors whose href values are collected.
	twitchCardXPath = "//span/a[contains(@data-a-target,'card-') and @href]"

	// twitchSortMenuXPath matches the sort-menu button clicked once the page
	// has loaded.
	twitchSortMenuXPath = "//button[@data-a-target='browse-sort-menu']"

	// twitchTrimCardsScript removes every matched card container from the DOM
	// except the last 10.
	twitchTrimCardsScript = `items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`
)

// Execute runs the crawl task in a loop until the PerfectShutdown instance
// reports that it is closed.
//
// Each round opens the Twitch directory page in a Chrome driver, scrolls it
// while collecting card links, inserts every collected link through
// intimate.TStreamerList and then waits 30 minutes before the next round.
//
// Execute panics when the directory page cannot be opened or the sort-menu
// button cannot be found.
func Execute() {
	ps := intimate.NewPerfectShutdown()

	for !ps.IsClose() {
		adriver := intimate.GetChromeDriver()
		wd := adriver.Webdriver

		if err := wd.Get(twitchDirectoryURL); err != nil {
			panic(err)
		}

		// Wait (up to 15s) until at least one card link is present.
		cardsLoaded := func(wd selenium.WebDriver) (bool, error) {
			elements, err := wd.FindElements(selenium.ByXPATH, twitchCardXPath)
			if err != nil {
				return false, err
			}
			return len(elements) > 0, nil
		}
		if err := wd.WaitWithTimeout(cardsLoaded, time.Second*15); err != nil {
			// Best effort: the crawl continues even if the wait fails.
			log.Println(err)
		}
		time.Sleep(time.Second)

		sortMenu, err := wd.FindElement(selenium.ByXPATH, twitchSortMenuXPath)
		if err != nil {
			panic(err)
		}
		if err := sortMenu.Click(); err != nil {
			log.Println(err)
		}

		hrefs := collectCardLinks(wd, ps.IsClose)

		for href := range hrefs {
			sl := &intimate.StreamerList{}
			sl.Url = href
			sl.UrlHash = intimate.GetUrlHash(sl.Url)
			sl.Platform = string(intimate.Ptwitch)
			sl.UpdateTime = intimate.GetUpdateTimeNow()

			if err := intimate.TStreamerList.Insert(sl); err != nil {
				log.Println(err)
			}

			// TODO: Save href
			// source := &intimate.Source{}
			// source.Source = sql.NullString{String: href, Valid: true}
			// source.Operator = 0
			// source.Target = intimate.TTwitchChannel
			// source.Url = twitchDirectoryURL
			// sstore.Insert(source)
		}

		log.Println("hrefs len:", len(hrefs))
		// sstore.Deduplicate(intimate.TTwitchChannel, "source")

		// NOTE(review): the driver is not closed here; whether
		// intimate.GetChromeDriver manages the driver lifetime itself is not
		// visible from this file — confirm before re-enabling.
		// wd.Close()
		// wd.Quit()

		// Wait between rounds, but return early once shutdown is requested
		// so the process does not hang for up to 30 minutes.
		sleepUnlessClosed(time.Minute*30, ps.IsClose)
	}
}

// collectCardLinks repeatedly scans the page for card links, pressing the End
// key between scans, and returns the set of distinct href values seen.
//
// Scanning stops after 201 rounds, when finding the cards fails, when closed
// reports true, or when several consecutive rounds add no new link.
func collectCardLinks(wd selenium.WebDriver, closed func() bool) map[string]bool {
	hrefs := make(map[string]bool)
	lastCount := 0
	// stallsLeft counts down on every round that starts without new links
	// since the previous round; it starts at 5 and is reset to 7 on progress.
	stallsLeft := 5

	for i := 0; i <= 200; i++ {
		cards, err := wd.FindElements(selenium.ByXPATH, twitchCardXPath)
		if err != nil {
			log.Println(err)
			break
		}

		if len(hrefs) == lastCount {
			stallsLeft--
			if stallsLeft <= 0 {
				break
			}
		} else {
			stallsLeft = 7
		}
		lastCount = len(hrefs)

		for _, card := range cards {
			href, err := card.GetAttribute("href")
			if err != nil {
				log.Println(href, err)
				continue
			}
			hrefs[href] = true
		}

		if closed() {
			break
		}

		if len(cards) > 10 {
			log.Println(len(cards))
			// Drop already-processed card containers, keeping the last 10
			// (presumably to keep the page small while scrolling).
			if _, err := wd.ExecuteScript(twitchTrimCardsScript, nil); err != nil {
				log.Println(err)
			}
		}

		// Press and release End, presumably to make the page load more cards,
		// then give it time to render before the next scan.
		time.Sleep(time.Millisecond * 200)
		if err := wd.KeyDown(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Millisecond * 200)
		if err := wd.KeyUp(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Millisecond * 2500)
	}

	return hrefs
}

// sleepUnlessClosed blocks for d, polling closed about once per second and
// returning as soon as it reports true.
func sleepUnlessClosed(d time.Duration, closed func() bool) {
	deadline := time.Now().Add(d)
	for !closed() {
		remaining := time.Until(deadline)
		if remaining <= 0 {
			return
		}
		if remaining > time.Second {
			remaining = time.Second
		}
		time.Sleep(remaining)
	}
}