package main

import (
	"database/sql"
	"intimate"
	"log"
	"time"

	"github.com/tebeka/selenium"
)

// sstore is the source store instance, the implementation used to store
// source data. For the table layout see sql/intimate_source.sql.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// estore is the extractor store connection instance.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// ChannelLink is the task that fetches all channel links from the Twitch
// directory page.
type ChannelLink struct {
}

// Execute runs the task.
//
// It opens the Twitch directory sorted by viewer count, repeatedly scrolls to
// the end of the page while collecting the href of every card link it finds,
// and finally inserts one intimate.Source row per distinct href into sstore
// before asking sstore to deduplicate on the "source" column.
//
// Execute panics if the page cannot be loaded or the sort-menu button cannot
// be found. All other WebDriver errors are logged and the run continues on a
// best-effort basis.
//
// NOTE(review): the driver is never quit in this function; confirm whether
// intimate.GetChromeDriver or the caller owns its shutdown.
func (cl *ChannelLink) Execute() {
	const (
		// cardXPath matches the card anchors whose href is collected.
		cardXPath = "//span/a[contains(@data-a-target,'card-') and @href]"

		// pruneScript removes all but the last 10 card containers from the DOM.
		// The literal (including its line break) is kept byte-for-byte.
		pruneScript = `items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); 
item.remove() ;};`

		// maxStalls is how many consecutive passes with an unchanged card
		// count are tolerated before the scroll loop gives up.
		maxStalls = 5
	)

	wd := intimate.GetChromeDriver(3030)
	ps := intimate.NewPerfectShutdown()

	weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
	if err := wd.Get(weburl); err != nil {
		panic(err)
	}

	// Wait (up to 15s) until at least one card link is present.
	cardCondition := func(wd selenium.WebDriver) (bool, error) {
		elements, err := wd.FindElements(selenium.ByXPATH, cardXPath)
		if err != nil {
			return false, err
		}
		return len(elements) > 0, nil
	}
	if err := wd.WaitWithTimeout(cardCondition, time.Second*15); err != nil {
		// Best effort: the scroll loop below copes with an empty page.
		log.Println(err)
	}
	time.Sleep(time.Second)

	e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
	if err != nil {
		panic(err)
	}
	if err := e.Click(); err != nil {
		log.Println(err)
	}

	hrefs := make(map[string]bool)
	stallsLeft := maxStalls
	lastCount := 0

	for i := 0; i <= 200; i++ {
		cards, err := wd.FindElements(selenium.ByXPATH, cardXPath)
		if err != nil {
			log.Println(err)
			break
		}

		// Stop once the number of cards has stayed the same for maxStalls
		// consecutive passes; any change resets the budget.
		if len(cards) == lastCount {
			stallsLeft--
			if stallsLeft <= 0 {
				break
			}
		} else {
			stallsLeft = maxStalls
		}

		for _, card := range cards {
			href, err := card.GetAttribute("href")
			if err != nil {
				log.Println(href, err)
				continue
			}
			hrefs[href] = true
		}
		lastCount = len(cards)

		if ps.IsClose() {
			break
		}

		if len(cards) > 10 {
			log.Println(len(cards))
			if _, err := wd.ExecuteScript(pruneScript, nil); err != nil {
				log.Println(err)
			}
		}

		// Press and release End to scroll to the bottom, then pause before
		// the next pass.
		time.Sleep(time.Millisecond * 200)
		if err := wd.KeyDown(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Millisecond * 200)
		if err := wd.KeyUp(selenium.EndKey); err != nil {
			log.Println(err)
		}
		time.Sleep(time.Millisecond * 2500)
	}

	for href := range hrefs {
		// TODO: Save href
		source := &intimate.Source{}
		source.Source = sql.NullString{String: href, Valid: true}
		source.Operator = 0
		source.Target = intimate.TTwitchChannel
		source.Url = weburl
		sstore.Insert(source)
	}

	log.Println("hrefs len:", len(hrefs))
	sstore.Deduplicate(intimate.TTwitchChannel, "source")
}