package main

import (
	"database/sql"
	"encoding/json"
	"intimate"
	"log"
	"os"
	"os/signal"
	"regexp"
	"sync/atomic"
	"syscall"
	"time"

	"github.com/tebeka/selenium"
)

// sstore is the source store instance, i.e. the implementation that stores
// source data. For the table layout see sql/intimate_source.sql.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// estore is the extractor store connection instance.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// streamerURLPattern captures the user id from a Twitch channel link.
// It is compiled once here instead of once per anchor element.
var streamerURLPattern = regexp.MustCompile(`https://www.twitch.tv/(\w+)`)

// UserList is the task that fetches all channel links of a category.
type UserList struct {
}

// Execute runs the task. It repeatedly pops a source channel from sstore,
// opens its page sorted by viewer count in a Chrome driver, scrolls the page
// with the End key, and stores every streamer link found through estore.
//
// The loop stops once SIGQUIT or SIGTERM is received. Execute panics when
// popping a source channel or loading its page fails.
//
// Maintenance note kept from the original author, to remove duplicated
// sources:
//
//	DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
func (cl *UserList) Execute() {
	const (
		driverPort     = 3030 // port handed to intimate.GetChromeDriver
		maxScrolls     = 200  // upper bound on End-key presses per page
		restartAfter   = 100  // channels processed before the driver is recreated
		stallTolerance = 2    // consecutive non-growing scrolls before giving up
	)

	wd := intimate.GetChromeDriver(driverPort)
	// The closure reads wd when Execute returns, so the driver that is
	// current at that point gets closed. A plain `defer wd.Close()` would
	// bind the first driver, which is replaced periodically below.
	defer func() {
		if err := wd.Close(); err != nil {
			log.Println(err)
		}
	}()

	var loop int32 = 1
	count := 0

	go func() {
		// signal.Notify never blocks when delivering, so the channel needs
		// a buffer or the signal may be dropped.
		signalchan := make(chan os.Signal, 1)
		// SIGKILL and SIGSTOP cannot be trapped, so they are not registered.
		signal.Notify(signalchan, syscall.SIGQUIT, syscall.SIGTERM)
		log.Println("accept stop command:", <-signalchan)
		atomic.StoreInt32(&loop, 0)
	}()

	for atomic.LoadInt32(&loop) > 0 {
		sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
		if err != nil {
			panic(err)
		}

		weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
		if err = wd.Get(weburl); err != nil {
			panic(err)
		}

		// Poll until the page content shows up. The condition reports
		// "not yet" instead of an error while the element is missing,
		// because a condition error aborts the wait immediately.
		err = wd.WaitWithTimeout(func(d selenium.WebDriver) (bool, error) {
			_, findErr := d.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			return findErr == nil, nil
		}, time.Second*10)
		if err != nil {
			log.Println(err)
		}

		btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
		if err != nil {
			log.Println(err)
			continue
		}
		if err = btn.Click(); err != nil {
			log.Println(err)
		}

		// Scroll to the bottom until the page stops growing.
		// NOTE(review): this XPath ends in [last()], so it yields at most
		// one element and the growth check below can only see 0 or 1 -
		// confirm whether a card-counting selector was intended.
		var elements []selenium.WebElement
		liveurls := 0
		delayerror := stallTolerance
		for i := 0; i < maxScrolls && atomic.LoadInt32(&loop) > 0; i++ {
			elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
			if err != nil {
				log.Println(err)
				break
			}

			time.Sleep(time.Millisecond * 500)
			if err = wd.KeyDown(selenium.EndKey); err != nil {
				log.Println(err)
			}
			if err = wd.KeyUp(selenium.EndKey); err != nil {
				log.Println(err)
			}
			time.Sleep(time.Millisecond * 1500)

			if len(elements) == liveurls {
				delayerror--
				if delayerror <= 0 {
					break
				}
			} else {
				delayerror = stallTolerance
			}
			liveurls = len(elements)
		}

		elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
		if err != nil {
			log.Println(err)
			continue
		}

		for _, e := range elements {
			attr, err := e.GetAttribute("href")
			if err != nil {
				log.Println(err)
				continue
			}
			streamer, ok := streamerFromLink(attr)
			if !ok {
				continue
			}
			estore.InsertStreamer(streamer)
		}

		log.Println("streamer insert", len(elements))

		// Recreate the driver after a fixed number of channels.
		count++
		if count >= restartAfter {
			if err = wd.Close(); err != nil {
				log.Println(err)
			}
			wd = intimate.GetChromeDriver(driverPort)
			count = 0
		}
	}
}

// streamerFromLink builds the Streamer record for a channel link. It reports
// false, after logging the reason, when the link does not match
// streamerURLPattern or its update URL cannot be encoded as JSON.
func streamerFromLink(link string) (*intimate.Streamer, bool) {
	matches := streamerURLPattern.FindStringSubmatch(link)
	if len(matches) != 2 {
		log.Println(link)
		return nil, false
	}

	streamer := &intimate.Streamer{}
	streamer.UserId = matches[1]
	streamer.Platform = intimate.Ptwitch
	streamer.LiveUrl = sql.NullString{String: link, Valid: true}

	data, err := json.Marshal(map[string]string{"live": link})
	if err != nil {
		log.Println(err)
		return nil, false
	}
	streamer.UpdateUrl = data
	streamer.Operator = 0
	return streamer, true
}