package main

import (
	"database/sql"
	"encoding/json"
	"fmt"
	"intimate"
	"log"
	"regexp"
	"time"

	"github.com/tebeka/selenium"
)

// sstore is the source store instance, the implementation used for storing
// source data. See sql/intimate_source.sql for the table layout.
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

// estore is the extractor store connection instance.
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()

// followersRe picks the first run of digits and commas out of the followers
// label. It is compiled once here instead of on every poll of the wait
// condition in extractFollowers.
var followersRe = regexp.MustCompile(`[\d,]+`)

// main pops Twitch streamers from estore until a shutdown is signalled,
// opens each streamer's "/about" page in a Chrome driver, scrapes the user
// name, followers, views, tags, title and gratuity into a CollectLog, stores
// the log, and writes the refreshed streamer record back.
func main() {
	wd := intimate.GetChromeDriver(3030)

	ps := intimate.NewPerfectShutdown()

	// The callback closes the current driver and replaces it with a fresh one
	// through the pointer passed as its first argument.
	// NOTE(review): presumably the counter invokes it once AddWithReset has
	// reached the limit of 200 — confirm against intimate.Counter.
	counter := intimate.NewCounter()
	counter.SetMaxLimit(200)
	counter.SetMaxToDo(func(olist ...interface{}) error {
		owd := olist[0].(*selenium.WebDriver)
		(*owd).Close()
		(*owd).Quit()
		*owd = intimate.GetChromeDriver(3030)
		return nil
	}, &wd)

	// lasterr remembers the last Pop error so a repeating error is logged once.
	var lasterr error
	for !ps.IsClose() {
		streamer, err := estore.Pop(intimate.Ptwitch, 0)
		if streamer == nil || err != nil {
			if err != lasterr {
				log.Println(err, lasterr)
				lasterr = err
			}
			time.Sleep(time.Second * 2)
			continue
		}

		// UpdateUrl is expected to hold raw JSON bytes. Check the assertion so
		// that an unexpected value is reported instead of panicking the scraper.
		rawURL, ok := streamer.UpdateUrl.([]byte)
		if !ok {
			err = fmt.Errorf("unexpected UpdateUrl type %T", streamer.UpdateUrl)
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		var updateURL map[string]string
		if err = json.Unmarshal(rawURL, &updateURL); err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			continue
		}

		liveURL := updateURL["live"]
		log.Println(liveURL)

		// err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about")
		err = wd.Get(liveURL + "/about")
		if err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			time.Sleep(time.Second * 5)
			continue
		}

		streamer.LiveUrl = sql.NullString{String: liveURL, Valid: true}

		clog := &intimate.CollectLog{}
		clog.UserId = streamer.UserId
		clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}

		time.Sleep(time.Millisecond * 500)

		// NOTE(review): a streamer skipped here is neither updated nor marked
		// with UpdateError — confirm that is intended.
		if err = extractUserName(wd, streamer); err != nil {
			log.Println("extractUserName:", err)
			continue
		}

		if err = extractFollowers(wd, clog); err != nil {
			log.Println("extractFollowers:", err)
			continue
		}

		err = extractViews(wd, clog) // views + tags + title + gratuity
		if err != nil {
			// Views could not be read (treated as "not live"): fall back to
			// extracting only the gratuity, opening the Chat tab first if present.
			// The wait's result is ignored because this step is best-effort.
			wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
				channelchat, err := web.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
				btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
				if (err == nil && channelchat != nil) || btn != nil {
					if channelchat != nil {
						if cerr := channelchat.Click(); cerr != nil {
							log.Println(cerr)
						}
					}
					time.Sleep(time.Second)
					extractGratuity(web, clog)
					return true, nil
				}
				return false, nil
			}, time.Second*4)
		}

		streamer.Platform = intimate.Ptwitch
		clog.Platform = string(streamer.Platform)
		clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
		lastClogID := estore.InsertClog(clog)

		streamer.Operator = 10
		streamer.LatestLogUid = lastClogID
		if clog.Tags != nil {
			streamer.Tags = clog.Tags
		}

		// More followers give a smaller UpdateInterval; with zero followers the
		// existing interval is left untouched.
		switch fl := clog.Followers.Int64; {
		case fl > 100000:
			streamer.UpdateInterval = 120
		case fl > 10000:
			streamer.UpdateInterval = 240
		case fl > 1000:
			streamer.UpdateInterval = 360
		case fl > 100:
			streamer.UpdateInterval = 720
		case fl > 0:
			streamer.UpdateInterval = 1440
		}

		streamer.UpdateTime = clog.UpdateTime
		estore.UpdateStreamer(streamer)

		counter.AddWithReset(1)
	}

	wd.Close()
	wd.Quit()
}

// extractUserName waits up to 15 seconds for the channel name heading and
// stores its non-empty text in streamer.UserName.
//
// NOTE(review): the condition returns the lookup error as-is. The selenium
// wait loop appears to stop as soon as a condition returns a non-nil error,
// so a missing element would end the wait on the first attempt rather than
// polling until the timeout — confirm whether that fail-fast behaviour is
// intended. The same applies to the other extract* helpers in this file.
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		label, err := web.FindElement(selenium.ByXPATH, "//a[@class='tw-interactive']//h1")
		if err != nil {
			return false, err
		}
		// An empty or unreadable label is retried on the next poll.
		if ltxt, terr := label.Text(); terr == nil && ltxt != "" {
			streamer.UserName = sql.NullString{String: ltxt, Valid: true}
			return true, nil
		}
		return false, nil
	}, 15*time.Second)
}

// extractFollowers waits up to 4 seconds for the followers label on the about
// panel, takes the first run of digits/commas from its text, and stores the
// parsed number in clog.Followers.
func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		efollowers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
		if err != nil {
			return false, err
		}
		followers, err := efollowers.Text()
		if err != nil || followers == "" {
			// Empty text yields (false, nil), so the wait polls again.
			return false, err
		}
		followers = followersRe.FindString(followers)
		// Best-effort: the second result of ParseNumber is discarded, as in the
		// original code, so an unparsable label is stored as whatever
		// ParseNumber returns.
		fint, _ := intimate.ParseNumber(followers)
		clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
		return true, nil
	}, 4*time.Second)
}

// extractViews waits up to 4 seconds for the live-overlay viewer count. When
// found, it stores the parsed count in clog.Views, clicks the overlay, and
// then runs the tags, title and gratuity extractors on a best-effort basis
// (their errors are deliberately not propagated).
func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		views, err := web.FindElement(selenium.ByXPATH, "//a[@data-a-target='home-live-overlay-button']/span")
		if views == nil {
			return false, err
		}
		txt, terr := views.Text()
		if terr != nil {
			// Keep the original result: report the lookup error (nil when the
			// element was found) so an unreadable label is retried.
			return false, err
		}
		// Best-effort parse; the second result is discarded as in the original.
		vint, _ := intimate.ParseNumber(txt)
		clog.Views = sql.NullInt64{Int64: vint, Valid: true}
		if cerr := views.Click(); cerr != nil {
			log.Println(cerr)
		}
		extractTags(web, clog)
		extractTitle(web, clog)
		extractGratuity(web, clog)
		return true, nil
	}, time.Second*4)
}

// extractTitle waits up to 4 seconds for the stream title heading and stores
// its text in clog.LiveTitle.
func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		title, err := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
		if err != nil {
			return false, err
		}
		if txt, terr := title.Text(); terr == nil {
			clog.LiveTitle = sql.NullString{String: txt, Valid: true}
			return true, nil
		}
		// Text could not be read yet; poll again.
		return false, nil
	}, time.Second*4)
}

// extractTags waits up to 4 seconds for tag elements, collects their texts,
// and stores them JSON-encoded in clog.Tags. Tags whose text cannot be read
// are logged and skipped; clog.Tags is left untouched when none were read.
func extractTags(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		tags, err := web.FindElements(selenium.ByXPATH, "//a[@aria-label and @data-a-target and @href]/div[@class and text()]")
		if len(tags) == 0 {
			return false, err
		}

		var stags []string
		for _, tag := range tags {
			txt, terr := tag.Text()
			if terr != nil {
				log.Println(terr)
				continue
			}
			stags = append(stags, txt)
		}

		if len(stags) > 0 {
			tagbuf, merr := json.Marshal(stags)
			if merr != nil {
				log.Println(merr)
			} else {
				clog.Tags = tagbuf
			}
		}
		return true, nil
	}, time.Second*4)
}

// extractGratuity waits up to 4 seconds for the "expand-grabber" button,
// clicks it, waits one second, then sums every gift-count element into
// clog.Gratuity. Counts whose text cannot be read are logged and skipped.
func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		btn, err := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
		if err != nil {
			return false, err
		}

		if cerr := btn.Click(); cerr != nil {
			log.Println(cerr)
		}
		// Pause after the click before reading the counts.
		time.Sleep(time.Second)

		gifcount, err := web.FindElements(selenium.ByXPATH, `//div[@class="sub-gift-count tw-flex"]/p`)
		if err == nil {
			var gratuity int64
			for _, gc := range gifcount {
				gtxt, terr := gc.Text()
				if terr != nil {
					log.Println(terr)
					continue
				}
				// Best-effort parse; the second result is discarded as in the
				// original.
				gint, _ := intimate.ParseNumber(gtxt)
				gratuity += gint
			}
			clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
		}
		return true, nil
	}, time.Second*4)
}