intimate/extractor/twitch_extractor/tiwtch_extractor.go
eson 3b7e8e94ea nimo 插件优化
nimo 数据测试提取成功
2020-09-02 18:56:20 +08:00

255 lines
6.9 KiB
Go

package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"strings"
"time"
"github.com/tebeka/selenium"
)
var (
	// sstore is the source-store instance used for persisting raw source
	// data. For the table layout see sql/intimate_source.sql.
	sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor-store connection instance.
	estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
)
// main runs the Twitch extraction loop. Until the PerfectShutdown instance
// reports that it is closed, it pops a streamer from estore for the Twitch
// platform, loads the streamer's "<live url>/about" page in the Chrome driver,
// scrapes user name, followers, views, tags, title and gratuity into a
// CollectLog, inserts that log and writes the updated streamer back.
func main() {
	wd := intimate.GetChromeDriver(3030)
	ps := intimate.NewPerfectShutdown()

	const countLimit = 200 // number of processed streamers before a fresh driver is requested
	count := 0
	var lasterr error

	for !ps.IsClose() {
		streamer, err := estore.Pop(intimate.Ptwitch)
		if streamer == nil || err != nil {
			// Only log when the error value changes, so repeated identical
			// failures (or an empty result) do not flood the log.
			if err != lasterr {
				log.Println(err, lasterr)
				lasterr = err
			}
			time.Sleep(time.Second * 2)
			continue
		}

		// UpdateUrl is an interface value. Accept both []byte and string, and
		// let any other dynamic type (including nil) fall through with empty
		// input so that json.Unmarshal reports an error instead of an
		// unchecked type assertion panicking the whole process.
		var rawUpdateUrl []byte
		switch v := streamer.UpdateUrl.(type) {
		case []byte:
			rawUpdateUrl = v
		case string:
			rawUpdateUrl = []byte(v)
		}
		var updateUrl map[string]string
		if err = json.Unmarshal(rawUpdateUrl, &updateUrl); err != nil {
			log.Println(streamer.UserId, "invalid update url:", err)
			estore.UpdateError(streamer, err)
			continue
		}

		// Strip the watch-party suffix so "/about" is appended to the plain channel URL.
		liveUrl := strings.Replace(updateUrl["live"], "/watchparty", "", -1)
		log.Println(liveUrl)

		if err = wd.Get(liveUrl + "/about"); err != nil {
			log.Println(err)
			estore.UpdateError(streamer, err)
			time.Sleep(time.Second * 5)
			continue
		}

		streamer.LiveUrl = sql.NullString{String: liveUrl, Valid: true}
		clog := &intimate.CollectLog{}
		clog.UserId = streamer.UserId
		clog.Gratuity = sql.NullInt64{Int64: 0, Valid: false}

		time.Sleep(time.Millisecond * 500)

		if err = extractUserName(wd, streamer); err != nil {
			// No user name found: if the "browse channels" button is present
			// the channel is treated as possibly cancelled and the streamer is
			// written back with Operator 5.
			// NOTE(review): the meaning of Operator 5 is defined outside this file.
			_, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']")
			if err == nil {
				log.Println(streamer.UserId, "may be cancell")
				streamer.Operator = 5
				streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
				estore.UpdateStreamer(streamer)
			}
			continue
		}

		if err = extractFollowers(wd, clog); err != nil {
			// Previously skipped silently; log so failed extractions are visible.
			log.Println(streamer.UserId, "extract followers:", err)
			continue
		}

		// extractViews also collects tags, title and gratuity when it succeeds.
		if err = extractViews(wd, clog); err != nil {
			// When the channel is not live, still try to extract gratuity.
			// This is best effort: the result of the wait is intentionally ignored.
			wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
				channelchat, err := wd.FindElement(selenium.ByXPATH, `//a[@data-a-target="channel-home-tab-Chat"]`)
				btn, _ := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
				if (err == nil && channelchat != nil) || btn != nil {
					if channelchat != nil {
						channelchat.Click()
					}
					time.Sleep(time.Second)
					extractGratuity(wd, clog)
					return true, nil
				}
				return false, nil
			}, time.Second*4)
		}

		streamer.Platform = intimate.Ptwitch
		clog.Platform = streamer.Platform
		clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
		lastClogId := estore.InsertClog(clog)

		// NOTE(review): the meaning of Operator 10 is defined outside this file.
		streamer.Operator = 10
		streamer.LatestLogUid = lastClogId
		if clog.Tags != nil {
			streamer.Tags = clog.Tags
		}

		// More followers means a smaller UpdateInterval; with zero (or
		// negative) followers the existing interval is left untouched.
		switch fl := clog.Followers.Int64; {
		case fl > 100000:
			streamer.UpdateInterval = 120
		case fl > 10000:
			streamer.UpdateInterval = 240 * 2
		case fl > 1000:
			streamer.UpdateInterval = 360 * 2
		case fl > 100:
			streamer.UpdateInterval = 720 * 2
		case fl > 0:
			streamer.UpdateInterval = 1440 * 4
		}

		streamer.UpdateTime = clog.UpdateTime
		estore.UpdateStreamer(streamer)

		count++
		if count >= countLimit {
			count = 0
			// NOTE(review): the previous driver is replaced without Quit (the
			// original had the call commented out). Confirm that
			// GetChromeDriver releases the old session, otherwise this leaks a
			// browser every countLimit streamers.
			wd = intimate.GetChromeDriver(3030)
		}
	}

	wd.Close()
	wd.Quit()
}
// extractUserName waits up to 15 seconds for the channel heading and stores
// its text in streamer.UserName.
//
// A failed element lookup is returned to the waiter as an error; a heading
// whose text cannot be read, or is still empty, reports "not done" without an
// error. The result of WaitWithTimeout is returned unchanged.
func extractUserName(wd selenium.WebDriver, streamer *intimate.Streamer) error {
	const headingXPath = "//a[@class='tw-interactive']//h1"

	nameReady := func(web selenium.WebDriver) (bool, error) {
		heading, findErr := web.FindElement(selenium.ByXPATH, headingXPath)
		if findErr != nil {
			return false, findErr
		}

		name, textErr := heading.Text()
		if textErr != nil || name == "" {
			// Element exists but has no usable text yet.
			return false, nil
		}

		streamer.UserName = sql.NullString{String: name, Valid: true}
		return true, nil
	}

	return wd.WaitWithTimeout(nameReady, 15*time.Second)
}
// followersNumberRe matches the first run of digits and commas in the
// followers label. It is compiled once at package scope instead of on every
// poll of the wait condition.
var followersNumberRe = regexp.MustCompile(`[\d,]+`)

// extractFollowers waits up to 4 seconds for the followers label inside the
// about panel, takes the first digits-and-commas run from its text, converts
// it with intimate.ParseNumber and stores the result in clog.Followers
// (marked Valid).
//
// A failed element lookup or a failed text read is returned to the waiter as
// an error; an empty label reports "not done" without an error. The result of
// WaitWithTimeout is returned unchanged.
func extractFollowers(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	return wd.WaitWithTimeout(func(web selenium.WebDriver) (bool, error) {
		efollowers, err := web.FindElement(selenium.ByXPATH, "//div[@data-a-target='about-panel']//div[@class='tw-align-center']")
		if err != nil {
			return false, err
		}

		followers, err := efollowers.Text()
		if err != nil || followers == "" {
			return false, err
		}

		followers = followersNumberRe.FindString(followers)
		// The second result of ParseNumber is deliberately discarded to keep
		// the existing best-effort behavior: whatever value it yields is
		// recorded even when the label held no parsable number.
		// NOTE(review): consider surfacing that failure instead of storing it as Valid.
		fint, _ := intimate.ParseNumber(followers)
		clog.Followers = sql.NullInt64{Int64: int64(fint), Valid: true}
		return true, nil
	}, 4*time.Second)
}
// extractViews waits up to 4 seconds for the live-overlay button. When the
// button is found and its text can be read, the text is converted with
// intimate.ParseNumber and stored in clog.Views (marked Valid), the button is
// clicked, and tags, title and gratuity are then extracted in that order.
// The results of the click and of those three extractors are not checked.
//
// If the button is missing, or its text cannot be read, the condition reports
// "not done" together with the error from the element lookup.
func extractViews(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	const overlayXPath = "//a[@data-a-target='home-live-overlay-button']/span"

	viewsReady := func(web selenium.WebDriver) (bool, error) {
		overlay, findErr := web.FindElement(selenium.ByXPATH, overlayXPath)
		if overlay == nil {
			return false, findErr
		}

		label, textErr := overlay.Text()
		if textErr != nil {
			return false, findErr
		}

		viewers, _ := intimate.ParseNumber(label)
		clog.Views = sql.NullInt64{Int64: viewers, Valid: true}

		// Clicking the overlay comes before the remaining extractions.
		overlay.Click()
		extractTags(wd, clog)
		extractTitle(wd, clog)
		extractGratuity(wd, clog)
		return true, nil
	}

	return wd.WaitWithTimeout(viewsReady, time.Second*4)
}
// extractTitle waits up to 4 seconds for the stream-title heading and stores
// its text in clog.LiveTitle (marked Valid).
//
// A failed element lookup is returned to the waiter as an error; a failed
// text read reports "not done" without an error. The result of
// WaitWithTimeout is returned unchanged.
func extractTitle(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	titleReady := func(web selenium.WebDriver) (bool, error) {
		heading, findErr := web.FindElement(selenium.ByXPATH, `//h2[@data-a-target='stream-title']`)
		if findErr != nil {
			return false, findErr
		}

		text, textErr := heading.Text()
		if textErr != nil {
			return false, nil
		}

		clog.LiveTitle = sql.NullString{String: text, Valid: true}
		return true, nil
	}

	return wd.WaitWithTimeout(titleReady, time.Second*4)
}
// extractTags waits up to 4 seconds for tag elements, collects the text of
// every tag that can be read, and stores the JSON-encoded list in clog.Tags.
//
// While no tag element is found the condition reports "not done" together
// with the lookup error. Once at least one element exists the condition
// reports done, even if no text could be read or encoding failed; those
// failures are only logged and clog.Tags is left untouched.
func extractTags(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	const tagXPath = "//a[@aria-label and @data-a-target and @href]/div[@class and text()]"

	tagsReady := func(web selenium.WebDriver) (bool, error) {
		elements, findErr := web.FindElements(selenium.ByXPATH, tagXPath)
		if len(elements) == 0 {
			return false, findErr
		}

		names := make([]string, 0, len(elements))
		for _, element := range elements {
			name, textErr := element.Text()
			if textErr != nil {
				log.Println(textErr)
				continue
			}
			names = append(names, name)
		}

		if len(names) == 0 {
			return true, nil
		}

		encoded, marshalErr := json.Marshal(names)
		if marshalErr != nil {
			log.Println(marshalErr)
			return true, nil
		}
		clog.Tags = encoded
		return true, nil
	}

	return wd.WaitWithTimeout(tagsReady, time.Second*4)
}
// extractGratuity waits up to 4 seconds for the "expand-grabber" button,
// clicks it, pauses one second, then sums the numbers shown in the
// sub-gift-count paragraphs and stores the total in clog.Gratuity (marked
// Valid).
//
// A failed button lookup is returned to the waiter as an error. Once the
// button was found the condition always reports done: if the gift-count
// lookup fails clog.Gratuity is left untouched, and an entry whose text
// cannot be read is logged and skipped. The click result and the second
// result of intimate.ParseNumber are not checked.
func extractGratuity(wd selenium.WebDriver, clog *intimate.CollectLog) error {
	gratuityReady := func(web selenium.WebDriver) (bool, error) {
		expand, findErr := web.FindElement(selenium.ByXPATH, `//button[@data-test-selector="expand-grabber"]`)
		if findErr != nil {
			return false, findErr
		}

		expand.Click()
		time.Sleep(time.Second)

		entries, listErr := web.FindElements(selenium.ByXPATH, `//div[@class="sub-gift-count tw-flex"]/p`)
		if listErr != nil {
			return true, nil
		}

		var total int64
		for _, entry := range entries {
			text, textErr := entry.Text()
			if textErr != nil {
				log.Println(textErr)
				continue
			}
			amount, _ := intimate.ParseNumber(text)
			total += amount
		}

		clog.Gratuity = sql.NullInt64{Int64: total, Valid: true}
		return true, nil
	}

	return wd.WaitWithTimeout(gratuityReady, time.Second*4)
}