intimate/tasks/twitch/twitch_task2/task_twitch.go

178 lines
4.1 KiB
Go
Raw Normal View History

2020-07-24 10:48:33 +00:00
package main
import (
"database/sql"
"encoding/json"
"intimate"
"log"
"regexp"
"time"
"github.com/tebeka/selenium"
)
var (
	// sstore is the source-store handle used to read and update the raw
	// Twitch source rows. For the table layout see sql/intimate_source.sql.
	sstore = intimate.NewStoreSource(string(intimate.STTwitch))

	// estore is the extractor-store handle used to persist parsed results.
	estore = intimate.NewStoreExtractor()
)
// UserList is the task type that collects every channel link listed under a
// Twitch category. It carries no state; all work happens in Execute.
type UserList struct{}
// Execute 执行任务
func (cl *UserList) Execute() {
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
//article//a[@data-a-target='preview-card-title-link']
2020-07-26 16:35:41 +00:00
2020-07-24 10:48:33 +00:00
wd := intimate.GetChromeDriver(3030)
2020-08-04 06:12:00 +00:00
defer wd.Quit()
2020-07-31 10:04:10 +00:00
ps := intimate.NewPerfectShutdown()
counter := intimate.NewCounter()
counter.SetMaxLimit(100)
counter.SetMaxToDo(func(olist ...interface{}) error {
owd := olist[0].(*selenium.WebDriver)
(*owd).Quit()
*owd = intimate.GetChromeDriver(3030)
return nil
}, &wd)
for !ps.IsClose() {
2020-07-24 10:48:33 +00:00
2020-07-26 16:35:41 +00:00
var err error
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
2020-07-24 10:48:33 +00:00
if err != nil {
2020-07-26 16:35:41 +00:00
panic(err)
2020-07-24 10:48:33 +00:00
}
2020-07-26 16:35:41 +00:00
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
err = wd.Get(weburl)
2020-07-24 10:48:33 +00:00
if err != nil {
2020-07-31 10:04:10 +00:00
log.Println(err)
sstore.UpdateError(sourceChannel, err)
time.Sleep(time.Second * 10)
continue
2020-07-24 10:48:33 +00:00
}
2020-07-26 16:35:41 +00:00
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
return false, err
}
return true, nil
}, time.Second*10)
2020-07-24 10:48:33 +00:00
2020-07-26 16:35:41 +00:00
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
2020-07-24 10:48:33 +00:00
if err != nil {
log.Println(err)
continue
}
2020-07-26 16:35:41 +00:00
btn.Click()
var elements []selenium.WebElement
var liveurls = 0
var delayerror = 2
2020-07-31 10:04:10 +00:00
for i := 0; i < 200 && !ps.IsClose(); i++ {
2020-07-26 16:35:41 +00:00
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
if err != nil {
log.Println(err)
break
}
2020-07-31 10:04:10 +00:00
time.Sleep(time.Millisecond * 200)
2020-07-26 16:35:41 +00:00
wd.KeyDown(selenium.EndKey)
2020-07-31 10:04:10 +00:00
time.Sleep(time.Millisecond * 200)
2020-07-26 16:35:41 +00:00
wd.KeyUp(selenium.EndKey)
2020-07-31 10:04:10 +00:00
time.Sleep(time.Millisecond * 2000)
2020-07-26 16:35:41 +00:00
if len(elements) == liveurls {
2020-07-28 10:56:27 +00:00
delayerror--
2020-07-26 16:35:41 +00:00
if delayerror <= 0 {
break
}
} else {
delayerror = 2
2020-07-26 16:35:41 +00:00
}
liveurls = len(elements)
2020-07-24 10:48:33 +00:00
}
2020-07-31 10:04:10 +00:00
articles, err := wd.FindElements(selenium.ByXPATH, "//article")
2020-07-24 10:48:33 +00:00
if err != nil {
log.Println(err)
continue
}
2020-07-31 10:04:10 +00:00
for _, article := range articles {
e, err := article.FindElement(selenium.ByXPATH, ".//a[@data-a-target='preview-card-title-link' and @href]")
if err != nil {
log.Println(err)
continue
}
href, err := e.GetAttribute("href")
if err != nil {
log.Println(err)
continue
}
2020-07-26 16:35:41 +00:00
2020-07-31 10:04:10 +00:00
btns, err := article.FindElements(selenium.ByXPATH, ".//div[@class='tw-full-width tw-inline-block']//button")
2020-07-26 16:35:41 +00:00
if err != nil {
log.Println(err)
continue
}
2020-07-31 10:04:10 +00:00
var tags []string
for _, btn := range btns {
tag, err := btn.GetAttribute("data-a-target")
if err == nil {
tags = append(tags, tag)
}
}
2020-07-26 16:35:41 +00:00
streamer := &intimate.Streamer{}
2020-07-31 10:04:10 +00:00
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(href)
2020-07-26 16:35:41 +00:00
if len(matches) == 2 {
streamer.UserId = matches[1]
} else {
2020-07-31 10:04:10 +00:00
log.Println(href)
2020-07-26 16:35:41 +00:00
continue
}
2020-07-31 10:04:10 +00:00
jtags, err := json.Marshal(tags)
if err != nil {
log.Println(err)
} else {
streamer.Tags = jtags
}
2020-07-26 16:35:41 +00:00
streamer.Platform = intimate.Ptwitch
updateUrl := make(map[string]string)
2020-07-31 10:04:10 +00:00
updateUrl["live"] = href
streamer.LiveUrl = sql.NullString{String: href, Valid: true}
2020-07-26 16:35:41 +00:00
data, err := json.Marshal(updateUrl)
if err != nil {
log.Println(err)
continue
}
streamer.UpdateUrl = data
streamer.Operator = 0
2020-07-31 10:04:10 +00:00
if estore.InsertStreamer(streamer) {
// log.Println("streamer update tags", streamer.Uid, tags)
estore.Update(streamer, "Tags", streamer.Tags)
}
2020-07-26 16:35:41 +00:00
}
2020-07-31 10:04:10 +00:00
log.Println("streamer find", len(articles))
if len(articles) == 0 {
sourceChannel.Operator = 5
sstore.UpdateOperator(sourceChannel)
2020-07-28 10:56:27 +00:00
}
2020-07-31 10:04:10 +00:00
counter.AddWithReset(1)
2020-07-24 10:48:33 +00:00
}
2020-07-31 10:04:10 +00:00
wd.Close()
wd.Quit()
2020-07-24 10:48:33 +00:00
}