intimate/tasks/twitch/twitch_task1/task_twitch.go

129 lines
3.1 KiB
Go
Raw Normal View History

2020-07-22 12:00:02 +00:00
package main
import (
"intimate"
"log"
"time"
"github.com/tebeka/selenium"
)
2020-09-08 10:24:51 +00:00
// // sstore is the source store instance, the implementation that stores source data. For the table layout see sql/intimate_source.sql
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
// // estore is the extractor store connection instance
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
// 获取类型的所有频道链接
// Execute 执行任务
func Execute() {
2020-09-08 10:24:51 +00:00
2020-07-31 10:04:10 +00:00
ps := intimate.NewPerfectShutdown()
2020-09-08 10:24:51 +00:00
for !ps.IsClose() {
2020-09-10 09:33:52 +00:00
2020-09-08 10:24:51 +00:00
var err error
2020-09-10 09:33:52 +00:00
adriver := intimate.GetChromeDriver()
wd := adriver.Webdriver
2020-09-08 10:24:51 +00:00
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
err = wd.Get(weburl)
if err != nil {
2020-09-08 10:24:51 +00:00
panic(err)
}
2020-09-08 10:24:51 +00:00
cardCondition := func(wd selenium.WebDriver) (bool, error) {
elements, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
return false, err
}
return len(elements) > 0, nil
}
wd.WaitWithTimeout(cardCondition, time.Second*15)
time.Sleep(time.Second)
2020-09-08 10:24:51 +00:00
e, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
if err != nil {
2020-09-08 10:24:51 +00:00
panic(err)
2020-07-28 10:56:27 +00:00
}
2020-09-08 10:24:51 +00:00
e.Click()
var lasthreflen = 0
var hrefs map[string]bool = make(map[string]bool)
var delayerror = 5
for i := 0; i <= 200; i++ {
cards, err := wd.FindElements(selenium.ByXPATH, "//span/a[contains(@data-a-target,'card-') and @href]")
if err != nil {
log.Println(err)
2020-07-28 10:56:27 +00:00
break
}
2020-09-08 10:24:51 +00:00
if len(hrefs) == lasthreflen {
delayerror--
if delayerror <= 0 {
break
2020-07-31 10:04:10 +00:00
}
2020-09-08 10:24:51 +00:00
} else {
delayerror = 7
}
lasthreflen = len(hrefs)
for ii := 0; ii < 10; ii++ {
for _, card := range cards {
href, err := card.GetAttribute("href")
if err != nil {
log.Println(href, err)
continue
} else {
hrefs[href] = true
}
}
break
2020-07-28 10:56:27 +00:00
}
2020-08-04 06:12:00 +00:00
2020-09-08 10:24:51 +00:00
if ps.IsClose() {
break
}
2020-07-28 10:56:27 +00:00
2020-09-08 10:24:51 +00:00
if len(cards) > 10 {
log.Println(len(cards))
wd.ExecuteScript(`items = document.evaluate("//div[@data-target='directory-page__card-container']/../self::div[@data-target and @style]", document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
2020-07-28 10:56:27 +00:00
for (var i = 0; i < items.snapshotLength - 10; i++) { item = items.snapshotItem(i); item.remove() ;};`, nil)
2020-09-08 10:24:51 +00:00
}
time.Sleep(time.Millisecond * 200)
wd.KeyDown(selenium.EndKey)
time.Sleep(time.Millisecond * 200)
wd.KeyUp(selenium.EndKey)
time.Sleep(time.Millisecond * 2500)
2020-07-28 10:56:27 +00:00
}
2020-09-08 10:24:51 +00:00
for href := range hrefs {
2020-07-28 10:56:27 +00:00
2020-09-08 10:24:51 +00:00
sl := &intimate.StreamerList{}
sl.Url = href
sl.UrlHash = intimate.GetUrlHash(sl.Url)
sl.Platform = string(intimate.Ptwitch)
sl.UpdateTime = intimate.GetUpdateTimeNow()
err := intimate.TStreamerList.Insert(sl)
if err != nil {
log.Println(err)
}
// TODO: Save href
// source := &intimate.Source{}
// source.Source = sql.NullString{String: href, Valid: true}
// source.Operator = 0
// source.Target = intimate.TTwitchChannel
// source.Url = weburl
// sstore.Insert(source)
}
2020-07-24 10:48:33 +00:00
2020-09-08 10:24:51 +00:00
log.Println("hrefs len:", len(hrefs))
// sstore.Deduplicate(intimate.TTwitchChannel, "source")
2020-09-10 09:33:52 +00:00
// wd.Close()
// wd.Quit()
2020-09-08 10:24:51 +00:00
time.Sleep(time.Minute * 30)
}
}