TODO: twitch get all userid list
This commit is contained in:
parent
cbdedb6795
commit
ea650f91dc
|
@ -4,6 +4,9 @@ package intimate
|
|||
type Platform string
|
||||
|
||||
const (
|
||||
// Popenrec openrec源table名称
|
||||
// Popenrec openrec 平台
|
||||
Popenrec Platform = "openrec"
|
||||
|
||||
// Ptwitch twitch 平台
|
||||
Ptwitch Platform = "twitch"
|
||||
)
|
||||
|
|
8
store.go
8
store.go
|
@ -85,6 +85,14 @@ func (store *StoreSource) Insert(isource IGet) {
|
|||
}
|
||||
}
|
||||
|
||||
// Deduplicate 去重
|
||||
func (store *StoreSource) Deduplicate(target Target, field string) {
|
||||
_, err := store.db.Exec(`DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + `force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(target) + `) ;`)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Update writes the ext, pass_gob, operator and error_msg columns of the row identified by uid.
|
||||
func (store *StoreSource) Update(isource IGet) {
|
||||
_, err := store.db.Exec("update "+store.table+" set ext = ?, pass_gob = ?, operator = ?, error_msg = ? where uid = ?", isource.Get("Ext"), isource.Get("PassGob"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
||||
|
|
|
@ -12,4 +12,7 @@ const (
|
|||
|
||||
// TTwitchChannel is the twitch operation target for fetching category (channel) listings.
|
||||
TTwitchChannel Target = "twitch_channel"
|
||||
|
||||
// TTwitchUser is the twitch operation target for fetching users.
|
||||
TTwitchUser Target = "twitch_user"
|
||||
)
|
||||
|
|
6
tasks/twitch/twitch_task1/main.go
Normal file
6
tasks/twitch/twitch_task1/main.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
package main
|
||||
|
||||
func main() {
|
||||
e := ChannelLink{}
|
||||
e.Execute()
|
||||
}
|
|
@ -2,13 +2,11 @@ package main
|
|||
|
||||
import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"intimate"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
"github.com/tebeka/selenium/chrome"
|
||||
)
|
||||
|
||||
// sstore is the source store instance, the implementation that persists source data. For the table layout see sql/intimate_source.sql
|
||||
|
@ -25,32 +23,10 @@ type ChannelLink struct {
|
|||
|
||||
// Execute 执行任务
|
||||
func (cl *ChannelLink) Execute() {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
chromecaps := chrome.Capabilities{}
|
||||
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/home/eson/test/ssh-key/cache")
|
||||
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
|
||||
caps.AddChrome(chromecaps)
|
||||
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", 3030)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", 3030))
|
||||
defer func() {
|
||||
if err := wd.Close(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
}()
|
||||
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
weburl := "https://www.twitch.tv/directory?sort=VIEWER_COUNT"
|
||||
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
|
@ -81,6 +57,7 @@ func (cl *ChannelLink) Execute() {
|
|||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
// xpath: //article//a[@data-a-target='preview-card-title-link']
|
||||
for _, ele := range elements {
|
||||
href, err := ele.GetAttribute("href")
|
||||
if err != nil {
|
||||
|
@ -94,4 +71,6 @@ func (cl *ChannelLink) Execute() {
|
|||
source.Url = weburl
|
||||
sstore.Insert(source)
|
||||
}
|
||||
|
||||
sstore.Deduplicate(intimate.TTwitchChannel, "source")
|
||||
}
|
||||
|
|
|
@ -8,3 +8,7 @@ func TestCase1(t *testing.T) {
|
|||
e := ChannelLink{}
|
||||
e.Execute()
|
||||
}
|
||||
|
||||
// TestLiveUrl is a placeholder with no assertions yet. It skips explicitly so
// that the test report does not show a pass for behavior nothing has checked.
func TestLiveUrl(t *testing.T) {
	t.Skip("not implemented")
}
|
||||
|
|
6
tasks/twitch/twitch_task2/main.go
Normal file
6
tasks/twitch/twitch_task2/main.go
Normal file
|
@ -0,0 +1,6 @@
|
|||
package main
|
||||
|
||||
func main() {
|
||||
ul := UserList{}
|
||||
ul.Execute()
|
||||
}
|
114
tasks/twitch/twitch_task2/task_twitch.go
Normal file
114
tasks/twitch/twitch_task2/task_twitch.go
Normal file
|
@ -0,0 +1,114 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"regexp"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
)
|
||||
|
||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitch))
|
||||
|
||||
// estore 解析存储连接实例
|
||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||
|
||||
// UserList is the task that collects streamer links from a twitch channel
// listing. It carries no state; all work happens in Execute.
type UserList struct{}
|
||||
|
||||
// Execute 执行任务
|
||||
func (cl *UserList) Execute() {
|
||||
// DELETE FROM source_twitch WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, source FROM source_twitch ) s GROUP BY s.source) ;
|
||||
//article//a[@data-a-target='preview-card-title-link']
|
||||
var err error
|
||||
wd := intimate.GetChromeDriver(3030)
|
||||
|
||||
sourceChannel, err := sstore.Pop(intimate.TTwitchChannel)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
weburl := sourceChannel.Source.String + "?sort=VIEWER_COUNT"
|
||||
err = wd.Get(weburl)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
wd.WaitWithTimeout(func(wd selenium.WebDriver) (bool, error) {
|
||||
_, err := wd.FindElement(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}, time.Second*10)
|
||||
|
||||
btn, err := wd.FindElement(selenium.ByXPATH, "//button[@data-a-target='browse-sort-menu']")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
btn.Click()
|
||||
|
||||
var elements []selenium.WebElement
|
||||
var liveurls = 0
|
||||
var delayerror = 3
|
||||
for i := 0; i < 2; i++ {
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "(//div/p[@class=''])[last()]")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd.KeyDown(selenium.EndKey)
|
||||
time.Sleep(time.Second * 2)
|
||||
if len(elements) == liveurls {
|
||||
delayerror--
|
||||
if delayerror <= 0 {
|
||||
break
|
||||
}
|
||||
} else {
|
||||
delayerror = 3
|
||||
}
|
||||
}
|
||||
elements, err = wd.FindElements(selenium.ByXPATH, "//article//a[@data-a-target='preview-card-title-link' and @href]")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
for _, e := range elements {
|
||||
|
||||
attr, err := e.GetAttribute("href")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer := &intimate.Streamer{}
|
||||
|
||||
matches := regexp.MustCompile(`https://www.twitch.tv/(\w+)`).FindStringSubmatch(attr)
|
||||
if len(matches) == 2 {
|
||||
streamer.UserId = matches[1]
|
||||
} else {
|
||||
log.Println(attr)
|
||||
continue
|
||||
}
|
||||
|
||||
streamer.Platform = intimate.Ptwitch
|
||||
|
||||
updateUrl := make(map[string]string)
|
||||
updateUrl["live"] = attr
|
||||
streamer.LiveUrl = sql.NullString{String: attr, Valid: true}
|
||||
data, err := json.Marshal(updateUrl)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
continue
|
||||
}
|
||||
streamer.UpdateUrl = data
|
||||
streamer.Operator = 0
|
||||
|
||||
estore.InsertStreamer(streamer)
|
||||
}
|
||||
}
|
7
tasks/twitch/twitch_task2/task_twitch_test.go
Normal file
7
tasks/twitch/twitch_task2/task_twitch_test.go
Normal file
|
@ -0,0 +1,7 @@
|
|||
package main
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestMain(t *testing.T) {
|
||||
main()
|
||||
}
|
32
utils.go
32
utils.go
|
@ -1,8 +1,13 @@
|
|||
package intimate
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"log"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/tebeka/selenium"
|
||||
"github.com/tebeka/selenium/chrome"
|
||||
)
|
||||
|
||||
var zeroTime time.Time
|
||||
|
@ -42,3 +47,30 @@ func ParseDuration(dt string) (time.Duration, error) {
|
|||
}
|
||||
return tdt.Sub(zeroTime), nil
|
||||
}
|
||||
|
||||
func GetChromeDriver(port int) selenium.WebDriver {
|
||||
caps := selenium.Capabilities{"browserName": "chrome"}
|
||||
chromecaps := chrome.Capabilities{}
|
||||
err := chromecaps.AddExtension("/home/eson/test/ssh-key/0.1.2_0.crx")
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
chromecaps.Args = append(chromecaps.Args, "--disk-cache-dir=/tmp/chromedriver-cache")
|
||||
chromecaps.ExcludeSwitches = append(chromecaps.ExcludeSwitches, "enable-automation")
|
||||
caps.AddChrome(chromecaps)
|
||||
_, err = selenium.NewChromeDriverService("/usr/bin/chromedriver", port)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
wd, err := selenium.NewRemote(caps, fmt.Sprintf("http://localhost:%d/wd/hub", port))
|
||||
runtime.SetFinalizer(wd, func(obj interface{}) {
|
||||
if err := wd.Close(); err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
})
|
||||
wd.ExecuteScript("windows.navigator.webdriver = undefined", nil)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
return wd
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user