intimate/extractor/nimo_extractor/nimo_extractor.go

126 lines
3.6 KiB
Go
Raw Normal View History

package main
2020-09-03 06:17:54 +00:00
import (
"database/sql"
"intimate"
"log"
"time"
"github.com/474420502/extractor"
"github.com/tebeka/selenium"
)
// sstore is the source-store instance, i.e. the implementation that persists raw source data. For the table layout see sql/intimate_source.sql
2020-09-09 09:25:36 +00:00
// var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
2020-09-09 09:25:36 +00:00
// // estore is the store connection instance for parsed (extracted) data
// var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
2020-09-03 06:17:54 +00:00
// main is the program entry point; it hands control straight to Execute.
func main() {
	Execute()
}
// LiveInfo is the set of values read from a Nimo live-room page.
//
// Every field carries an `exp` struct tag holding the XPath expression that
// selects the value in the page HTML. Two fields also carry an `mth` tag; it is
// presumably a post-processing directive understood by the extractor package
// (here "r:ExtractNumber") — confirm against github.com/474420502/extractor.
type LiveInfo struct {
	// Followers is taken from the "nimo-rm_followers" element.
	Followers int64 `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
	// Views is taken from the "nimo-rm_audience" element.
	Views int64 `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
	// Channel is taken from the "nimo-rm_type" element.
	Channel string `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
	// Gratuity collects one value per "rank-item-after3" currency counter.
	Gratuity []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
}
func Execute() {
2020-09-11 10:52:04 +00:00
adriver := intimate.GetChromeDriver()
2020-09-03 06:17:54 +00:00
count := 0
countlimit := 200
2020-09-11 10:52:04 +00:00
wd := adriver.Webdriver
2020-09-03 06:17:54 +00:00
waitfor := intimate.NewWaitFor(wd)
ps := intimate.NewPerfectShutdown()
2020-09-09 09:25:36 +00:00
queue := intimate.TStreamer.Queue(intimate.Streamer{}, intimate.ConditionDefault(intimate.PNimo))
2020-09-03 06:17:54 +00:00
for !ps.IsClose() {
2020-09-09 09:25:36 +00:00
istreamer, err := queue.Pop()
2020-09-03 06:17:54 +00:00
if err != nil {
log.Println(err)
2020-09-09 09:25:36 +00:00
intimate.TStreamer.UpdateError(istreamer, err)
2020-09-03 06:17:54 +00:00
continue
}
2020-09-09 09:25:36 +00:00
streamer := istreamer.(*intimate.Streamer)
2020-09-03 06:17:54 +00:00
wd.Get(streamer.LiveUrl.String)
// wd.Get("https://www.nimo.tv/live/1253835677")
waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)
element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
if err != nil {
log.Println(streamer.Uid, err)
} else {
err = element.MoveTo(50, 50)
element.Click()
if err != nil {
log.Println(streamer.Uid, err)
}
}
waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)
var pagesource string
pagesource, _ = wd.PageSource()
etor := extractor.ExtractHtmlString(pagesource)
li := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
// log.Printf("%#v", li)
utime := sql.NullTime{Time: time.Now(), Valid: true}
clog := &intimate.CollectLog{}
clog.Platform = intimate.PNimo
2020-09-09 09:25:36 +00:00
clog.Followers = &sql.NullInt64{Int64: li.Followers, Valid: true}
clog.Views = &sql.NullInt64{Int64: li.Views, Valid: true}
clog.UpdateTime = &utime
2020-09-03 06:17:54 +00:00
clog.StreamerUid = streamer.Uid
var sum int64 = 0
for _, v := range li.Gratuity {
sum += v
}
2020-09-09 09:25:36 +00:00
clog.Gratuity = &sql.NullInt64{Int64: sum, Valid: true}
2020-09-03 06:17:54 +00:00
2020-09-09 09:25:36 +00:00
cuid, err := intimate.TClog.InsertRetAutoID(clog)
if err != nil {
panic(err)
}
2020-09-03 06:17:54 +00:00
2020-09-09 09:25:36 +00:00
streamer.Channel = &sql.NullString{String: li.Channel, Valid: true}
2020-09-03 06:17:54 +00:00
streamer.LatestLogUid = cuid
2020-09-09 09:25:36 +00:00
streamer.UpdateTime = &utime
2020-09-03 06:17:54 +00:00
streamer.Operator = 0
switch {
case li.Followers <= 1000:
streamer.UpdateInterval = 720
case li.Followers <= 10000:
streamer.UpdateInterval = 360
case li.Followers <= 100000:
streamer.UpdateInterval = 180
case li.Followers <= 1000000:
streamer.UpdateInterval = 90
default:
streamer.UpdateInterval = 60
}
2020-09-09 09:25:36 +00:00
// estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
err = intimate.TStreamer.Update(streamer)
if err != nil {
panic(err)
}
2020-09-03 06:17:54 +00:00
count++
if count >= countlimit {
count = 0
2020-09-11 10:52:04 +00:00
adriver.Close()
adriver = intimate.GetChromeDriver()
2020-09-03 06:17:54 +00:00
}
}
}