From f86c2554077c94477b2ffc76649fe40019b16ae9 Mon Sep 17 00:00:00 2001
From: eson
Date: Thu, 3 Sep 2020 14:17:54 +0800
Subject: [PATCH] finish nimo

---
 autostore_test.go                             |  44 +++++
 extractor/nimo_extractor/nimo_extractor.go    | 151 +++++++++++++++++-
 .../nimo_extractor/nimo_extractor_test.go     |  77 ---------
 supervisor_conf/nimo_extractor.conf           |  10 ++
 4 files changed, 202 insertions(+), 80 deletions(-)
 create mode 100644 autostore_test.go
 create mode 100644 supervisor_conf/nimo_extractor.conf

diff --git a/autostore_test.go b/autostore_test.go
new file mode 100644
index 0000000..2c53220
--- /dev/null
+++ b/autostore_test.go
@@ -0,0 +1,44 @@
+package intimate
+
+import (
+	"log"
+	"reflect"
+	"testing"
+)
+
+// Store is a stand-in type used to prototype a reflection-driven update API.
+type Store struct {
+}
+
+// NewStore returns an empty Store.
+func NewStore() *Store {
+	return &Store{}
+}
+
+// Update logs the reflected value and type of obj, then the first entry of
+// objfields after passing it through reflect.Indirect. It only logs; nothing
+// is stored.
+func (store *Store) Update(obj interface{}, objfields ...interface{}) {
+	ov := reflect.ValueOf(obj)
+	if !ov.IsValid() {
+		// reflect.ValueOf(nil) is the zero Value, and calling Type on it panics.
+		log.Println("Update: obj is nil")
+		return
+	}
+	log.Printf("%#v,%#v", ov, ov.Type())
+
+	if len(objfields) == 0 {
+		// No fields were passed; indexing objfields[0] would panic.
+		return
+	}
+	log.Println(reflect.Indirect(reflect.ValueOf(objfields[0])))
+}
+
+// TestAutoStore exercises Store.Update with a zero-valued Streamer and its
+// Channel field.
+func TestAutoStore(t *testing.T) {
+	store := NewStore()
+	streamer := &Streamer{}
+
+	store.Update(streamer, streamer.Channel)
+}
diff --git a/extractor/nimo_extractor/nimo_extractor.go b/extractor/nimo_extractor/nimo_extractor.go
index 375cf4c..f1bb1b5 100644
--- a/extractor/nimo_extractor/nimo_extractor.go
+++ b/extractor/nimo_extractor/nimo_extractor.go
@@ -1,9 +1,154 @@
 package main
 
-import "intimate"
+import (
+	"database/sql"
+	"intimate"
+	"log"
+	"time"
+
+	"github.com/474420502/extractor"
+	"github.com/tebeka/selenium"
+)
 
-// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
+// sstore is the source store instance, the implementation that stores source
+// data. For the table layout see sql/intimate_source.sql.
 var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STNimo))
 
-// estore 解析存储连接实例
+// estore is the extractor store connection instance.
 var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
+
+// main runs Execute and returns when it does.
+func main() {
+	Execute()
+}
+
+// LiveInfo is the set of values extracted from a Nimo live-room page. Each
+// field's exp tag is an XPath expression; the mth tag presumably selects a
+// conversion step in the extractor library (confirm against its docs).
+type LiveInfo struct {
+	Followers int64   `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
+	Views     int64   `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
+	Channel   string  `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
+	Gratuity  []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
+}
+
+// Execute scrapes Nimo live rooms in a loop until shutdown is requested.
+//
+// Each iteration pops a streamer from estore, loads its live URL in the
+// browser, extracts a LiveInfo from the page source, inserts a CollectLog and
+// updates the streamer record. The browser session is recreated after every
+// restartEvery processed streamers.
+func Execute() {
+	const (
+		driverPort   = 3030 // port passed to intimate.GetChromeDriver
+		restartEvery = 200  // streamers processed per browser session
+	)
+
+	wd := intimate.GetChromeDriver(driverPort)
+	// wd is reassigned when the session is recycled, so the closure reads it
+	// at exit and quits whichever session is current.
+	defer func() { wd.Quit() }()
+
+	waitfor := intimate.NewWaitFor(wd)
+	ps := intimate.NewPerfectShutdown()
+
+	count := 0
+	for !ps.IsClose() {
+		streamer, err := estore.Pop(intimate.PNimo)
+		if err != nil {
+			log.Println(err)
+			estore.UpdateError(streamer, err)
+			continue
+		}
+
+		// If navigation fails the browser still shows the previous page, so skip
+		// this streamer instead of recording that page's numbers under its uid.
+		if err := wd.Get(streamer.LiveUrl.String); err != nil {
+			log.Println(streamer.Uid, err)
+			estore.UpdateError(streamer, err)
+			continue
+		}
+		// wd.Get("https://www.nimo.tv/live/1253835677")
+
+		waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
+		waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)
+
+		// Opening the top-3 rank item is best effort: failures are logged and
+		// extraction continues with whatever the page shows.
+		element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
+		if err != nil {
+			log.Println(streamer.Uid, err)
+		} else {
+			if err := element.MoveTo(50, 50); err != nil {
+				log.Println(streamer.Uid, err)
+			}
+			if err := element.Click(); err != nil {
+				log.Println(streamer.Uid, err)
+			}
+		}
+		waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)
+
+		// Without a page source there is nothing to extract; skip the streamer
+		// instead of parsing an empty string.
+		pagesource, err := wd.PageSource()
+		if err != nil {
+			log.Println(streamer.Uid, err)
+			estore.UpdateError(streamer, err)
+			continue
+		}
+
+		etor := extractor.ExtractHtmlString(pagesource)
+		li := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
+		// log.Printf("%#v", li)
+
+		utime := sql.NullTime{Time: time.Now(), Valid: true}
+
+		clog := &intimate.CollectLog{}
+		clog.Platform = intimate.PNimo
+		clog.Followers = sql.NullInt64{Int64: li.Followers, Valid: true}
+		clog.Views = sql.NullInt64{Int64: li.Views, Valid: true}
+		clog.UpdateTime = utime
+		clog.StreamerUid = streamer.Uid
+
+		// Gratuity is stored as the sum of all extracted amounts.
+		var sum int64
+		for _, v := range li.Gratuity {
+			sum += v
+		}
+		clog.Gratuity = sql.NullInt64{Int64: sum, Valid: true}
+
+		cuid := estore.InsertClog(clog)
+
+		streamer.Channel = sql.NullString{String: li.Channel, Valid: true}
+		streamer.LatestLogUid = cuid
+		streamer.UpdateTime = utime
+		streamer.Operator = 0
+
+		// Fewer followers map to a larger UpdateInterval (unit not shown here).
+		switch {
+		case li.Followers <= 1000:
+			streamer.UpdateInterval = 720
+		case li.Followers <= 10000:
+			streamer.UpdateInterval = 360
+		case li.Followers <= 100000:
+			streamer.UpdateInterval = 180
+		case li.Followers <= 1000000:
+			streamer.UpdateInterval = 90
+		default:
+			streamer.UpdateInterval = 60
+		}
+
+		estore.Update(streamer, "update_interval", streamer.UpdateInterval, "operator", streamer.Operator, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
+
+		count++
+		if count >= restartEvery {
+			count = 0
+			// Teardown is best effort; its errors are deliberately ignored.
+			wd.Close()
+			wd.Quit()
+			wd = intimate.GetChromeDriver(driverPort)
+			// waitfor was built from the old session; rebuild it for the new one.
+			waitfor = intimate.NewWaitFor(wd)
+		}
+	}
+}
diff --git a/extractor/nimo_extractor/nimo_extractor_test.go b/extractor/nimo_extractor/nimo_extractor_test.go
index 5491776..b80dba9 100644
--- a/extractor/nimo_extractor/nimo_extractor_test.go
+++ b/extractor/nimo_extractor/nimo_extractor_test.go
@@ -1,86 +1,9 @@
 package main
 
 import (
-	"database/sql"
-	"intimate"
-	"log"
 	"testing"
-	"time"
-
-	"github.com/474420502/extractor"
-	"github.com/tebeka/selenium"
 )
 
 func TestMain(t *testing.T) {
 	Execute()
 }
-
-type LiveInfo struct {
-	Followers int64   `exp:"//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2']" mth:"r:ExtractNumber"`
-	Views     int64   `exp:"//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']" mth:"r:ExtractNumber"`
-	Channel   string  `exp:"//div[contains(@class,'nimo-rm_type')]//span"`
-	Gratuity  []int64 `exp:"//div[contains(@class,'rank-item-after3')]//span[contains(@class,'nimo-currency__count')]"`
-}
-
-func Execute() {
-	wd := intimate.GetChromeDriver(3031)
-	waitfor := intimate.NewWaitFor(wd)
-	ps := intimate.NewPerfectShutdown()
-
-	for !ps.IsClose() {
-		streamer, err := estore.Pop(intimate.PNimo)
-		if err != nil {
-			log.Println(err)
-			estore.UpdateError(streamer, err)
-			continue
-		}
-
-		wd.Get(streamer.LiveUrl.String)
-		// wd.Get("https://www.nimo.tv/live/1253835677")
-
-		waitfor.Default("//div[contains(@class,'nimo-rm_followers')]//span[@class='text c2' and text() != '']", nil)
-		waitfor.WaitWithTimeout("//div[contains(@class,'rank-item-top3')]", 7*time.Second, nil)
-		element, err := wd.FindElement(selenium.ByXPATH, "//div[contains(@class,'rank-item-top3')]")
-		if err != nil {
-			log.Println(streamer.Uid, err)
-		} else {
-			err = element.MoveTo(50, 50)
-			element.Click()
-			if err != nil {
-				log.Println(streamer.Uid, err)
-			}
-		}
-		waitfor.Default("//div[contains(@class,'nimo-rm_audience')]//span[@class='text c2']", nil)
-
-		var pagesource string
-		pagesource, _ = wd.PageSource()
-
-		etor := extractor.ExtractHtmlString(pagesource)
-		li := etor.GetObjectByTag(LiveInfo{}).(*LiveInfo)
-		// log.Printf("%#v", li)
-
-		utime := sql.NullTime{Time: time.Now(), Valid: true}
-
-		clog := &intimate.CollectLog{}
-		clog.Platform = intimate.PNimo
-		clog.Followers = sql.NullInt64{Int64: li.Followers, Valid: true}
-		clog.Views = sql.NullInt64{Int64: li.Views, Valid: true}
-		clog.UpdateTime = utime
-		clog.StreamerUid = streamer.Uid
-
-		var sum int64 = 0
-		for _, v := range li.Gratuity {
-			sum += v
-		}
-		clog.Gratuity = sql.NullInt64{Int64: sum, Valid: true}
-
-		cuid := estore.InsertClog(clog)
-
-		streamer.Channel = sql.NullString{String: li.Channel, Valid: true}
-		streamer.LatestLogUid = cuid
-		streamer.UpdateTime = utime
-		streamer.Operator = 0
-
-		estore.Update(streamer, "channel", streamer.Channel, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime)
-	}
-}
diff --git a/supervisor_conf/nimo_extractor.conf b/supervisor_conf/nimo_extractor.conf
new file mode 100644
index 0000000..fccd98c
--- /dev/null
+++ b/supervisor_conf/nimo_extractor.conf
@@ -0,0 +1,10 @@
+[supervisord]
+nodaemon=true
+
+[program:nimo_extractor]
+directory = MYPATH/bin/nimo_extractor/
+command= MYPATH/bin/nimo_extractor/nimo_extractor
+autorestart=true
+stderr_logfile=MYPATH/bin/nimo_extractor/log
+stderr_logfile_maxbytes=0
+stopsignal=QUIT