From b82b4f5b774f5f24fa2ce25d941268c0acb3845b Mon Sep 17 00:00:00 2001 From: eson Date: Tue, 11 Aug 2020 18:26:17 +0800 Subject: [PATCH] finish tag slice --- .../openrec_extractor/openrec_extractor.go | 2 +- .../twitcasting_extractor.go | 140 +++ .../twitcasting_extractor_test.go | 12 + .../twitch_extractor/tiwtch_extractor.go | 11 +- extractor_field.go | 2 +- go.mod | 3 +- go.sum | 42 +- store.go | 2 +- .../twitcasting/twitcasting_task1/.gitignore | 2 + .../twitcasting/twitcasting_task1/error.html | 991 ++++++++++++++++++ tasks/twitcasting/twitcasting_task1/main.go | 4 + .../twitcasting_task1/main_test.go | 177 +--- .../twitcasting_task1/twitcasting.go | 140 +++ 13 files changed, 1317 insertions(+), 211 deletions(-) create mode 100644 extractor/twitcasting_extractor/twitcasting_extractor.go create mode 100644 extractor/twitcasting_extractor/twitcasting_extractor_test.go create mode 100644 tasks/twitcasting/twitcasting_task1/.gitignore create mode 100755 tasks/twitcasting/twitcasting_task1/error.html create mode 100644 tasks/twitcasting/twitcasting_task1/twitcasting.go diff --git a/extractor/openrec_extractor/openrec_extractor.go b/extractor/openrec_extractor/openrec_extractor.go index 3b689ad..41cf76d 100644 --- a/extractor/openrec_extractor/openrec_extractor.go +++ b/extractor/openrec_extractor/openrec_extractor.go @@ -87,7 +87,7 @@ func (oe *OpenrecExtractor) Execute() { streamer.UpdateTime = source.UpdateTime streamer.Tags = clog.Tags - clog.Platform = string(intimate.Popenrec) + clog.Platform = intimate.Popenrec clog.UserId = userId clog.UpdateTime = source.UpdateTime diff --git a/extractor/twitcasting_extractor/twitcasting_extractor.go b/extractor/twitcasting_extractor/twitcasting_extractor.go new file mode 100644 index 0000000..44e059f --- /dev/null +++ b/extractor/twitcasting_extractor/twitcasting_extractor.go @@ -0,0 +1,140 @@ +package main + +import ( + "database/sql" + "intimate" + "log" + "regexp" + "strconv" + "strings" + "time" + + "github.com/474420502/extractor" + "github.com/474420502/requests" +) + +// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql +var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec)) + +// estore 解析存储连接实例 +var estore *intimate.StoreExtractor = intimate.NewStoreExtractor() + +type LiveData struct { + UserName string `exp:"//span[@class='tw-live-author__info-username']" method:"Text"` + Follower string `exp:"(//span[@class='tw-user-nav-list-count'])[2]" method:"Text"` + MaxViews string `exp:"//span[@id='max_viewer_count']" method:"Text"` + LiveTitle string `exp:"//meta[@property='og:title']" method:"AttributeValue,content"` + LiveStart string `exp:"//span[@id='updatetimer']" method:"AttributeValue,data-started-at"` + LiveDuration string `exp:"//span[@id='updatetimer']" method:"AttributeValue,data-duration"` + Tags []string `exp:"//div[@class='tw-live-author__commandbox--tags']//a[@class='tag tag-info']"` +} + +func main() { + + ps := intimate.NewPerfectShutdown() + + for !ps.IsClose() { + + streamer, err := estore.Pop(intimate.Ptwitcasting) + if err != nil { + log.Println(err) + } + + ses := requests.NewSession() + resp, err := ses.Get("https://twitcasting.tv/kyunenee09").Execute() + if err != nil { + log.Panic(err) + } + var ldata *LiveData + etor := extractor.ExtractXml(resp.Content()) + ldata = etor.GetObjectByTag(LiveData{}).(*LiveData) + + ldata.MaxViews = regexp.MustCompile("\\d+").FindString(ldata.MaxViews) + ldata.LiveStart = ldata.LiveStart[:len(ldata.LiveStart)-3] + ldata.LiveDuration = ldata.LiveDuration[:len(ldata.LiveDuration)-3] + // log.Println(etor.GetObjectByTag(LiveData{})) + coincount := 0 + + for i := 0; ; i++ { + + giverurl := "https://twitcasting.tv/kyunenee09/backers/" + strconv.Itoa(i) + resp, err = ses.Get(giverurl).Execute() + if err != nil { + log.Panic(err) + } + etor := extractor.ExtractXml(resp.Content()) + xp, err := etor.XPaths("//td[@class='tw-memorial-table-recent-point']") + if err != nil { + log.Panic(err) + } + + coins := xp.GetTexts() + + for _, cointxt := range coins { + scointxt := strings.Split(cointxt, "/") + if len(scointxt) == 2 { + coin := strings.Trim(scointxt[1], " ") + c, err := strconv.Atoi(coin) + if err == nil { + coincount += c + } + log.Println(coin, coincount) + } else { + log.Println("coin error: ", cointxt) + } + } + + if len(coins) < 20 { + break + } + } + + streamer.Platform = intimate.Ptwitcasting + streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} + streamer.UpdateInterval = 60 + streamer.UserName = sql.NullString{String: ldata.UserName, Valid: true} + streamer.Operator = 0 + + clog := &intimate.CollectLog{} + clog.UserId = streamer.UserId + clog.Gratuity = sql.NullInt64{Int64: int64(coincount), Valid: true} + clog.Platform = streamer.Platform + clog.UpdateTime = streamer.UpdateTime + clog.LiveTitle = sql.NullString{String: ldata.LiveTitle, Valid: true} + fl, err := strconv.Atoi(ldata.Follower) + if err == nil { + clog.Followers = sql.NullInt64{Int64: int64(fl), Valid: true} + } else { + log.Println(err) + } + + views, err := strconv.Atoi(ldata.MaxViews) + if err == nil { + clog.Views = sql.NullInt64{Int64: int64(views), Valid: true} + } else { + log.Println(err) + } + + st, err := strconv.Atoi(ldata.LiveStart) + if err == nil { + startTime := time.Unix(int64(st), 0) + clog.LiveStartTime = sql.NullTime{Time: startTime, Valid: true} + dt, err := strconv.Atoi(ldata.LiveDuration) + if err == nil { + + endTime := startTime.Add((time.Duration)(dt) * time.Second) + clog.LiveEndTime = sql.NullTime{Time: endTime, Valid: true} + } else { + log.Println(err) + } + + } else { + log.Println(err) + } + + streamer.LatestLogUid = estore.InsertClog(clog) + estore.UpdateStreamer(streamer) + + break + } +} diff --git a/extractor/twitcasting_extractor/twitcasting_extractor_test.go b/extractor/twitcasting_extractor/twitcasting_extractor_test.go new file mode 100644 index 0000000..2d4df4f --- /dev/null +++ b/extractor/twitcasting_extractor/twitcasting_extractor_test.go @@ -0,0 +1,12 @@ +package main + +import "testing" + +// type LiveData struct { +// UserName string `exp:".//span[@class='tw-live-author__info-username']" method:"Text"` +// Follower string `exp:".//span[@class='tw-user-nav-list-count']" method:"Text"` +// } + +func TestMain(t *testing.T) { + main() +} diff --git a/extractor/twitch_extractor/tiwtch_extractor.go b/extractor/twitch_extractor/tiwtch_extractor.go index bd2c6cf..187ae3e 100644 --- a/extractor/twitch_extractor/tiwtch_extractor.go +++ b/extractor/twitch_extractor/tiwtch_extractor.go @@ -6,6 +6,7 @@ import ( "intimate" "log" "regexp" + "strings" "time" "github.com/tebeka/selenium" @@ -48,6 +49,7 @@ func main() { var updateUrl map[string]string json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) liveUrl := updateUrl["live"] + liveUrl = strings.Replace(liveUrl, "/watchparty", "", -1) log.Println(liveUrl) // err = wd.Get("https://www.twitch.tv/zoe_0601" + "/about") @@ -67,6 +69,13 @@ func main() { time.Sleep(time.Millisecond * 500) err = extractUserName(wd, streamer) if err != nil { + _, err = wd.FindElement(selenium.ByXPATH, "//a[@data-a-target='browse-channels-button']") + if err == nil { + log.Println(streamer.UserId, "may be cancell") + streamer.Operator = 5 + streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} + estore.UpdateStreamer(streamer) + } continue } err = extractFollowers(wd, clog) @@ -94,7 +103,7 @@ func main() { } streamer.Platform = intimate.Ptwitch - clog.Platform = string(streamer.Platform) + clog.Platform = streamer.Platform clog.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} lastClogId := estore.InsertClog(clog) diff --git a/extractor_field.go b/extractor_field.go index c091c5b..2977941 100644 --- a/extractor_field.go +++ b/extractor_field.go @@ -78,7 +78,7 @@ type CollectLog struct { LogUid int64 // 日志id StreamerUid int64 // StreamerId 表id与 - Platform string // + Platform Platform // UserId string // 平台的UserId IsLiveStreaming bool // IsError bool // diff --git a/go.mod b/go.mod index 406d3f9..35e0665 100644 --- a/go.mod +++ b/go.mod @@ -3,14 +3,13 @@ module intimate go 1.14 require ( - github.com/474420502/extractor v0.4.1 + github.com/474420502/extractor v0.5.2 github.com/474420502/focus v0.12.0 github.com/474420502/gcurl v0.1.2 github.com/474420502/hunter v0.3.4 github.com/474420502/requests v1.6.0 github.com/go-sql-driver/mysql v1.5.0 github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb - github.com/stretchr/testify v1.6.1 // indirect github.com/tebeka/selenium v0.9.9 github.com/tidwall/gjson v1.6.0 github.com/tidwall/pretty v1.0.1 // indirect diff --git a/go.sum b/go.sum index 119a3b6..32dceb6 100644 --- a/go.sum +++ b/go.sum @@ -2,44 +2,18 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU= cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg= -github.com/474420502/extractor v0.3.0 h1:VURhjNFP2kG6DvPZfsRR/3JLYHURvsHazp/JazNYbME= -github.com/474420502/extractor v0.3.0/go.mod h1:thq0UAm30cMLY6+LJHPNRSw/H3ZrMGfmK0rk+HwycvE= -github.com/474420502/extractor v0.3.1 h1:IxOeJziOR3DPrZJhOcbOUzAc/UABmKUYGLdVgxSi9yk= -github.com/474420502/extractor v0.3.1/go.mod h1:thq0UAm30cMLY6+LJHPNRSw/H3ZrMGfmK0rk+HwycvE= -github.com/474420502/extractor v0.3.2 h1:KcgRC0+pNfK803uZjL76pgsfsnlKSMR1nQX6o6y8cVA= -github.com/474420502/extractor v0.3.2/go.mod h1:yQRtpUOeb37tMitCsenURnN2Yas9Jm/5HGFDCO+/20k= -github.com/474420502/extractor v0.3.3 h1:2/rCOEtTVkezGqz7E0D8KKN1QBKlQaihe+UMxNZcwNk= -github.com/474420502/extractor v0.3.3/go.mod h1:8cakB/mW3No6o2I7PtrVHQ35auIgHh0mGIfk1++UZm4= -github.com/474420502/extractor v0.3.4 h1:3lKV5oke46sDAxkiY4KGMeBiYI8hwNkiAa2Sf8B+xPY= -github.com/474420502/extractor v0.3.4/go.mod h1:+biDin5eKLuJQHNbW+HnPYCC+2LL090iCZNxQklB11Y= -github.com/474420502/extractor v0.3.5 h1:uq3SuPY51F1pYvAtnaJtcqtJ+yE7wcaq3LP9DWTtBnQ= -github.com/474420502/extractor v0.3.5/go.mod h1:pKjqYQCZquakvor/d9JJQYrTYInWKaVXjzAg+IM1/tY= -github.com/474420502/extractor v0.3.6 h1:Qsky2YYUCENz3BFzlFOOWykFyDOfigbkkCTnMAkKExE= -github.com/474420502/extractor v0.3.6/go.mod h1:rH+/kx0CS8xpzOBqraisQE1A9vfXAPZZ+091D8HYXvw= -github.com/474420502/extractor v0.3.7 h1:QDBd4mAjf6D+vH98LQ1SJByDTtLago9GDiEvN1oyDJ0= -github.com/474420502/extractor v0.3.7/go.mod h1:v0TAfUw1zNyFCYVqj5xyFVFpoqmqErvAd2SzMzR/yc8= -github.com/474420502/extractor v0.4.0 h1:h6MbrkCBPQ2/+VRAK741oVcZuDhZ2t4USt0MOIf/v2U= -github.com/474420502/extractor v0.4.0/go.mod h1:1oPuXIm7whY+/rU7hxDW3ick4hHc4AdiNqdk5vVWaXs= -github.com/474420502/extractor v0.4.1 h1:WqcwF7gyvGREBrXBAm3fLR7yqxP/P/arq/iHXZvt8Gg= -github.com/474420502/extractor v0.4.1/go.mod h1:1oPuXIm7whY+/rU7hxDW3ick4hHc4AdiNqdk5vVWaXs= +github.com/474420502/extractor v0.5.1 h1:A1heJJSYbV9nEaUHfl3/1HYXcsBQfsTzAHikgwg2IF0= +github.com/474420502/extractor v0.5.1/go.mod h1:vkqsbi7wXPqyi5Q5dchcGjiaWHbgOJOAEcwonBiAs/E= +github.com/474420502/extractor v0.5.2 h1:ndgrAkxJjQg0Nrbq3AX2/xAnmIJNxSHRFGQ78wEtWj4= +github.com/474420502/extractor v0.5.2/go.mod h1:vkqsbi7wXPqyi5Q5dchcGjiaWHbgOJOAEcwonBiAs/E= github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo= github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s= github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg= github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM= +github.com/474420502/htmlquery v1.2.4-0.20200810165859-a0e2c521c7c2 h1:4F1tpJ+sEkb3N+XD+Wb9MFiQmOMm3bHp8QUP+BQvkVk= +github.com/474420502/htmlquery v1.2.4-0.20200810165859-a0e2c521c7c2/go.mod h1:AoSN890esHwNKecV0tCs+W0ele1xgFL1Jqk6UcrdxgU= github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M= github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA= -github.com/474420502/libxml2 v0.0.0-20200806111302-aa4be92ad592 h1:kgvx2MvoMhkrzLVjM6C6RIcshgI80fnq5/LqAnTOMxQ= -github.com/474420502/libxml2 v0.0.0-20200806111302-aa4be92ad592/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= -github.com/474420502/libxml2 v0.0.0-20200807033034-1b43ad443d1d h1:MQduBAgnOCeGVUU+tawJxQLP1/Bgnn7119hGpVb9VFI= -github.com/474420502/libxml2 v0.0.0-20200807033034-1b43ad443d1d/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= -github.com/474420502/libxml2 v0.0.0-20200807033649-9731e0a44bf0 h1:EiO+pSoFk7TTv/TnVFCT/swjWQEeLAZ2wXeXsS+9+kY= -github.com/474420502/libxml2 v0.0.0-20200807033649-9731e0a44bf0/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= -github.com/474420502/libxml2 v0.0.0-20200807034854-eaa2a69a2790 h1:vzHGXv0e7MX+MSZcz4SjRJUfzoUpX96Qf0f48T6dkxk= -github.com/474420502/libxml2 v0.0.0-20200807034854-eaa2a69a2790/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= -github.com/474420502/libxml2 v0.0.0-20200807035356-cd2e51185f4b h1:q9qSCx9gm7gS6Xr2nmKqkiu2FApQJFkqvTsrAzcWXps= -github.com/474420502/libxml2 v0.0.0-20200807035356-cd2e51185f4b/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= -github.com/474420502/libxml2 v0.0.0-20200807040518-4ef6186ae68c h1:UZriMoPoXEA4Mq/yP+36sxwkOC3Jk3nqy2I7e3ZV470= -github.com/474420502/libxml2 v0.0.0-20200807040518-4ef6186ae68c/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34= github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY= github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo= github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= @@ -52,9 +26,8 @@ github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y= github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg= github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y= -github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= +github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0= github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= -github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ= @@ -71,6 +44,7 @@ github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gG github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= diff --git a/store.go b/store.go index e83718f..da79aa2 100644 --- a/store.go +++ b/store.go @@ -346,7 +346,7 @@ func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) { return true } - _, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Minute*60)) + _, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Hour*100000)) if err != nil { panic(err) } diff --git a/tasks/twitcasting/twitcasting_task1/.gitignore b/tasks/twitcasting/twitcasting_task1/.gitignore new file mode 100644 index 0000000..de4f65a --- /dev/null +++ b/tasks/twitcasting/twitcasting_task1/.gitignore @@ -0,0 +1,2 @@ +twitcasting_task1 +log \ No newline at end of file diff --git a/tasks/twitcasting/twitcasting_task1/error.html b/tasks/twitcasting/twitcasting_task1/error.html new file mode 100755 index 0000000..3a41b64 --- /dev/null +++ b/tasks/twitcasting/twitcasting_task1/error.html @@ -0,0 +1,991 @@ + + + + Live with Tag: vocaloid - TwitCasting + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + + +
+ +
+ +
+
+ +

+ Tag Search : + vocaloid

+ + + Live(0) / + User(28) + + + + + + +
+
+ +
+
+
+
+
+
+ + + +
+
+ + 月詠來夢@きゃす + +
@LhymeCyas
+
+
+
月詠來夢のきゃす垢
+ +
+
+
+
+
+
+
+ + + +
+
+ + SHIRATAMA + +
@c:shiratama_dango
+
+
+
A creature which enjoy singing, humming while walking, and eating.
+ +
+
+
+
+
+
+
+ + + +
+
+ + ずんちゃ + +
@zunguri1459
+
+
+
はろはわゆ
+ +
+
+
+
+
+
+
+ + + +
+
+ + したばま ましか + +
@kyabet001
+
+
+
2Dホラーアクションシューティング「CARLA」を製作中です PV: https://t.co/PXBDoaNFUV よかったらフォロ/フォロバオナシャス
+ +
+
+
+
+
+
+
+ + + +
+
+ + ぐろぐ + +
@c:grog
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + んぁゆ (NaYu) + +
@NaYu_NotYou
+
+
+
🇮🇩🛫🇦🇺 || Main Account || Korean Related: @NaYu_ha || Pull me into more hells- || icon: @AyaminTwT
+ +
+
+
+
+
+
+
+ + + +
+
+ + このは@モンハン勢 + +
@0konoha39
+
+
+
DIVA、白猫、モンハンが大好きな大学生です(((((└(:D」┌)┘)))))))
+ +
+
+
+
+
+
+
+ + + +
+
+ + f:Egao No ShouJou + +
@f:100000242133373
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + みゆ + +
@rubierin_sna
+
+
+
sing! やってます✨
+ +
+
+
+
+
+
+
+ + + +
+
+ + f:Yuu'll Be Back + +
@f:100004051574775
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + Mongdang + +
@c:Mongdang
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + 狼谷ありー + +
@kamiy_ari
+
+
+
歌が大好きです。
+ +
+
+
+
+
+
+
+ + + +
+
+ + f:Thamanan Wittayachamnankul + +
@f:100000714321056
+
+
+
อ่าา ชอบเกรียนที่สุดเบยย อ๊ายยย ขอให้ได้เกรียน เกรียนในสิ่งที่ชอบ และไม่เดือดร้อนคนอื่น โอเคโน๊ะ อยากจะเกรียนเพลงใส่กันเชิญจ้า กร๊ากก ผมเกรียนได้ทุกคนอยู่แระ ..
+ +
+
+
+
+
+
+
+ + + +
+
+ + hika + +
@hikamody
+
+
+
성인/잡덕/Bi🌈/Feminist/Flexitarian
+ +
+
+
+
+
+
+
+ + + +
+
+ + totally katië 123 + +
@katie_diva_xoxo
+
+
+
❤️❤️❤️zoella
+ +
+
+
+
+
+
+
+ + + +
+
+ + Fujisaki Hitomi + +
@c:vlemvpe
+
+
+
Fujisaki Hitomi +https://www.facebook.com/ciazfah2 +คนไทยค่า :D
+ +
+
+
+
+
+
+
+ + + +
+
+ + 初音@yu卍nan + +
@c:hatune3625
+
+
+
初音ミク大好き
+ +
+
+
+
+
+
+
+ + + +
+
+ + Sarah + +
@c:sara21ren
+
+
+
Hello. It's Sarah (: I play the piano and the guitar. Sometimes sing. Ttm hmu!!
+ +
+
+
+
+
+
+
+ + + +
+
+ + カワズ先輩≠カエル先輩 + +
@kawazu1816
+
+
+
愛したって、愛されたっていいじゃないか
+ +
+
+
+
+
+
+
+ + + +
+
+ + ヴァレン(実況者) + +
@varenturu
+
+
+
不定期で実況動画あげていく予定です。 よろしくお願いします!! 異常なほどな誤字脱字etc...沢山のハプニングがあると思います。 温かい目で見てやってくれればありが..
+ +
+
+
+
+
+
+
+ + + +
+
+ + BARI + +
@c:iasoa020
+
+
+
歌を歌います。少しでもプラスに慣れればと思います。
+ +
+
+
+
+
+
+
+ + + +
+
+ + f:Xio Steph + +
@f:100007528892324
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + f:Kama Jewell Greco + +
@f:489672901190991
+
+
+
+ +
+
+
+
+
+
+
+ + + +
+
+ + 姫柳/かんじむずかしい + +
@Kiryu_Hanabusa
+
+
+
YouTuberになるために準備中です。 基本受けの体制なので話し掛けてもらうまでROMってることが多いです。気軽にお声掛けください! codevein/競馬/麻雀/雀魂/FPS #姫柳の..
+ +
+
+
+
+ +
ただのザコなボカロファンです。
+ +
+
+
+
+
+
+
+ + + +
+
+ + (๑•̀ω•́๑) + +
@porkyuupine
+
+
+
❤ VOCALOID | 歌い手 | アニメと漫画 ❤ シンガポールからのファンです、よろしく〜☆  
+ +
+
+
+
+
+
+
+ + + +
+
+ + miming(みみん) + +
@c:mimingdayo
+
+
+
韓国人です。日本語勉強はじめたのは10年くらい前からで +留学はしたことありません。 +まぁ気ままにやります。 +ヾ(⌒(_*'ω'*)_
+ +
+
+
+ + + + + +
+ + + + + +
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tasks/twitcasting/twitcasting_task1/main.go b/tasks/twitcasting/twitcasting_task1/main.go index 06ab7d0..736ef31 100644 --- a/tasks/twitcasting/twitcasting_task1/main.go +++ b/tasks/twitcasting/twitcasting_task1/main.go @@ -1 +1,5 @@ package main + +func main() { + Execute() +} diff --git a/tasks/twitcasting/twitcasting_task1/main_test.go b/tasks/twitcasting/twitcasting_task1/main_test.go index 7acc76b..18bfb6a 100644 --- a/tasks/twitcasting/twitcasting_task1/main_test.go +++ b/tasks/twitcasting/twitcasting_task1/main_test.go @@ -1,26 +1,9 @@ package main import ( - "database/sql" - "encoding/json" - "intimate" - "net/http" "net/url" - "os" - "os/signal" - "syscall" - "time" - "github.com/474420502/extractor" - "github.com/474420502/focus/compare" - "github.com/474420502/focus/tree/heap" - - "log" "testing" - - _ "net/http/pprof" - - "github.com/474420502/requests" ) func Test(t *testing.T) { @@ -30,161 +13,13 @@ func Test(t *testing.T) { t.Error(u.String()) } -// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql -var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting)) - -// estore 解析存储连接实例 -var estore *intimate.StoreExtractor = intimate.NewStoreExtractor() +func TestUpdateTime(t *testing.T) { + // streamer := &intimate.Streamer{} + // streamer.Uid = 420153 + // streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} + // estore.Update(streamer, "update_time", streamer.UpdateTime) +} func TestMain(t *testing.T) { - f, _ := os.OpenFile("./log", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm) - log.SetFlags(log.Llongfile | log.Ltime) - log.SetOutput(f) - go func() { - log.Println(http.ListenAndServe(":4040", nil)) - }() - - homeurl := "https://twitcasting.tv" - searchurl := "https://twitcasting.tv/rankingindex.php" - queuedict := make(map[string]bool) - queue := heap.New(compare.String) - queue.Put(searchurl) - queuedict[searchurl] = true - ses := requests.NewSession() - ses.Config().SetTimeout(15) - - var surl interface{} - var ok bool - var debugsp *SearchProfile - var content []byte - - defer func() { - if ierr := recover(); ierr != nil { - log.Println(surl, debugsp) - f, _ := os.OpenFile("./error.html", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm) - f.Write(content) - f.Close() - log.Panic(ierr) - } - }() - - go func() { - signalchan := make(chan os.Signal) - signal.Notify(signalchan, syscall.SIGINT, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP) - log.Println("accept stop command:", <-signalchan) - f, _ := os.OpenFile("./error.html", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm) - f.Write(content) - f.Close() - os.Exit(1) - }() - - for surl, ok = queue.Pop(); ok; surl, ok = queue.Pop() { - u, err := url.Parse(surl.(string)) - if err != nil { - log.Println(err) - continue - } - - resp, err := ses.Get(u.String()).Execute() - if err != nil { - log.Println(err) - log.Println(u.String(), surl) - continue - // log.Panic(err) - } - - content = resp.Content() - etor := extractor.ExtractXml(resp.Content()) - result, err := etor.XPath("//p[@class='taglist']/a[contains(@class, 'tag')]/@href") - if err != nil { - panic(err) - } - - iter := result.NodeIter() - for iter.Next() { - - wurl := homeurl + iter.Node().NodeValue() - if ok := queuedict[wurl]; !ok { - log.Println(wurl) - sl := &intimate.StreamerList{} - sl.Platform = intimate.Ptwitcasting - sl.Url = wurl - sl.Operator = 0 - sl.UpdateInterval = 120 - sl.UpdateTime = time.Now() - - estore.InsertStreamerList(sl) - - queue.Put(wurl) - queuedict[wurl] = true - } - } - - // doc.Find("//div[@class='tw-search-result-row']") - xps, err := etor.XPaths("//div[@class='tw-search-result-row']") - if err != nil { - log.Println(surl, err) - continue - } - - log.Println("extract tag") - var splist = xps.ForEachTag(SearchProfile{}) - log.Println("finish extract tag") - for _, isp := range splist { - sp := isp.(*SearchProfile) - if sp.LiveUrl == "" { - continue - } - - sp.UserId = sp.LiveUrl[1:] - for i := 0; i < len(sp.TagUrl); i++ { - wurl := homeurl + sp.TagUrl[i] - sp.TagUrl[i] = wurl - if ok := queuedict[wurl]; !ok { - sl := &intimate.StreamerList{} - sl.Platform = intimate.Ptwitcasting - sl.Url = wurl - sl.Operator = 0 - sl.UpdateInterval = 120 - sl.UpdateTime = time.Now() - estore.InsertStreamerList(sl) - - queue.Put(wurl) - queuedict[wurl] = true - } - } - // log.Println(sp.(SearchProfile)) - } - - log.Println("find user:", len(splist)) - for _, isp := range splist { - sp := isp.(*SearchProfile) - // log.Println(sp) - streamer := &intimate.Streamer{} - streamer.Platform = intimate.Ptwitcasting - streamer.LiveUrl = sql.NullString{String: sp.LiveUrl, Valid: true} - if btags, err := json.Marshal(sp.Tag); err != nil { - log.Println(err) - } else { - streamer.Tags = btags - } - streamer.UpdateInterval = 120 - streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} - streamer.UserName = sql.NullString{String: sp.UserName, Valid: true} - streamer.UserId = sp.UserId - debugsp = sp - estore.InsertStreamer(streamer) - } - - log.Println("finish remain", queue.Size()) - } -} - -type SearchProfile struct { - UserName string `exp:".//span[@class='username']" method:"Text"` - UserId string // `exp:".//span[@class='fullname']" method:"Text"` - LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"` - Tag []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Text"` - TagUrl []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Attribute,href Value"` } diff --git a/tasks/twitcasting/twitcasting_task1/twitcasting.go b/tasks/twitcasting/twitcasting_task1/twitcasting.go new file mode 100644 index 0000000..58e2cba --- /dev/null +++ b/tasks/twitcasting/twitcasting_task1/twitcasting.go @@ -0,0 +1,140 @@ +package main + +import ( + "database/sql" + "encoding/json" + "intimate" + "log" + "net/url" + "time" + + "github.com/474420502/extractor" + "github.com/474420502/focus/compare" + "github.com/474420502/focus/tree/heap" + "github.com/474420502/requests" +) + +// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql +var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting)) + +// estore 解析存储连接实例 +var estore *intimate.StoreExtractor = intimate.NewStoreExtractor() + +type SearchProfile struct { + UserName string `exp:".//span[@class='username']" method:"Text"` + UserId string // `exp:".//span[@class='fullname']" method:"Text"` + LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"AttributeValue,href"` + Tag []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Text"` + TagUrl []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"AttributeValue,href"` +} + +func Execute() { + homeurl := "https://twitcasting.tv" + searchurl := "https://twitcasting.tv/rankingindex.php" + queuedict := make(map[string]bool) + queue := heap.New(compare.String) + queue.Put(searchurl) + queuedict[searchurl] = true + ses := requests.NewSession() + ses.Config().SetTimeout(15) + + var surl interface{} + var ok bool + + ps := intimate.NewPerfectShutdown() + + for surl, ok = queue.Pop(); ok && !ps.IsClose(); surl, ok = queue.Pop() { + u, err := url.Parse(surl.(string)) + if err != nil { + log.Println(err) + continue + } + + resp, err := ses.Get(u.String()).Execute() + if err != nil { + log.Println(err) + log.Println(u.String(), surl) + continue + // log.Panic(err) + } + + etor := extractor.ExtractXml(resp.Content()) + result, err := etor.XPaths("//p[@class='taglist']/a[contains(@class, 'tag')]/@href") + if err != nil { + panic(err) + } + + for _, href := range result.GetTexts() { + + wurl := homeurl + href + if ok := queuedict[wurl]; !ok { + log.Println(wurl) + sl := &intimate.StreamerList{} + sl.Platform = intimate.Ptwitcasting + sl.Url = wurl + sl.Operator = 0 + sl.UpdateInterval = 120 + sl.UpdateTime = time.Now() + + estore.InsertStreamerList(sl) + + queue.Put(wurl) + queuedict[wurl] = true + } + } + + xps, err := etor.XPaths("//div[@class='tw-search-result-row']") + if err != nil { + log.Println(surl, err) + continue + } + + var splist = xps.ForEachTag(SearchProfile{}) + for _, isp := range splist { + sp := isp.(*SearchProfile) + if sp.LiveUrl == "" { + continue + } + + sp.UserId = sp.LiveUrl[1:] + for i := 0; i < len(sp.TagUrl); i++ { + wurl := homeurl + sp.TagUrl[i] + sp.TagUrl[i] = wurl + if ok := queuedict[wurl]; !ok { + sl := &intimate.StreamerList{} + sl.Platform = intimate.Ptwitcasting + sl.Url = wurl + sl.Operator = 0 + sl.UpdateInterval = 120 + sl.UpdateTime = time.Now() + estore.InsertStreamerList(sl) + + queue.Put(wurl) + queuedict[wurl] = true + } + } + // log.Println(sp.(SearchProfile)) + } + + log.Println("find user:", len(splist)) + for _, isp := range splist { + sp := isp.(*SearchProfile) + // log.Println(sp) + streamer := &intimate.Streamer{} + streamer.Platform = intimate.Ptwitcasting + streamer.LiveUrl = sql.NullString{String: sp.LiveUrl, Valid: true} + if btags, err := json.Marshal(sp.Tag); err != nil { + log.Println(err) + } else { + streamer.Tags = btags + } + streamer.UpdateInterval = 120 + streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true} + streamer.UserName = sql.NullString{String: sp.UserName, Valid: true} + streamer.UserId = sp.UserId + estore.InsertStreamer(streamer) + } + + log.Println("finish remain", queue.Size()) + } +}