2020-08-11 10:26:17 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"database/sql"
|
|
|
|
"encoding/json"
|
|
|
|
"intimate"
|
|
|
|
"log"
|
|
|
|
"net/url"
|
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/474420502/extractor"
|
|
|
|
"github.com/474420502/focus/compare"
|
|
|
|
"github.com/474420502/focus/tree/heap"
|
|
|
|
"github.com/474420502/requests"
|
|
|
|
)
|
|
|
|
|
|
|
|
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
|
|
|
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))
|
|
|
|
|
|
|
|
// estore 解析存储连接实例
|
|
|
|
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
|
|
|
|
|
|
|
// SearchProfile is the record built for each search-result row by
// ForEachObjectByTag in Execute. On tagged fields, the `exp` tag holds an
// XPath expression relative to the row and the `method` tag names how the
// value is read from the matched node(s).
type SearchProfile struct {
	// UserName is the text of the row's span with class "username".
	UserName string `exp:".//span[@class='username']" method:"Text"`

	// UserId carries no extraction tag; Execute derives it from LiveUrl.
	// A previous binding is kept here for reference only:
	//   exp:".//span[@class='fullname']" method:"Text"
	UserId string

	// LiveUrl is the href attribute of the link inside the "usertext" div.
	LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"AttributeValue,href"`

	// Tag lists the text of every mini tag link in the row.
	Tag []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Text"`

	// TagUrl lists the href of every mini tag link in the row; it uses the
	// same XPath expression as Tag.
	TagUrl []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"AttributeValue,href"`
}
|
|
|
|
|
|
|
|
func Execute() {
|
|
|
|
homeurl := "https://twitcasting.tv"
|
|
|
|
searchurl := "https://twitcasting.tv/rankingindex.php"
|
|
|
|
queuedict := make(map[string]bool)
|
|
|
|
queue := heap.New(compare.String)
|
|
|
|
queue.Put(searchurl)
|
|
|
|
queuedict[searchurl] = true
|
|
|
|
ses := requests.NewSession()
|
|
|
|
ses.Config().SetTimeout(15)
|
|
|
|
|
|
|
|
var surl interface{}
|
|
|
|
var ok bool
|
|
|
|
|
|
|
|
ps := intimate.NewPerfectShutdown()
|
|
|
|
|
|
|
|
for surl, ok = queue.Pop(); ok && !ps.IsClose(); surl, ok = queue.Pop() {
|
|
|
|
u, err := url.Parse(surl.(string))
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
resp, err := ses.Get(u.String()).Execute()
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
log.Println(u.String(), surl)
|
|
|
|
continue
|
|
|
|
// log.Panic(err)
|
|
|
|
}
|
|
|
|
|
2020-08-12 09:42:27 +00:00
|
|
|
etor := extractor.ExtractHtml(resp.Content())
|
2020-08-11 10:26:17 +00:00
|
|
|
result, err := etor.XPaths("//p[@class='taglist']/a[contains(@class, 'tag')]/@href")
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
for _, href := range result.GetTexts() {
|
|
|
|
|
|
|
|
wurl := homeurl + href
|
|
|
|
if ok := queuedict[wurl]; !ok {
|
|
|
|
log.Println(wurl)
|
|
|
|
sl := &intimate.StreamerList{}
|
|
|
|
sl.Platform = intimate.Ptwitcasting
|
|
|
|
sl.Url = wurl
|
|
|
|
sl.Operator = 0
|
|
|
|
sl.UpdateInterval = 120
|
|
|
|
sl.UpdateTime = time.Now()
|
2020-09-08 10:24:51 +00:00
|
|
|
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
2020-08-11 10:26:17 +00:00
|
|
|
|
2020-09-08 10:24:51 +00:00
|
|
|
intimate.TStreamerList.Insert(sl)
|
|
|
|
// estore.InsertStreamerList(sl)
|
2020-08-11 10:26:17 +00:00
|
|
|
|
|
|
|
queue.Put(wurl)
|
|
|
|
queuedict[wurl] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
xps, err := etor.XPaths("//div[@class='tw-search-result-row']")
|
|
|
|
if err != nil {
|
|
|
|
log.Println(surl, err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-08-14 11:26:03 +00:00
|
|
|
var splist = xps.ForEachObjectByTag(SearchProfile{})
|
2020-08-11 10:26:17 +00:00
|
|
|
for _, isp := range splist {
|
|
|
|
sp := isp.(*SearchProfile)
|
|
|
|
if sp.LiveUrl == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
sp.UserId = sp.LiveUrl[1:]
|
|
|
|
for i := 0; i < len(sp.TagUrl); i++ {
|
|
|
|
wurl := homeurl + sp.TagUrl[i]
|
|
|
|
sp.TagUrl[i] = wurl
|
|
|
|
if ok := queuedict[wurl]; !ok {
|
|
|
|
sl := &intimate.StreamerList{}
|
|
|
|
sl.Platform = intimate.Ptwitcasting
|
|
|
|
sl.Url = wurl
|
|
|
|
sl.Operator = 0
|
|
|
|
sl.UpdateInterval = 120
|
|
|
|
sl.UpdateTime = time.Now()
|
2020-09-08 10:24:51 +00:00
|
|
|
sl.UrlHash = intimate.GetUrlHash(sl.Url)
|
|
|
|
intimate.TStreamerList.Insert(sl)
|
2020-08-11 10:26:17 +00:00
|
|
|
|
|
|
|
queue.Put(wurl)
|
|
|
|
queuedict[wurl] = true
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// log.Println(sp.(SearchProfile))
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Println("find user:", len(splist))
|
|
|
|
for _, isp := range splist {
|
|
|
|
sp := isp.(*SearchProfile)
|
|
|
|
// log.Println(sp)
|
|
|
|
streamer := &intimate.Streamer{}
|
|
|
|
streamer.Platform = intimate.Ptwitcasting
|
2020-09-07 10:52:59 +00:00
|
|
|
streamer.LiveUrl = &sql.NullString{String: sp.LiveUrl, Valid: true}
|
2020-08-11 10:26:17 +00:00
|
|
|
if btags, err := json.Marshal(sp.Tag); err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
} else {
|
|
|
|
streamer.Tags = btags
|
|
|
|
}
|
|
|
|
streamer.UpdateInterval = 120
|
2020-09-07 10:52:59 +00:00
|
|
|
streamer.UpdateTime = intimate.GetUpdateTimeNow()
|
|
|
|
streamer.UserName = &sql.NullString{String: sp.UserName, Valid: true}
|
|
|
|
streamer.UserId = &sp.UserId
|
|
|
|
streamer.Operator = 0
|
2020-09-07 10:12:18 +00:00
|
|
|
// estore.InsertStreamer(streamer)
|
|
|
|
intimate.TStreamer.Insert(streamer)
|
2020-08-11 10:26:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
log.Println("finish remain", queue.Size())
|
|
|
|
}
|
|
|
|
}
|