2020-07-16 07:25:55 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"database/sql"
|
|
|
|
"encoding/json"
|
|
|
|
"intimate"
|
|
|
|
"log"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2020-08-14 11:26:03 +00:00
|
|
|
"github.com/474420502/extractor"
|
2020-07-16 07:25:55 +00:00
|
|
|
"github.com/tidwall/gjson"
|
|
|
|
)
|
|
|
|
|
2020-07-22 12:00:02 +00:00
|
|
|
var estore = intimate.NewStoreExtractor()
|
|
|
|
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
|
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
// OpenrecExtractor 提取方法
|
|
|
|
type OpenrecExtractor struct {
|
|
|
|
user *intimate.ExtractorSource
|
|
|
|
userLive *intimate.ExtractorSource
|
|
|
|
supporters *intimate.ExtractorSource
|
|
|
|
}
|
|
|
|
|
2020-08-14 11:26:03 +00:00
|
|
|
// UserInfo describes the values read from the user page through struct tags.
// Execute passes a UserInfo value to GetObjectByTag of the extractor package.
//
// NOTE(review): the `exp` tag presumably carries the XPath expression and
// `mth` a post-processing method name understood by the extractor package;
// confirm against github.com/474420502/extractor.
type UserInfo struct {
	// UserName is the display name shown in the profile list.
	UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
	// Followers is the follower counter of the user.
	Followers int `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
	// Views is the live view counter shown on the video thumbnail footer.
	Views int `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
}
|
|
|
|
|
2020-07-16 10:31:13 +00:00
|
|
|
func (oe *OpenrecExtractor) Execute() {
|
|
|
|
|
2020-08-13 11:11:53 +00:00
|
|
|
ps := intimate.NewPerfectShutdown()
|
2020-07-16 10:31:13 +00:00
|
|
|
|
|
|
|
var lasterr error = nil
|
2020-08-13 11:11:53 +00:00
|
|
|
for !ps.IsClose() {
|
2020-07-22 12:00:02 +00:00
|
|
|
var err error
|
|
|
|
|
2020-07-23 10:29:56 +00:00
|
|
|
source, err := sstore.Pop(intimate.TOpenrecUser, 0)
|
2020-07-16 10:31:13 +00:00
|
|
|
if err != nil {
|
|
|
|
if err != lasterr {
|
|
|
|
log.Println(err, lasterr)
|
|
|
|
lasterr = err
|
|
|
|
}
|
2020-07-31 10:04:10 +00:00
|
|
|
time.Sleep(time.Second * 5)
|
2020-07-27 11:30:54 +00:00
|
|
|
continue
|
2020-07-16 10:31:13 +00:00
|
|
|
}
|
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
sdata := source.Ext.([]byte)
|
|
|
|
datamap := gjson.ParseBytes(sdata).Map()
|
|
|
|
|
2020-07-17 10:21:38 +00:00
|
|
|
source.Operator = int32(intimate.OperatorError)
|
2020-07-21 07:05:56 +00:00
|
|
|
userId := datamap["var_user_id"].String()
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-17 10:21:38 +00:00
|
|
|
streamer := &intimate.Streamer{}
|
|
|
|
streamer.UserId = userId
|
2020-08-13 11:11:53 +00:00
|
|
|
// streamer.Platform = intimate.Popenrec
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-22 12:00:02 +00:00
|
|
|
htmlUser := datamap["html_user"]
|
|
|
|
oe.user = intimate.NewExtractorSource(&htmlUser)
|
2020-07-21 07:05:56 +00:00
|
|
|
oe.user.CreateExtractor()
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-08-14 11:26:03 +00:00
|
|
|
userEtor := extractor.ExtractHtmlString(htmlUser.String())
|
|
|
|
log.Println(userEtor.GetObjectByTag(UserInfo{}))
|
|
|
|
|
2020-07-22 12:00:02 +00:00
|
|
|
htmlLive := datamap["html_live"]
|
|
|
|
oe.userLive = intimate.NewExtractorSource(&htmlLive)
|
2020-07-21 07:05:56 +00:00
|
|
|
oe.userLive.CreateExtractor()
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-22 12:00:02 +00:00
|
|
|
jsonSupporters := datamap["json_supporters"]
|
|
|
|
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
|
2020-07-21 07:05:56 +00:00
|
|
|
clog := &intimate.CollectLog{}
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
// log.Println(anchorId)
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
oe.extractFollowers(clog)
|
|
|
|
oe.extractUserName(streamer)
|
|
|
|
oe.extractViewsAndLiveStreaming(clog)
|
|
|
|
oe.extractGiversAndGratuity(clog)
|
|
|
|
oe.extractLive(clog)
|
|
|
|
oe.extractTags(clog)
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
streamer.Uid = source.StreamerId.Int64
|
|
|
|
streamer.UpdateTime = source.UpdateTime
|
2020-07-31 10:04:10 +00:00
|
|
|
streamer.Tags = clog.Tags
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-08-11 10:26:17 +00:00
|
|
|
clog.Platform = intimate.Popenrec
|
2020-07-21 07:05:56 +00:00
|
|
|
clog.UserId = userId
|
|
|
|
clog.UpdateTime = source.UpdateTime
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-31 10:04:10 +00:00
|
|
|
logUid := estore.InsertClog(clog)
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
LiveUrl := "https://www.openrec.tv/live/" + userId
|
2020-08-13 11:11:53 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true}
|
|
|
|
streamer.LatestLogUid = logUid
|
2020-08-13 11:11:53 +00:00
|
|
|
// streamer.Operator = 0
|
|
|
|
|
|
|
|
log.Println(streamer.UserId)
|
|
|
|
estore.Update(streamer,
|
|
|
|
"user_name", streamer.UserName,
|
|
|
|
"user_id", streamer.UserId,
|
|
|
|
"live_url", streamer.LiveUrl,
|
|
|
|
"latest_log_uid", streamer.LatestLogUid,
|
|
|
|
"update_time", streamer.UpdateTime,
|
|
|
|
"tags", streamer.Tags,
|
|
|
|
)
|
2020-07-16 10:31:13 +00:00
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
source.Operator = int32(intimate.OperatorExtractorOK)
|
|
|
|
sstore.UpdateOperator(source)
|
2020-07-22 12:00:02 +00:00
|
|
|
}
|
|
|
|
|
2020-07-16 10:31:13 +00:00
|
|
|
}
|
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
|
|
|
|
extractor := oe.user.GetExtractor()
|
|
|
|
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
if !xp.NodeIter().Next() {
|
|
|
|
log.Println("不存在粉丝数")
|
|
|
|
}
|
|
|
|
|
|
|
|
followers := strings.ReplaceAll(xp.String(), ",", "")
|
|
|
|
followersInt, err := strconv.ParseInt(followers, 10, 64)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
|
|
|
|
}
|
|
|
|
|
2020-07-21 07:05:56 +00:00
|
|
|
func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
|
2020-07-16 07:25:55 +00:00
|
|
|
extractor := oe.user.GetExtractor()
|
2020-07-21 07:05:56 +00:00
|
|
|
xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
|
2020-07-17 10:21:38 +00:00
|
|
|
if err != nil {
|
2020-07-16 07:25:55 +00:00
|
|
|
log.Println(err)
|
2020-07-17 10:21:38 +00:00
|
|
|
} else {
|
|
|
|
if xp.NodeIter().Next() {
|
|
|
|
userName := xp.String()
|
2020-07-21 07:05:56 +00:00
|
|
|
streamer.Set("UserName", sql.NullString{String: userName, Valid: true})
|
2020-07-17 10:21:38 +00:00
|
|
|
}
|
2020-07-16 07:25:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
|
|
|
|
extractor := oe.user.GetExtractor()
|
|
|
|
// c-contents
|
|
|
|
xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
2020-07-22 12:00:02 +00:00
|
|
|
|
2020-07-16 07:25:55 +00:00
|
|
|
if xp.NodeIter().Next() {
|
|
|
|
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
|
|
|
|
views = strings.ReplaceAll(views, ",", "")
|
|
|
|
viewsint, err := strconv.Atoi(views)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
|
2020-07-21 07:05:56 +00:00
|
|
|
clog.Set("IsLiveStreaming", true)
|
2020-07-16 07:25:55 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
|
|
|
|
// extractor := oe.user.GetExtractor()
|
|
|
|
giverjson := oe.supporters.GetSource()
|
|
|
|
var givers []interface{}
|
|
|
|
var gratuity int64 = 0
|
|
|
|
|
|
|
|
for _, v := range giverjson.Array() {
|
|
|
|
giverSource := gjson.Parse(v.String())
|
|
|
|
for _, item := range giverSource.Get("data.items").Array() {
|
|
|
|
givers = append(givers, item.Map())
|
|
|
|
gratuity += item.Get("total_yells").Int()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
giversbytes, err := json.Marshal(givers)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
|
|
|
|
} else {
|
|
|
|
clog.Set("Giver", giversbytes)
|
|
|
|
}
|
|
|
|
|
|
|
|
clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
|
|
|
|
extractor := oe.userLive.GetExtractor()
|
|
|
|
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(oe.userLive.GetSource().Str)
|
|
|
|
if len(mathes) == 2 {
|
|
|
|
|
|
|
|
clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})
|
|
|
|
|
|
|
|
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
iter := content.NodeIter()
|
|
|
|
if iter.Next() {
|
|
|
|
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
2020-07-21 07:05:56 +00:00
|
|
|
// log.Println(iter.Node().NodeValue(), tm.Local())
|
2020-07-16 07:25:55 +00:00
|
|
|
clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})
|
|
|
|
|
|
|
|
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
diter := duration.NodeIter()
|
|
|
|
if diter.Next() {
|
|
|
|
|
|
|
|
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
endtm := tm.Add(dt)
|
|
|
|
clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
|
|
|
|
var tags []string
|
2020-07-16 10:31:13 +00:00
|
|
|
matheslist := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1)
|
2020-07-16 07:25:55 +00:00
|
|
|
for _, m := range matheslist {
|
|
|
|
tags = append(tags, m[1])
|
|
|
|
}
|
|
|
|
tagsBytes, err := json.Marshal(tags)
|
|
|
|
if err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
clog.Set("Tags", tagsBytes)
|
|
|
|
}
|