finish: 重构openrec
This commit is contained in:
parent
a50c879d83
commit
d1298dc3f3
|
@ -15,6 +15,5 @@ import (
|
||||||
*/
|
*/
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
oe := &OpenrecExtractor{}
|
Execute()
|
||||||
oe.Execute()
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,11 +3,9 @@ package main
|
||||||
import (
|
import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
|
"errors"
|
||||||
"intimate"
|
"intimate"
|
||||||
"log"
|
"log"
|
||||||
"regexp"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/474420502/extractor"
|
"github.com/474420502/extractor"
|
||||||
|
@ -17,20 +15,23 @@ import (
|
||||||
var estore = intimate.NewStoreExtractor()
|
var estore = intimate.NewStoreExtractor()
|
||||||
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
|
var sstore = intimate.NewStoreSource(string(intimate.STOpenrec))
|
||||||
|
|
||||||
// OpenrecExtractor 提取方法
|
// UserInfo is the struct that receives the extracted user information.
|
||||||
type OpenrecExtractor struct {
|
|
||||||
user *intimate.ExtractorSource
|
|
||||||
userLive *intimate.ExtractorSource
|
|
||||||
supporters *intimate.ExtractorSource
|
|
||||||
}
|
|
||||||
|
|
||||||
type UserInfo struct {
|
type UserInfo struct {
|
||||||
UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
|
UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"`
|
||||||
Followers int `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
|
Followers int64 `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"`
|
||||||
Views int `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
|
Views int64 `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) Execute() {
|
// UserLive is the struct that receives the extracted live-stream information.
|
||||||
|
type UserLive struct {
|
||||||
|
Title string `exp:"//h1[contains(@class,'MovieTitle__Title')]"`
|
||||||
|
LiveStartTime string `exp:"//meta[@itemprop='uploadDate']/@content"`
|
||||||
|
LiveEndTime string `exp:"//meta[@itemprop='duration']/@content"`
|
||||||
|
Tags []string `exp:"//a[contains(@class,'TagButton')]"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Execute runs the extractor.
|
||||||
|
func Execute() {
|
||||||
|
|
||||||
ps := intimate.NewPerfectShutdown()
|
ps := intimate.NewPerfectShutdown()
|
||||||
|
|
||||||
|
@ -47,6 +48,7 @@ func (oe *OpenrecExtractor) Execute() {
|
||||||
time.Sleep(time.Second * 5)
|
time.Sleep(time.Second * 5)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
lasterr = nil
|
||||||
|
|
||||||
sdata := source.Ext.([]byte)
|
sdata := source.Ext.([]byte)
|
||||||
datamap := gjson.ParseBytes(sdata).Map()
|
datamap := gjson.ParseBytes(sdata).Map()
|
||||||
|
@ -56,31 +58,80 @@ func (oe *OpenrecExtractor) Execute() {
|
||||||
|
|
||||||
streamer := &intimate.Streamer{}
|
streamer := &intimate.Streamer{}
|
||||||
streamer.UserId = userId
|
streamer.UserId = userId
|
||||||
// streamer.Platform = intimate.Popenrec
|
// streamer.Platform = intimate.Popenrec (this field does not need to be updated)
|
||||||
|
|
||||||
htmlUser := datamap["html_user"]
|
htmlUser := datamap["html_user"]
|
||||||
oe.user = intimate.NewExtractorSource(&htmlUser)
|
|
||||||
oe.user.CreateExtractor()
|
|
||||||
|
|
||||||
userEtor := extractor.ExtractHtmlString(htmlUser.String())
|
userEtor := extractor.ExtractHtmlString(htmlUser.String())
|
||||||
log.Println(userEtor.GetObjectByTag(UserInfo{}))
|
ui, ok1 := userEtor.GetObjectByTag(UserInfo{}).(*UserInfo)
|
||||||
|
|
||||||
htmlLive := datamap["html_live"]
|
htmlLive := datamap["html_live"]
|
||||||
oe.userLive = intimate.NewExtractorSource(&htmlLive)
|
|
||||||
oe.userLive.CreateExtractor()
|
liveEtor := extractor.ExtractHtmlString(htmlLive.String())
|
||||||
|
ul, ok2 := liveEtor.GetObjectByTag(UserLive{}).(*UserLive)
|
||||||
|
|
||||||
jsonSupporters := datamap["json_supporters"]
|
jsonSupporters := datamap["json_supporters"]
|
||||||
oe.supporters = intimate.NewExtractorSource(&jsonSupporters)
|
|
||||||
clog := &intimate.CollectLog{}
|
clog := &intimate.CollectLog{}
|
||||||
|
|
||||||
// log.Println(anchorId)
|
if ok1 {
|
||||||
|
clog.Followers = sql.NullInt64{Int64: ui.Followers, Valid: true}
|
||||||
|
clog.Views = sql.NullInt64{Int64: ui.Views, Valid: true}
|
||||||
|
if ui.Views != 0 {
|
||||||
|
clog.IsLiveStreaming = true
|
||||||
|
}
|
||||||
|
streamer.UserName = sql.NullString{String: ui.UserName, Valid: true}
|
||||||
|
|
||||||
oe.extractFollowers(clog)
|
giverjson := jsonSupporters
|
||||||
oe.extractUserName(streamer)
|
var givers []interface{}
|
||||||
oe.extractViewsAndLiveStreaming(clog)
|
var gratuity int64 = 0
|
||||||
oe.extractGiversAndGratuity(clog)
|
|
||||||
oe.extractLive(clog)
|
for _, v := range giverjson.Array() {
|
||||||
oe.extractTags(clog)
|
giverSource := gjson.Parse(v.String())
|
||||||
|
for _, item := range giverSource.Get("data.items").Array() {
|
||||||
|
givers = append(givers, item.Map())
|
||||||
|
gratuity += item.Get("total_yells").Int()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
giversbytes, err := json.Marshal(givers)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
clog.ErrorMsg = sql.NullString{String: err.Error(), Valid: true}
|
||||||
|
} else {
|
||||||
|
clog.Giver = giversbytes
|
||||||
|
}
|
||||||
|
|
||||||
|
clog.Gratuity = sql.NullInt64{Int64: gratuity, Valid: true}
|
||||||
|
} else {
|
||||||
|
log.Println("UserInfo may be not exists")
|
||||||
|
estore.UpdateError(streamer, errors.New("UserInfo may be not exists"))
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
//log.Println(ul)
|
||||||
|
if ok2 {
|
||||||
|
clog.LiveTitle = sql.NullString{String: ul.Title, Valid: true}
|
||||||
|
|
||||||
|
startTime, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", ul.LiveStartTime, time.Local)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
} else {
|
||||||
|
clog.LiveStartTime = sql.NullTime{Time: startTime.Local(), Valid: true}
|
||||||
|
duration, err := intimate.ParseDuration(ul.LiveEndTime)
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
} else {
|
||||||
|
endTime := startTime.Add(duration)
|
||||||
|
// Store the computed end time (start + duration) in LiveEndTime; writing it to
// LiveStartTime would overwrite the start time recorded just before.
clog.LiveEndTime = sql.NullTime{Time: endTime.Local(), Valid: true}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if tags, err := json.Marshal(ul.Tags); err == nil {
|
||||||
|
clog.Tags = tags
|
||||||
|
} else {
|
||||||
|
log.Println("json error", ul.Tags, clog.Tags)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
streamer.Uid = source.StreamerId.Int64
|
streamer.Uid = source.StreamerId.Int64
|
||||||
streamer.UpdateTime = source.UpdateTime
|
streamer.UpdateTime = source.UpdateTime
|
||||||
|
@ -89,6 +140,7 @@ func (oe *OpenrecExtractor) Execute() {
|
||||||
clog.Platform = intimate.Popenrec
|
clog.Platform = intimate.Popenrec
|
||||||
clog.UserId = userId
|
clog.UserId = userId
|
||||||
clog.UpdateTime = source.UpdateTime
|
clog.UpdateTime = source.UpdateTime
|
||||||
|
clog.StreamerUid = streamer.Uid
|
||||||
|
|
||||||
logUid := estore.InsertClog(clog)
|
logUid := estore.InsertClog(clog)
|
||||||
|
|
||||||
|
@ -113,135 +165,3 @@ func (oe *OpenrecExtractor) Execute() {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
|
|
||||||
extractor := oe.user.GetExtractor()
|
|
||||||
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
if !xp.NodeIter().Next() {
|
|
||||||
log.Println("不存在粉丝数")
|
|
||||||
}
|
|
||||||
|
|
||||||
followers := strings.ReplaceAll(xp.String(), ",", "")
|
|
||||||
followersInt, err := strconv.ParseInt(followers, 10, 64)
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) {
|
|
||||||
extractor := oe.user.GetExtractor()
|
|
||||||
xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()")
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
} else {
|
|
||||||
if xp.NodeIter().Next() {
|
|
||||||
userName := xp.String()
|
|
||||||
streamer.Set("UserName", sql.NullString{String: userName, Valid: true})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
|
|
||||||
extractor := oe.user.GetExtractor()
|
|
||||||
// c-contents
|
|
||||||
xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if xp.NodeIter().Next() {
|
|
||||||
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
|
|
||||||
views = strings.ReplaceAll(views, ",", "")
|
|
||||||
viewsint, err := strconv.Atoi(views)
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
|
|
||||||
clog.Set("IsLiveStreaming", true)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
|
|
||||||
// extractor := oe.user.GetExtractor()
|
|
||||||
giverjson := oe.supporters.GetSource()
|
|
||||||
var givers []interface{}
|
|
||||||
var gratuity int64 = 0
|
|
||||||
|
|
||||||
for _, v := range giverjson.Array() {
|
|
||||||
giverSource := gjson.Parse(v.String())
|
|
||||||
for _, item := range giverSource.Get("data.items").Array() {
|
|
||||||
givers = append(givers, item.Map())
|
|
||||||
gratuity += item.Get("total_yells").Int()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
giversbytes, err := json.Marshal(givers)
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
|
|
||||||
} else {
|
|
||||||
clog.Set("Giver", giversbytes)
|
|
||||||
}
|
|
||||||
|
|
||||||
clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true})
|
|
||||||
}
|
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
|
|
||||||
extractor := oe.userLive.GetExtractor()
|
|
||||||
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(oe.userLive.GetSource().Str)
|
|
||||||
if len(mathes) == 2 {
|
|
||||||
|
|
||||||
clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})
|
|
||||||
|
|
||||||
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
iter := content.NodeIter()
|
|
||||||
if iter.Next() {
|
|
||||||
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
// log.Println(iter.Node().NodeValue(), tm.Local())
|
|
||||||
clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})
|
|
||||||
|
|
||||||
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
diter := duration.NodeIter()
|
|
||||||
if diter.Next() {
|
|
||||||
|
|
||||||
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
endtm := tm.Add(dt)
|
|
||||||
clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
|
|
||||||
var tags []string
|
|
||||||
matheslist := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1)
|
|
||||||
for _, m := range matheslist {
|
|
||||||
tags = append(tags, m[1])
|
|
||||||
}
|
|
||||||
tagsBytes, err := json.Marshal(tags)
|
|
||||||
if err != nil {
|
|
||||||
log.Println(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
clog.Set("Tags", tagsBytes)
|
|
||||||
}
|
|
||||||
|
|
|
@ -114,6 +114,5 @@ func TestUserName(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestExtractor(t *testing.T) {
|
func TestExtractor(t *testing.T) {
|
||||||
oe := &OpenrecExtractor{}
|
Execute()
|
||||||
oe.Execute()
|
|
||||||
}
|
}
|
||||||
|
|
2
go.mod
2
go.mod
|
@ -3,7 +3,7 @@ module intimate
|
||||||
go 1.14
|
go 1.14
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9
|
github.com/474420502/extractor v0.9.4-0.20200817020657-7d26da5d1e89
|
||||||
github.com/474420502/focus v0.12.0
|
github.com/474420502/focus v0.12.0
|
||||||
github.com/474420502/gcurl v0.2.0
|
github.com/474420502/gcurl v0.2.0
|
||||||
github.com/474420502/hunter v0.3.4
|
github.com/474420502/hunter v0.3.4
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -8,6 +8,8 @@ github.com/474420502/extractor v0.9.3 h1:Cjri64DbgWQQ64EjPiBSQfUH9l0cYlzU8py0PQu
|
||||||
github.com/474420502/extractor v0.9.3/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4=
|
github.com/474420502/extractor v0.9.3/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4=
|
||||||
github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9 h1:cxgsTQwRJSiML4yBL40n/0pD/FbEqkCIXE7qq6hJyLg=
|
github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9 h1:cxgsTQwRJSiML4yBL40n/0pD/FbEqkCIXE7qq6hJyLg=
|
||||||
github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4=
|
github.com/474420502/extractor v0.9.4-0.20200814111732-bc270321f8f9/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4=
|
||||||
|
github.com/474420502/extractor v0.9.4-0.20200817020657-7d26da5d1e89 h1:6g4sPgooFdsVAdxNMhP6sqKQ0Z5EPBb4tGj9/absPoY=
|
||||||
|
github.com/474420502/extractor v0.9.4-0.20200817020657-7d26da5d1e89/go.mod h1:Ss0KTfwsdB4XBpNda/V50rx21V9bl6/eQmyl50mjAS4=
|
||||||
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
|
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
|
||||||
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
|
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
|
||||||
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
|
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
|
||||||
|
|
Loading…
Reference in New Issue
Block a user