完成解析模块示例. 数据库入库测试.
TODO: 调整程序启动停止(非暴力关闭).
This commit is contained in:
parent
51fe6f6039
commit
7e3b36c7d0
215
extractor/openrec/openrec_extractor.go
Normal file
215
extractor/openrec/openrec_extractor.go
Normal file
|
@ -0,0 +1,215 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"log"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
// OpenrecExtractor 提取方法
|
||||
type OpenrecExtractor struct {
|
||||
user *intimate.ExtractorSource
|
||||
userLive *intimate.ExtractorSource
|
||||
supporters *intimate.ExtractorSource
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) {
|
||||
extractor := oe.user.GetExtractor()
|
||||
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
if !xp.NodeIter().Next() {
|
||||
log.Println("不存在粉丝数")
|
||||
}
|
||||
|
||||
followers := strings.ReplaceAll(xp.String(), ",", "")
|
||||
followersInt, err := strconv.ParseInt(followers, 10, 64)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true})
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractAnchorName(ai intimate.ISet) {
|
||||
extractor := oe.user.GetExtractor()
|
||||
xp, err := extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()")
|
||||
if xp.NodeIter().Next() {
|
||||
anchorName := xp.String()
|
||||
ai.Set("AnchorName", anchorName)
|
||||
} else {
|
||||
log.Println(err)
|
||||
}
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) {
|
||||
extractor := oe.user.GetExtractor()
|
||||
// c-contents
|
||||
xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
if xp.NodeIter().Next() {
|
||||
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
|
||||
views = strings.ReplaceAll(views, ",", "")
|
||||
viewsint, err := strconv.Atoi(views)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true})
|
||||
clog.Set("IsLiveStreaming", int32(1))
|
||||
}
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) {
|
||||
// extractor := oe.user.GetExtractor()
|
||||
giverjson := oe.supporters.GetSource()
|
||||
var givers []interface{}
|
||||
var gratuity int64 = 0
|
||||
|
||||
for _, v := range giverjson.Array() {
|
||||
giverSource := gjson.Parse(v.String())
|
||||
for _, item := range giverSource.Get("data.items").Array() {
|
||||
givers = append(givers, item.Map())
|
||||
gratuity += item.Get("total_yells").Int()
|
||||
}
|
||||
}
|
||||
|
||||
giversbytes, err := json.Marshal(givers)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
|
||||
} else {
|
||||
clog.Set("Giver", giversbytes)
|
||||
}
|
||||
|
||||
clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true})
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) {
|
||||
extractor := oe.userLive.GetExtractor()
|
||||
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(oe.userLive.GetSource().Str)
|
||||
if len(mathes) == 2 {
|
||||
|
||||
clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true})
|
||||
|
||||
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
iter := content.NodeIter()
|
||||
if iter.Next() {
|
||||
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true})
|
||||
|
||||
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
diter := duration.NodeIter()
|
||||
if diter.Next() {
|
||||
|
||||
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
endtm := tm.Add(dt)
|
||||
clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true})
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) {
|
||||
var tags []string
|
||||
matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1)
|
||||
for _, m := range matheslist {
|
||||
tags = append(tags, m[1])
|
||||
}
|
||||
log.Println(tags)
|
||||
tagsBytes, err := json.Marshal(tags)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
clog.Set("Tags", tagsBytes)
|
||||
}
|
||||
|
||||
func (oe *OpenrecExtractor) Execute() {
|
||||
collect := intimate.NewExtractorStore()
|
||||
store := intimate.NewSourceStore("source_openrec")
|
||||
|
||||
for {
|
||||
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
anchorId := source.GetSource().String
|
||||
|
||||
ai := &intimate.AnchorInfo{}
|
||||
ai.SetAnchorId(anchorId)
|
||||
ai.SetPlatform(string(intimate.Popenrec))
|
||||
|
||||
sdata := source.GetExt().([]byte)
|
||||
|
||||
if gjson.ValidBytes(sdata) {
|
||||
result := gjson.ParseBytes(sdata)
|
||||
datamap := result.Map()
|
||||
|
||||
oe := &OpenrecExtractor{}
|
||||
oe.user = intimate.NewExtractorSource(datamap["user"])
|
||||
oe.user.CreateExtractor()
|
||||
|
||||
oe.userLive = intimate.NewExtractorSource(datamap["user_live"])
|
||||
oe.userLive.CreateExtractor()
|
||||
|
||||
oe.supporters = intimate.NewExtractorSource(datamap["supporters"])
|
||||
|
||||
clog := &intimate.CollectLog{}
|
||||
|
||||
oe.extractFollowers(clog)
|
||||
oe.extractAnchorName(ai)
|
||||
oe.extractViewsAndLiveStreaming(clog)
|
||||
oe.extractGiversAndGratuity(clog)
|
||||
oe.extractLive(clog)
|
||||
oe.extractTags(clog)
|
||||
|
||||
ai.Set("UpdateTime", source.GetUpdateTime())
|
||||
|
||||
LiveUrl := "https://www.openrec.tv/live/" + anchorId
|
||||
ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true})
|
||||
|
||||
Uid, err := collect.InsertAnchorInfo(ai)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
clog.Set("Uid", Uid)
|
||||
clog.Set("Platform", string(intimate.Popenrec))
|
||||
clog.Set("AnchorId", anchorId)
|
||||
clog.Set("UpdateTime", source.GetUpdateTime())
|
||||
|
||||
collect.InsertCollectLog(clog)
|
||||
} else {
|
||||
log.Println("data is not json:\n", string(sdata))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -2,19 +2,14 @@ package main
|
|||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"intimate"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/474420502/hunter"
|
||||
"github.com/474420502/requests"
|
||||
"github.com/lestrrat-go/libxml2"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
@ -100,187 +95,66 @@ func TestCase(t *testing.T) {
|
|||
}
|
||||
|
||||
func TestExtractor(t *testing.T) {
|
||||
|
||||
ses := requests.NewSession()
|
||||
tp := ses.Get("https://www.openrec.tv/user/Riowh/supporters")
|
||||
tp.Execute()
|
||||
|
||||
// t.Error(ses.GetCookies(wf.GetParsedURL()))
|
||||
|
||||
collect := intimate.NewExtractorStore()
|
||||
store := intimate.NewSourceStore("source_openrec")
|
||||
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
return
|
||||
}
|
||||
|
||||
anchorId := source.GetSource().String
|
||||
|
||||
ai := &intimate.AnchorInfo{}
|
||||
ai.SetAnchorId(anchorId)
|
||||
ai.SetPlatform(string(intimate.Popenrec))
|
||||
|
||||
sdata := source.GetExt().([]byte)
|
||||
|
||||
if gjson.ValidBytes(sdata) {
|
||||
result := gjson.ParseBytes(sdata)
|
||||
m := result.Map()
|
||||
|
||||
user := m["user"]
|
||||
|
||||
clog := &intimate.CollectLog{}
|
||||
extractor := hunter.NewExtractor([]byte(user.Str))
|
||||
xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
if !xp.NodeIter().Next() {
|
||||
t.Error("不存在粉丝数")
|
||||
}
|
||||
|
||||
followers := strings.ReplaceAll(xp.String(), ",", "")
|
||||
followersInt, err := strconv.ParseInt(followers, 10, 64)
|
||||
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
var anchorName string
|
||||
xp, err = extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()")
|
||||
if xp.NodeIter().Next() {
|
||||
anchorName = xp.String()
|
||||
} else {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
t.Error(source.GetSource())
|
||||
t.Error(anchorName)
|
||||
|
||||
ai.SetAnchorName(anchorName)
|
||||
|
||||
// c-contents
|
||||
xp, err = extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()")
|
||||
for {
|
||||
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
if xp.NodeIter().Next() {
|
||||
views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String())
|
||||
views = strings.ReplaceAll(views, ",", "")
|
||||
viewsint, err := strconv.Atoi(views)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
clog.SetViews(sql.NullInt64{Int64: int64(viewsint), Valid: true})
|
||||
clog.SetIsShowing(1)
|
||||
}
|
||||
|
||||
var givers []interface{}
|
||||
var gratuity int64 = 0
|
||||
giverjson := m["supporters"]
|
||||
for _, v := range giverjson.Array() {
|
||||
giverSource := gjson.Parse(v.String())
|
||||
for _, item := range giverSource.Get("data.items").Array() {
|
||||
givers = append(givers, item.Map())
|
||||
gratuity += item.Get("total_yells").Int()
|
||||
}
|
||||
}
|
||||
|
||||
giversbytes, err := json.Marshal(givers)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
clog.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true})
|
||||
} else {
|
||||
clog.SetGiver(giversbytes)
|
||||
}
|
||||
|
||||
// MovieToolbar__Views-g5e6ic-13 iDRGyA
|
||||
livejson := m["user_live"]
|
||||
|
||||
f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
f.WriteString(livejson.String())
|
||||
|
||||
extractor = hunter.NewExtractor([]byte(livejson.Str))
|
||||
// xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]")
|
||||
// if err != nil {
|
||||
// t.Error(err)
|
||||
// }
|
||||
|
||||
mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})</h1>").FindStringSubmatch(livejson.Str)
|
||||
if len(mathes) == 2 {
|
||||
|
||||
clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true})
|
||||
|
||||
content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
iter := content.NodeIter()
|
||||
if iter.Next() {
|
||||
tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
clog.SetShowStartTime(sql.NullTime{Time: tm.Local(), Valid: true})
|
||||
|
||||
duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
diter := duration.NodeIter()
|
||||
if diter.Next() {
|
||||
|
||||
dt, err := intimate.ParseDuration(diter.Node().NodeValue())
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
endtm := tm.Add(dt)
|
||||
clog.SetShowEndTime(sql.NullTime{Time: endtm.Local(), Valid: true})
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var tags []string
|
||||
matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})</a`).FindAllStringSubmatch(livejson.Str, -1)
|
||||
for _, m := range matheslist {
|
||||
tags = append(tags, m[1])
|
||||
}
|
||||
t.Error(tags)
|
||||
tagsBytes, err := json.Marshal(tags)
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
|
||||
ai.SetTags(tagsBytes)
|
||||
ai.SetUpdateTime(source.GetUpdateTime())
|
||||
|
||||
LiveUrl := "https://www.openrec.tv/live/" + anchorId
|
||||
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})
|
||||
|
||||
Uid, err := collect.InsertAnchorInfo(ai)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
clog.SetUid(Uid)
|
||||
clog.SetTags(tagsBytes)
|
||||
clog.SetGratuity(sql.NullInt64{Int64: gratuity, Valid: true})
|
||||
clog.SetPlatform(string(intimate.Popenrec))
|
||||
clog.SetFollowers(sql.NullInt64{Int64: int64(followersInt), Valid: true})
|
||||
clog.SetAnchorId(anchorId)
|
||||
clog.SetUpdateTime(source.GetUpdateTime())
|
||||
anchorId := source.GetSource().String
|
||||
|
||||
collect.InsertCollectLog(clog)
|
||||
ai := &intimate.AnchorInfo{}
|
||||
ai.SetAnchorId(anchorId)
|
||||
ai.SetPlatform(string(intimate.Popenrec))
|
||||
|
||||
} else {
|
||||
t.Error("data is not json:\n", string(sdata))
|
||||
sdata := source.GetExt().([]byte)
|
||||
|
||||
if gjson.ValidBytes(sdata) {
|
||||
result := gjson.ParseBytes(sdata)
|
||||
datamap := result.Map()
|
||||
|
||||
oe := &OpenrecExtractor{}
|
||||
oe.user = intimate.NewExtractorSource(datamap["user"])
|
||||
oe.user.CreateExtractor()
|
||||
|
||||
oe.userLive = intimate.NewExtractorSource(datamap["user_live"])
|
||||
oe.userLive.CreateExtractor()
|
||||
|
||||
oe.supporters = intimate.NewExtractorSource(datamap["supporters"])
|
||||
|
||||
clog := &intimate.CollectLog{}
|
||||
|
||||
oe.extractFollowers(clog)
|
||||
oe.extractAnchorName(ai)
|
||||
oe.extractViewsAndLiveStreaming(clog)
|
||||
oe.extractGiversAndGratuity(clog)
|
||||
oe.extractLive(clog)
|
||||
oe.extractTags(clog)
|
||||
|
||||
ai.Set("UpdateTime", source.GetUpdateTime())
|
||||
|
||||
LiveUrl := "https://www.openrec.tv/live/" + anchorId
|
||||
ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true})
|
||||
|
||||
Uid, err := collect.InsertAnchorInfo(ai)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
clog.Set("Uid", Uid)
|
||||
clog.Set("Platform", string(intimate.Popenrec))
|
||||
clog.Set("AnchorId", anchorId)
|
||||
clog.Set("UpdateTime", source.GetUpdateTime())
|
||||
|
||||
collect.InsertCollectLog(clog)
|
||||
} else {
|
||||
t.Error("data is not json:\n", string(sdata))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,6 +2,10 @@ package intimate
|
|||
|
||||
import (
|
||||
"database/sql"
|
||||
"reflect"
|
||||
|
||||
"github.com/474420502/hunter"
|
||||
"github.com/tidwall/gjson"
|
||||
)
|
||||
|
||||
type ISetAnchorInfo interface {
|
||||
|
@ -28,28 +32,6 @@ type IGetAnchorInfo interface {
|
|||
GetUpdateTime() sql.NullTime //
|
||||
}
|
||||
|
||||
/*
|
||||
CREATE TABLE `anchor_info` (
|
||||
`uid` bigint AUTO_INCREMENT,
|
||||
`platform` varchar(255) NOT NULL,
|
||||
`anchor_id` varchar(255) NOT NULL,
|
||||
`anchor_name` varchar(255) NOT NULL,
|
||||
`live_url` text,
|
||||
`channel` varchar(128) DEFAULT NULL,
|
||||
`show_type` varchar(255) DEFAULT NULL,
|
||||
`ext` json DEFAULT NULL,
|
||||
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
PRIMARY KEY (`uid`),
|
||||
UNIQUE KEY `platform_anchor_id_idx` (`platform`, `anchor_id`),
|
||||
KEY `platform_idx` (`platform`),
|
||||
KEY `anchor_id_idx` (`anchor_id`),
|
||||
KEY `anchor_name_idx` (`anchor_name`),
|
||||
KEY `channel_idx` (`channel`),
|
||||
KEY `show_type_idx` (`show_type`),
|
||||
KEY `update_time_idx` (`update_time`)
|
||||
);
|
||||
*/
|
||||
|
||||
type AnchorInfo struct {
|
||||
Uid int64 //
|
||||
Platform string //
|
||||
|
@ -62,6 +44,11 @@ type AnchorInfo struct {
|
|||
UpdateTime sql.NullTime //
|
||||
}
|
||||
|
||||
// Set Simple Value
|
||||
func (ai *AnchorInfo) Set(field string, value interface{}) {
|
||||
reflect.ValueOf(ai).Elem().FieldByName(field).Set(reflect.ValueOf(value))
|
||||
}
|
||||
|
||||
// GetTags Get return Tags interface{}
|
||||
func (ai *AnchorInfo) GetTags() interface{} {
|
||||
return ai.Tags
|
||||
|
@ -156,15 +143,15 @@ type IGetCollectLog interface {
|
|||
GetUid() int64 //
|
||||
GetPlatform() string //
|
||||
GetAnchorId() string //
|
||||
GetIsShowing() int32 //
|
||||
GetIsLiveStreaming() int32 //
|
||||
GetIsError() int32 //
|
||||
GetFollowers() sql.NullInt64 //
|
||||
GetViews() sql.NullInt64 //
|
||||
GetGiver() interface{} //
|
||||
GetGratuity() sql.NullInt64 //
|
||||
GetShowTitle() sql.NullString //
|
||||
GetShowStartTime() sql.NullTime //
|
||||
GetShowEndTime() sql.NullTime //
|
||||
GetLiveTitle() sql.NullString //
|
||||
GetLiveStartTime() sql.NullTime //
|
||||
GetLiveEndTime() sql.NullTime //
|
||||
GetUpdateTime() sql.NullTime //
|
||||
GetTags() interface{} //
|
||||
GetExt() interface{} //
|
||||
|
@ -175,72 +162,43 @@ type ISetCollectLog interface {
|
|||
SetUid(int64) //
|
||||
SetPlatform(string) //
|
||||
SetAnchorId(string) //
|
||||
SetIsShowing(int32) //
|
||||
SetIsLiveStreaming(int32) //
|
||||
SetIsError(int32) //
|
||||
SetFollowers(sql.NullInt64) //
|
||||
SetViews(sql.NullInt64) //
|
||||
SetGiver(interface{}) //
|
||||
SetGratuity(sql.NullInt64) //
|
||||
SetShowTitle(sql.NullString) //
|
||||
SetShowStartTime(sql.NullTime) //
|
||||
SetShowEndTime(sql.NullTime) //
|
||||
SetLiveTitle(sql.NullString) //
|
||||
SetLiveStartTime(sql.NullTime) //
|
||||
SetLiveEndTime(sql.NullTime) //
|
||||
SetUpdateTime(sql.NullTime) //
|
||||
SetTags(interface{}) //
|
||||
SetExt(interface{}) //
|
||||
SetErrorMsg(sql.NullString) //
|
||||
}
|
||||
|
||||
/*
|
||||
CREATE TABLE `collect_log` (
|
||||
`uid` bigint,
|
||||
`platform` varchar(255) NOT NULL,
|
||||
`anchor_id` varchar(255) NOT NULL,
|
||||
|
||||
`is_showing` tinyint(1) DEFAULT NULL,
|
||||
`is_error` tinyint(1) DEFAULT NULL,
|
||||
|
||||
`followers` int(11) DEFAULT NULL,
|
||||
`views` int(11) DEFAULT NULL,
|
||||
`giver` json DEFAULT NULL,
|
||||
`gratuity` int(11) DEFAULT NULL,
|
||||
|
||||
`show_title` text DEFAULT NULL,
|
||||
`show_start_time` timestamp NULL DEFAULT NULL,
|
||||
`show_end_time` timestamp NULL DEFAULT NULL,
|
||||
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, --时间戳从源数据里获取
|
||||
`ext` json DEFAULT NULL,
|
||||
|
||||
`error` text DEFAULT NULL,
|
||||
|
||||
KEY `uid_idx` (`uid`),
|
||||
KEY `platform_idx` (`platform`),
|
||||
KEY `anchor_id_idx` (`anchor_id`),
|
||||
KEY `is_showing_idx` (`is_showing`),
|
||||
KEY `is_error_idx` (`is_error`),
|
||||
KEY `followers_idx` (`followers`),
|
||||
KEY `views_idx` (`views`),
|
||||
KEY `gratuity_idx` (`gratuity`),
|
||||
KEY `update_time_idx` (`update_time`)
|
||||
)
|
||||
*/
|
||||
|
||||
type CollectLog struct {
|
||||
Uid int64 //
|
||||
Platform string //
|
||||
AnchorId string //
|
||||
IsShowing int32 //
|
||||
IsError int32 //
|
||||
Followers sql.NullInt64 //
|
||||
Views sql.NullInt64 //
|
||||
Giver interface{} //
|
||||
Gratuity sql.NullInt64 //
|
||||
ShowTitle sql.NullString //
|
||||
ShowStartTime sql.NullTime //
|
||||
ShowEndTime sql.NullTime //
|
||||
UpdateTime sql.NullTime //
|
||||
Tags interface{}
|
||||
Ext interface{} //
|
||||
ErrorMsg sql.NullString //
|
||||
Uid int64 //
|
||||
Platform string //
|
||||
AnchorId string //
|
||||
IsLiveStreaming int32 //
|
||||
IsError int32 //
|
||||
Followers sql.NullInt64 //
|
||||
Views sql.NullInt64 //
|
||||
Giver interface{} //
|
||||
Gratuity sql.NullInt64 //
|
||||
LiveTitle sql.NullString //
|
||||
LiveStartTime sql.NullTime //
|
||||
LiveEndTime sql.NullTime //
|
||||
UpdateTime sql.NullTime //
|
||||
Tags interface{}
|
||||
Ext interface{} //
|
||||
ErrorMsg sql.NullString //
|
||||
}
|
||||
|
||||
// Set Simple Value
|
||||
func (cl *CollectLog) Set(field string, value interface{}) {
|
||||
reflect.ValueOf(cl).Elem().FieldByName(field).Set(reflect.ValueOf(value))
|
||||
}
|
||||
|
||||
// GetTags Get return Tags interface{}
|
||||
|
@ -283,34 +241,34 @@ func (cl *CollectLog) SetUpdateTime(UpdateTime sql.NullTime) {
|
|||
cl.UpdateTime = UpdateTime
|
||||
}
|
||||
|
||||
// GetShowEndTime Get return ShowEndTime sql.NullTime
|
||||
func (cl *CollectLog) GetShowEndTime() sql.NullTime {
|
||||
return cl.ShowEndTime
|
||||
// GetLiveEndTime Get return LiveEndTime sql.NullTime
|
||||
func (cl *CollectLog) GetLiveEndTime() sql.NullTime {
|
||||
return cl.LiveEndTime
|
||||
}
|
||||
|
||||
// SetShowEndTime Set ShowEndTime sql.NullTime
|
||||
func (cl *CollectLog) SetShowEndTime(ShowEndTime sql.NullTime) {
|
||||
cl.ShowEndTime = ShowEndTime
|
||||
// SetLiveEndTime Set LiveEndTime sql.NullTime
|
||||
func (cl *CollectLog) SetLiveEndTime(ShowEndTime sql.NullTime) {
|
||||
cl.LiveEndTime = ShowEndTime
|
||||
}
|
||||
|
||||
// GetShowStartTime Get return ShowStartTime sql.NullTime
|
||||
func (cl *CollectLog) GetShowStartTime() sql.NullTime {
|
||||
return cl.ShowStartTime
|
||||
// GetLiveStartTime Get return LiveStartTime sql.NullTime
|
||||
func (cl *CollectLog) GetLiveStartTime() sql.NullTime {
|
||||
return cl.LiveStartTime
|
||||
}
|
||||
|
||||
// SetShowStartTime Set ShowStartTime sql.NullTime
|
||||
func (cl *CollectLog) SetShowStartTime(ShowStartTime sql.NullTime) {
|
||||
cl.ShowStartTime = ShowStartTime
|
||||
// SetLiveStartTime Set LiveStartTime sql.NullTime
|
||||
func (cl *CollectLog) SetLiveStartTime(ShowStartTime sql.NullTime) {
|
||||
cl.LiveStartTime = ShowStartTime
|
||||
}
|
||||
|
||||
// GetShowTitle Get return ShowTitle sql.NullString
|
||||
func (cl *CollectLog) GetShowTitle() sql.NullString {
|
||||
return cl.ShowTitle
|
||||
// GetLiveTitle Get return LiveTitle sql.NullString
|
||||
func (cl *CollectLog) GetLiveTitle() sql.NullString {
|
||||
return cl.LiveTitle
|
||||
}
|
||||
|
||||
// SetShowTitle Set ShowTitle sql.NullString
|
||||
func (cl *CollectLog) SetShowTitle(ShowTitle sql.NullString) {
|
||||
cl.ShowTitle = ShowTitle
|
||||
// SetLiveTitle Set LiveTitle sql.NullString
|
||||
func (cl *CollectLog) SetLiveTitle(ShowTitle sql.NullString) {
|
||||
cl.LiveTitle = ShowTitle
|
||||
}
|
||||
|
||||
// GetGratuity Get return Gratuity sql.NullInt32
|
||||
|
@ -363,14 +321,14 @@ func (cl *CollectLog) SetIsError(IsError int32) {
|
|||
cl.IsError = IsError
|
||||
}
|
||||
|
||||
// GetIsShowing Get return IsShowing int32
|
||||
func (cl *CollectLog) GetIsShowing() int32 {
|
||||
return cl.IsShowing
|
||||
// GetIsLiveStreaming Get return IsLiveStreaming int32
|
||||
func (cl *CollectLog) GetIsLiveStreaming() int32 {
|
||||
return cl.IsLiveStreaming
|
||||
}
|
||||
|
||||
// SetIsShowing Set IsShowing int32
|
||||
func (cl *CollectLog) SetIsShowing(IsShowing int32) {
|
||||
cl.IsShowing = IsShowing
|
||||
// SetIsLiveStreaming Set IsLiveStreaming int32
|
||||
func (cl *CollectLog) SetIsLiveStreaming(IsLive int32) {
|
||||
cl.IsLiveStreaming = IsLive
|
||||
}
|
||||
|
||||
// GetAnchorId Get return AnchorId string
|
||||
|
@ -402,3 +360,26 @@ func (cl *CollectLog) GetUid() int64 {
|
|||
func (cl *CollectLog) SetUid(Uid int64) {
|
||||
cl.Uid = Uid
|
||||
}
|
||||
|
||||
type ExtractorSource struct {
|
||||
source gjson.Result
|
||||
extractor *hunter.Extractor
|
||||
}
|
||||
|
||||
func NewExtractorSource(gr gjson.Result) *ExtractorSource {
|
||||
es := &ExtractorSource{}
|
||||
es.source = gr
|
||||
return es
|
||||
}
|
||||
|
||||
func (es *ExtractorSource) CreateExtractor() {
|
||||
es.extractor = hunter.NewExtractor([]byte(es.source.Str))
|
||||
}
|
||||
|
||||
func (es *ExtractorSource) GetSource() gjson.Result {
|
||||
return es.source
|
||||
}
|
||||
|
||||
func (es *ExtractorSource) GetExtractor() *hunter.Extractor {
|
||||
return es.extractor
|
||||
}
|
||||
|
|
1
go.mod
1
go.mod
|
@ -5,7 +5,6 @@ go 1.14
|
|||
require (
|
||||
github.com/474420502/gcurl v0.1.2
|
||||
github.com/474420502/hunter v0.3.0
|
||||
github.com/474420502/requests v1.6.0
|
||||
github.com/go-sql-driver/mysql v1.5.0
|
||||
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
|
||||
github.com/tidwall/gjson v1.6.0
|
||||
|
|
4
go.sum
4
go.sum
|
@ -4,12 +4,8 @@ cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSR
|
|||
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
|
||||
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
|
||||
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
|
||||
github.com/474420502/gcurl v0.1.0 h1:aKPP27qAnofntTqqVF8rjejHBWVlYWzFEZGdqjBiMgw=
|
||||
github.com/474420502/gcurl v0.1.0/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
|
||||
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
|
||||
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
|
||||
github.com/474420502/hunter v0.2.0 h1:pH7xIWzas2IuLdCJL/HtcouHcZQq0XIP/FswY7yF+pA=
|
||||
github.com/474420502/hunter v0.2.0/go.mod h1:c1+92qUtjNzYw6Mzl6Qkb2kMALAXMlYQk3kJdoOqnmY=
|
||||
github.com/474420502/hunter v0.3.0 h1:0VPi1MInxjHOta3da4v0ALWK0y3/X4/6nUSLFvdbiFU=
|
||||
github.com/474420502/hunter v0.3.0/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
|
||||
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
|
||||
|
|
|
@ -25,7 +25,7 @@ CREATE TABLE IF NOT EXISTS `collect_log` (
|
|||
`platform` varchar(255) NOT NULL,
|
||||
`anchor_id` varchar(255) NOT NULL,
|
||||
|
||||
`is_showing` tinyint(1) DEFAULT 0,
|
||||
`is_live_streaming` tinyint(1) DEFAULT 0,
|
||||
`is_error` tinyint(1) DEFAULT 0,
|
||||
|
||||
`followers` bigint(11) DEFAULT NULL,
|
||||
|
@ -33,9 +33,9 @@ CREATE TABLE IF NOT EXISTS `collect_log` (
|
|||
`giver` json DEFAULT NULL,
|
||||
`gratuity` bigint(11) DEFAULT NULL,
|
||||
|
||||
`show_title` text DEFAULT NULL,
|
||||
`show_start_time` timestamp NULL DEFAULT NULL,
|
||||
`show_end_time` timestamp NULL DEFAULT NULL,
|
||||
`live_title` text DEFAULT NULL,
|
||||
`live_start_time` timestamp NULL DEFAULT NULL,
|
||||
`live_end_time` timestamp NULL DEFAULT NULL,
|
||||
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
|
||||
`tags` json DEFAULT NULL,
|
||||
`ext` json DEFAULT NULL,
|
||||
|
@ -45,7 +45,7 @@ CREATE TABLE IF NOT EXISTS `collect_log` (
|
|||
KEY `uid_idx` (`uid`),
|
||||
KEY `platform_idx` (`platform`),
|
||||
KEY `anchor_id_idx` (`anchor_id`),
|
||||
KEY `is_showing_idx` (`is_showing`),
|
||||
KEY `is_live_streaming_idx` (`is_live_streaming`),
|
||||
KEY `is_error_idx` (`is_error`),
|
||||
KEY `followers_idx` (`followers`),
|
||||
KEY `views_idx` (`views`),
|
||||
|
|
38
store.go
38
store.go
|
@ -19,6 +19,10 @@ const (
|
|||
OperatorError OperatorFlag = 10000
|
||||
)
|
||||
|
||||
// ISet is implemented by types whose fields can be assigned by name.
type ISet interface {
	// Set stores value under the given field name.
	Set(field string, value interface{})
}
|
||||
|
||||
// SourceStore 储存
|
||||
type SourceStore struct {
|
||||
table string
|
||||
|
@ -187,7 +191,6 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
|
|||
}
|
||||
|
||||
result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, tags, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetTags(), isource.GetExt())
|
||||
log.Println(result.LastInsertId())
|
||||
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
|
@ -207,39 +210,10 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
|
|||
return result.LastInsertId()
|
||||
}
|
||||
|
||||
// InsertAnchorInfo AnchorInfo表, 插入数据
|
||||
// func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) error {
|
||||
// _, err := store.db.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, show_type, ext) values(?,?,?,?,?,?,?)", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetShowType(), isource.GetExt())
|
||||
// store.errorAlarm(err)
|
||||
// return err
|
||||
// }
|
||||
|
||||
/*
|
||||
`uid` bigint,
|
||||
`platform` varchar(255) NOT NULL,
|
||||
`anchor_id` varchar(255) NOT NULL,
|
||||
|
||||
`is_showing` tinyint(1) DEFAULT NULL,
|
||||
`is_error` tinyint(1) DEFAULT NULL,
|
||||
|
||||
`followers` int(11) DEFAULT NULL,
|
||||
`views` int(11) DEFAULT NULL,
|
||||
`giver` json DEFAULT NULL,
|
||||
`gratuity` int(11) DEFAULT NULL,
|
||||
|
||||
`show_title` text DEFAULT NULL,
|
||||
`show_start_time` timestamp NULL DEFAULT NULL,
|
||||
`show_end_time` timestamp NULL DEFAULT NULL,
|
||||
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, --时间戳从源数据里获取
|
||||
`ext` json DEFAULT NULL,
|
||||
|
||||
`error_msg` text DEFAULT NULL,
|
||||
*/
|
||||
|
||||
// InsertCollectLog CollectLog表插入数据
|
||||
func (store *ExtractorStore) InsertCollectLog(isource IGetCollectLog) error {
|
||||
_, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||
isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetTags(), isource.GetExt(), isource.GetErrorMsg(),
|
||||
_, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_live_streaming, is_error, followers, views, giver, gratuity, live_title, live_start_time, live_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
||||
isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsLiveStreaming(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetLiveTitle(), isource.GetLiveStartTime(), isource.GetLiveEndTime(), isource.GetUpdateTime(), isource.GetTags(), isource.GetExt(), isource.GetErrorMsg(),
|
||||
)
|
||||
store.errorAlarm(err)
|
||||
return err
|
||||
|
|
Loading…
Reference in New Issue
Block a user