intimate/tasks/openrec/openrec_task2/task_openrec.go

179 lines
4.3 KiB
Go
Raw Normal View History

package main
import (
2020-07-14 11:00:34 +00:00
"database/sql"
2020-07-09 03:38:51 +00:00
"encoding/json"
"intimate"
"log"
2020-07-16 10:31:13 +00:00
"os"
"os/signal"
2020-07-14 11:00:34 +00:00
"strconv"
2020-07-16 10:31:13 +00:00
"sync/atomic"
"syscall"
2020-07-09 03:38:51 +00:00
"time"
2020-07-13 10:10:48 +00:00
"github.com/474420502/gcurl"
2020-07-14 11:00:34 +00:00
"github.com/tidwall/gjson"
2020-07-13 10:10:48 +00:00
"github.com/474420502/hunter"
)
var (
	// oer is the package-level task instance; it is assigned in init.
	oer *OpenrecExtratorRanking

	// sstore is the source store, created for the Openrec source type. For the
	// table layout see sql/intimate_source.sql.
	sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STOpenrec))

	// estore is the extractor store, from which queued streamer rows are
	// popped. For the table layout see sql/intimate_extractor.sql.
	estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
)
func init() {
oer = &OpenrecExtratorRanking{}
}
// OpenrecExtratorRanking is the task that fetches user information.
// It currently has no fields; a "Store *intimate.Store" field exists only as
// a commented-out idea in earlier revisions.
type OpenrecExtratorRanking struct{}
// Execute 执行方法
func (oer *OpenrecExtratorRanking) Execute(cxt *hunter.TaskContext) {
2020-07-16 10:31:13 +00:00
var loop int32 = 1
go func() {
signalchan := make(chan os.Signal)
signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
log.Println("accept stop command:", <-signalchan)
atomic.StoreInt32(&loop, 0)
}()
var lasterr error = nil
2020-07-16 10:31:13 +00:00
for atomic.LoadInt32(&loop) > 0 {
2020-07-09 03:38:51 +00:00
streamer, err := estore.Pop(intimate.Popenrec) //队列里弹出一个streamer行. 进行解析
2020-07-09 03:38:51 +00:00
if streamer == nil || err != nil {
if err != lasterr {
log.Println(err, lasterr)
lasterr = err
}
2020-07-13 10:10:48 +00:00
time.Sleep(time.Second * 2)
continue
2020-07-09 03:38:51 +00:00
}
userId := streamer.UserId
var updateUrl map[string]string
2020-07-09 09:09:46 +00:00
err = json.Unmarshal(streamer.UpdateUrl.([]byte), &updateUrl) // 反序列化update_url, 里面存了需要采集的url
if err != nil {
log.Println(err)
continue
}
// Check Userid
userUrl := updateUrl["user"]
tp := cxt.Session().Get(userUrl) // 获取user url页面数据
resp, err := tp.Execute()
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
2020-07-09 09:09:46 +00:00
if err != nil {
log.Println(err)
estore.UpdateError(streamer, err)
2020-07-09 09:09:46 +00:00
continue
}
cookies := cxt.Session().GetCookies(tp.GetParsedURL())
2020-07-13 10:10:48 +00:00
scurl := updateUrl["supporters"] //获取打赏者的数据
2020-07-13 10:10:48 +00:00
curl := gcurl.ParseRawCURL(scurl)
supportersSession := curl.CreateSession()
2020-07-13 11:11:13 +00:00
2020-07-14 11:00:34 +00:00
temporary := curl.CreateTemporary(supportersSession)
supportersSession.SetCookies(temporary.GetParsedURL(), cookies)
var supporters []string
for { // supporters 数据需要登录信息. 下面为赋值 supporters链接获取的uid token random码
2020-07-13 10:10:48 +00:00
2020-07-14 11:00:34 +00:00
supportersQuery := temporary.GetQuery()
2020-07-13 10:10:48 +00:00
for _, cookie := range cookies {
if cookie.Name == "uuid" {
supportersQuery.Set("Uuid", cookie.Value)
continue
}
if cookie.Name == "token" {
supportersQuery.Set("Token", cookie.Value)
continue
}
if cookie.Name == "random" {
supportersQuery.Set("Random", cookie.Value)
continue
}
}
supportersQuery.Set("identify_id", userId)
2020-07-14 11:00:34 +00:00
temporary.SetQuery(supportersQuery)
2020-07-13 10:10:48 +00:00
2020-07-14 11:00:34 +00:00
resp, err := temporary.Execute()
2020-07-13 10:10:48 +00:00
if err != nil {
log.Println(err)
}
2020-07-14 11:00:34 +00:00
supporterjson := gjson.ParseBytes(resp.Content())
supporterdata := supporterjson.Get("data") //解析supporters获取的json数据
2020-07-14 11:00:34 +00:00
if supporterdata.Type == gjson.Null {
break
}
supporters = append(supporters, string(resp.Content()))
page := supportersQuery.Get("page_number") // page_number 加1
2020-07-14 11:00:34 +00:00
pageint, err := strconv.Atoi(page)
if err != nil {
log.Println(err)
break
}
pageint++
page = strconv.Itoa(pageint)
supportersQuery.Set("page_number", page)
temporary.SetQuery(supportersQuery)
2020-07-13 10:10:48 +00:00
}
2020-07-14 11:00:34 +00:00
// cookies := cxt.Session().GetCookies(wf.GetParsedURL())
2020-07-09 09:09:46 +00:00
ext := make(map[string]interface{})
ext["json_supporters"] = supporters
2020-07-20 10:54:34 +00:00
ext["html_user"] = string(resp.Content())
2020-07-09 09:09:46 +00:00
liveUrl := updateUrl["live"]
tp = cxt.Session().Get(liveUrl)
resp, err = tp.Execute()
2020-07-09 09:09:46 +00:00
if err != nil {
log.Println(err)
estore.UpdateError(streamer, err)
2020-07-09 09:09:46 +00:00
continue
}
2020-07-20 10:54:34 +00:00
ext["html_live"] = string(resp.Content())
ext["var_user_id"] = userId
2020-07-09 09:09:46 +00:00
extJsonBytes, err := json.Marshal(ext)
if err != nil {
log.Println(err)
estore.UpdateError(streamer, err)
2020-07-09 09:09:46 +00:00
continue
}
streamer.Operator = int32(intimate.OperatorOK)
source := &intimate.Source{}
source.Target = intimate.TOpenrecUser
source.Ext = string(extJsonBytes)
2020-07-20 10:54:34 +00:00
source.StreamerId = sql.NullInt64{Int64: streamer.Uid, Valid: true}
sstore.Insert(source)
estore.UpdateOperator(streamer)
2020-07-09 03:38:51 +00:00
}
2020-07-09 09:09:46 +00:00
}