完成 openrec 的排名获取主播主页的功能.

TODO: 分解成多任务服务机制. 让每个子任务独立互不干扰.
This commit is contained in:
eson 2020-07-07 18:14:14 +08:00
parent fdc351da83
commit eba36b0c95
7 changed files with 265 additions and 196 deletions

View File

@ -1,134 +0,0 @@
package main
import (
"database/sql"
"time"
"github.com/474420502/hunter"
)
// Package-level state shared by the openrec ranking task.
var (
	// targetTypeRanking is the target type recorded on rows produced by the
	// ranking task.
	targetTypeRanking = "openrec_ranking"

	// openrecRanking is the ranking task instance; it is assigned in init.
	openrecRanking *OpenrecRanking

	// storeOpenrec is the source store instance, the implementation used to
	// persist source data. For the table layout see sql/intimate_source.sql.
	storeOpenrec *Store
)
// init creates the package-level ranking task and gives it the curl command
// (URL plus request headers) for page 1 of the monthly channel ranking.
func init() {
	const rankingCurl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`

	openrecRanking = &OpenrecRanking{PreCurlUrl: rankingCurl}
}
/*
CREATE TABLE `platform_openrec` (
`url` text NOT NULL,
`target_type` varchar(64) NOT NULL,
`source` longtext DEFAULT NULL,
`ext` json DEFAULT NULL,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`operator` tinyint NOT NULL,
KEY `operator_idx` (`operator`),
KEY `update_time_idx` (`update_time`),
KEY `target_type_idx` (`target_type`)
);
*/
// PlatformOpenrec holds the values of one platform_openrec row.
type PlatformOpenrec struct {
	Url        string         // url column
	TargetType string         // target_type column
	Source     sql.NullString // source column; nullable
	Ext        interface{}    // ext column (json); nullable
	UpdateTime time.Time      // update_time column
	Operator   int32          // operator column
}

// GetUrl returns the Url field.
func (p *PlatformOpenrec) GetUrl() string { return p.Url }

// SetUrl stores Url in the Url field.
func (p *PlatformOpenrec) SetUrl(Url string) { p.Url = Url }

// GetTargetType returns the TargetType field.
func (p *PlatformOpenrec) GetTargetType() string { return p.TargetType }

// SetTargetType stores TargetType in the TargetType field.
func (p *PlatformOpenrec) SetTargetType(TargetType string) { p.TargetType = TargetType }

// GetSource returns the Source field.
func (p *PlatformOpenrec) GetSource() sql.NullString { return p.Source }

// SetSource stores Source in the Source field.
func (p *PlatformOpenrec) SetSource(Source sql.NullString) { p.Source = Source }

// GetExt returns the Ext field.
func (p *PlatformOpenrec) GetExt() interface{} { return p.Ext }

// SetExt stores Ext in the Ext field.
func (p *PlatformOpenrec) SetExt(Ext interface{}) { p.Ext = Ext }

// GetUpdateTime returns the UpdateTime field.
func (p *PlatformOpenrec) GetUpdateTime() time.Time { return p.UpdateTime }

// SetUpdateTime stores UpdateTime in the UpdateTime field.
func (p *PlatformOpenrec) SetUpdateTime(UpdateTime time.Time) { p.UpdateTime = UpdateTime }

// GetOperator returns the Operator field.
func (p *PlatformOpenrec) GetOperator() int32 { return p.Operator }

// SetOperator stores Operator in the Operator field.
func (p *PlatformOpenrec) SetOperator(Operator int32) { p.Operator = Operator }
// OpenrecRanking is the task that fetches the openrec ranking.
type OpenrecRanking struct {
	hunter.PreCurlUrl
}

// Execute performs the request through cxt.Hunt and saves the raw response
// body, the workflow's raw URL and the ranking target type to storeOpenrec.
// It panics if the request returns an error.
func (r *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
	resp, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}

	body := resp.Content()
	row := &PlatformOpenrec{
		// An empty body is recorded as an invalid (NULL) source.
		Source:     sql.NullString{String: string(body), Valid: len(body) > 0},
		Url:        cxt.Workflow().GetRawURL(),
		TargetType: targetTypeRanking,
	}
	storeOpenrec.Save(row)
}

View File

@ -1,51 +0,0 @@
package main
import (
"testing"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// OpenrecRankingTest is a test-only task that fetches the openrec ranking
// and checks the response instead of storing it.
type OpenrecRankingTest struct {
	hunter.PreCurlUrl
}

// Execute performs the request and reports failures on the *testing.T that
// was shared under the "test" key. It panics if the request returns an error.
func (task *OpenrecRankingTest) Execute(cxt *hunter.TaskContext) {
	resp, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}

	t := cxt.GetShare("test").(*testing.T)
	body := resp.Content()

	if !gjson.ValidBytes(body) {
		t.Error("source is not json format.")
	}

	// Path "0.rank": the rank of the first element when the body is a JSON
	// array; it is expected to be 1.
	if rank := gjson.GetBytes(body, "0.rank"); rank.Int() != 1 {
		t.Error("rank is error. result raw is ", rank.Raw)
	}
	// t.Error(string(resp.Content()))
}
func TestRanking(t *testing.T) {
// curlBash := `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
// -H 'authority: public.openrec.tv' \
// -H 'accept: application/json, text/javascript, */*; q=0.01' \
// -H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
// -H 'origin: https://www.openrec.tv' \
// -H 'sec-fetch-site: same-site' \
// -H 'sec-fetch-mode: cors' \
// -H 'sec-fetch-dest: empty' \
// -H 'referer: https://www.openrec.tv/ranking' \
// -H 'accept-language: zh-CN,zh;q=0.9' \
// -H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
// --compressed`
ht := hunter.NewHunter(openrecRanking)
ht.SetShare("test", t)
ht.Execute()
}

196
source_openrec.go Normal file
View File

@ -0,0 +1,196 @@
package main
import (
"database/sql"
"log"
"strconv"
"time"
"github.com/474420502/hunter"
)
// Package-level state shared by the openrec ranking task.
var (
	// targetTypeRanking is the target type recorded on rows produced by the
	// ranking task.
	targetTypeRanking = "openrec_ranking"

	// openrecRanking is the ranking task instance; it is assigned in init.
	openrecRanking *OpenrecRanking

	// storeOpenrec is the source store instance, the implementation used to
	// persist source data. For the table layout see sql/intimate_source.sql.
	storeOpenrec *Store
)
// init creates the package-level ranking task and gives it the curl command
// (URL plus request headers) for page 1 of the monthly channel ranking.
func init() {
	const rankingCurl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`

	openrecRanking = &OpenrecRanking{PreCurlUrl: rankingCurl}
}
/*
CREATE TABLE `source_openrec` (
uid bigint AUTO_INCREMENT,
`url` text NOT NULL,
`target_type` varchar(64) NOT NULL,
`source` longtext DEFAULT NULL,
`ext` json DEFAULT NULL,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`operator` int DEFAULT 0,
`error_msg` text DEFAULT NULL,
PRIMARY KEY(`uid`),
KEY `operator_idx` (`operator`),
KEY `update_time_idx` (`update_time`),
KEY `target_type_idx` (`target_type`)
);
*/
// SourceOpenrec holds the values of one source_openrec row.
type SourceOpenrec struct {
	Uid        sql.NullInt64  // uid column (auto-increment primary key)
	Url        string         // url column
	TargetType string         // target_type column
	Source     sql.NullString // source column; nullable
	Ext        interface{}    // ext column (json); nullable
	UpdateTime time.Time      // update_time column
	Operator   int32          // operator column
	ErrorMsg   sql.NullString // error_msg column; nullable
}

// GetUid returns the Uid field.
func (s *SourceOpenrec) GetUid() sql.NullInt64 { return s.Uid }

// SetUid stores Uid in the Uid field.
func (s *SourceOpenrec) SetUid(Uid sql.NullInt64) { s.Uid = Uid }

// GetUrl returns the Url field.
func (s *SourceOpenrec) GetUrl() string { return s.Url }

// SetUrl stores Url in the Url field.
func (s *SourceOpenrec) SetUrl(Url string) { s.Url = Url }

// GetTargetType returns the TargetType field.
func (s *SourceOpenrec) GetTargetType() string { return s.TargetType }

// SetTargetType stores TargetType in the TargetType field.
func (s *SourceOpenrec) SetTargetType(TargetType string) { s.TargetType = TargetType }

// GetSource returns the Source field.
func (s *SourceOpenrec) GetSource() sql.NullString { return s.Source }

// SetSource stores Source in the Source field.
func (s *SourceOpenrec) SetSource(Source sql.NullString) { s.Source = Source }

// GetExt returns the Ext field.
func (s *SourceOpenrec) GetExt() interface{} { return s.Ext }

// SetExt stores Ext in the Ext field.
func (s *SourceOpenrec) SetExt(Ext interface{}) { s.Ext = Ext }

// GetUpdateTime returns the UpdateTime field.
func (s *SourceOpenrec) GetUpdateTime() time.Time { return s.UpdateTime }

// SetUpdateTime stores UpdateTime in the UpdateTime field.
func (s *SourceOpenrec) SetUpdateTime(UpdateTime time.Time) { s.UpdateTime = UpdateTime }

// GetOperator returns the Operator field as an int32.
func (s *SourceOpenrec) GetOperator() int32 { return s.Operator }

// SetOperator stores Operator (an int32) in the Operator field.
func (s *SourceOpenrec) SetOperator(Operator int32) { s.Operator = Operator }

// GetErrorMsg returns the ErrorMsg field.
func (s *SourceOpenrec) GetErrorMsg() sql.NullString { return s.ErrorMsg }

// SetErrorMsg stores ErrorMsg in the ErrorMsg field.
func (s *SourceOpenrec) SetErrorMsg(ErrorMsg sql.NullString) { s.ErrorMsg = ErrorMsg }
// OpenrecRanking is the task that fetches the openrec ranking.
type OpenrecRanking struct {
	hunter.PreCurlUrl
}

// Execute fetches ranking pages one after another, saving each response body
// to storeOpenrec and then advancing the workflow's "page" query parameter.
//
// The loop ends when the request fails (the error is logged), when a body of
// 300 bytes or fewer comes back, or when the current "page" value is not an
// integer. In the last case the row is saved with the parse error and
// operator 10000 before returning.
func (r *OpenrecRanking) Execute(cxt *hunter.TaskContext) {
	const (
		minContentLen   = 300             // bodies this short or shorter end the crawl
		operatorBadPage = 10000           // operator saved when "page" cannot be parsed
		pageInterval    = 2 * time.Second // pause between consecutive pages
	)

	for {
		resp, err := cxt.Hunt()
		if err != nil {
			log.Println(err)
			return
		}

		workflow := cxt.Workflow()
		body := resp.Content()
		log.Println(len(body))
		if len(body) <= minContentLen {
			return
		}

		// The raw URL is read here, before the page number is advanced, so
		// the row records the URL that produced this body.
		row := &SourceOpenrec{
			Source:     sql.NullString{String: string(body), Valid: len(body) > 0},
			Url:        workflow.GetRawURL(),
			TargetType: targetTypeRanking,
		}

		query := workflow.GetQuery()
		page, err := strconv.Atoi(query.Get("page"))
		if err != nil {
			log.Println(err)
			row.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true})
			row.SetOperator(operatorBadPage)
			storeOpenrec.Save(row)
			return
		}

		query.Set("page", strconv.Itoa(page+1))
		workflow.SetQuery(query)

		row.SetErrorMsg(sql.NullString{Valid: false})
		storeOpenrec.Save(row)
		time.Sleep(pageInterval)
	}
}

57
source_openrec_test.go Normal file
View File

@ -0,0 +1,57 @@
package main
import (
"testing"
"github.com/tidwall/gjson"
"github.com/474420502/hunter"
)
// OpenrecRankingTest is a test-only task that fetches the openrec ranking
// and checks the response instead of storing it.
type OpenrecRankingTest struct {
	hunter.PreCurlUrl
}

// Execute performs the request and reports failures on the *testing.T that
// was shared under the "test" key. It panics if the request returns an error.
func (task *OpenrecRankingTest) Execute(cxt *hunter.TaskContext) {
	resp, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}

	t := cxt.GetShare("test").(*testing.T)
	body := resp.Content()

	if !gjson.ValidBytes(body) {
		t.Error("source is not json format.")
	}

	// Path "0.rank": the rank of the first element when the body is a JSON
	// array; it is expected to be 1.
	if rank := gjson.GetBytes(body, "0.rank"); rank.Int() != 1 {
		t.Error("rank is error. result raw is ", rank.Raw)
	}

	// The workflow's query must still carry page=1.
	if page := cxt.Workflow().GetQuery().Get("page"); page != "1" {
		t.Error("workflow page error")
	}
	// t.Error(string(resp.Content()))
}
// TestRanking requests page 1 of the monthly openrec channel ranking and lets
// OpenrecRankingTest.Execute validate the response. The *testing.T is passed
// to the task through the hunter's shared value "test".
func TestRanking(t *testing.T) {
	const rankingCurl = `curl 'https://public.openrec.tv/external/api/v5/channel-ranks?period=monthly&date=&tag=&page=1' \
-H 'authority: public.openrec.tv' \
-H 'accept: application/json, text/javascript, */*; q=0.01' \
-H 'user-agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36' \
-H 'origin: https://www.openrec.tv' \
-H 'sec-fetch-site: same-site' \
-H 'sec-fetch-mode: cors' \
-H 'sec-fetch-dest: empty' \
-H 'referer: https://www.openrec.tv/ranking' \
-H 'accept-language: zh-CN,zh;q=0.9' \
-H 'if-none-match: W/"25edb-aUYBdmLqZcr6DW4ZWKX9r2aqolg"' \
--compressed`

	task := &OpenrecRankingTest{PreCurlUrl: hunter.PreCurlUrl(rankingCurl)}

	h := hunter.NewHunter(task)
	h.SetShare("test", t)
	h.Execute()
}

View File

@ -1,7 +1,7 @@
create database if not exists `intimate_source`;
use intimate_source;
CREATE TABLE IF NOT EXISTS `platform_openrec` (
CREATE TABLE IF NOT EXISTS `source_openrec` (
uid bigint AUTO_INCREMENT,
`url` text NOT NULL,
`target_type` varchar(64) NOT NULL,

View File

@ -15,6 +15,9 @@ type ISource interface {
GetSource() sql.NullString //
GetExt() interface{} //
GetUpdateTime() time.Time //
GetOperator() int32 //
GetErrorMsg() sql.NullString //
}
// Store 储存
@ -33,7 +36,7 @@ func NewStore() *Store {
// Save 储存数据
func (store *Store) Save(isource ISource) {
_, err := store.db.Exec("insert into `platform_openrec`(url, target_type, source, ext) values(?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt())
_, err := store.db.Exec("insert into `platform_openrec`(url, target_type, source, ext, operator, error_msg) values(?,?,?,?,?,?)", isource.GetUrl(), isource.GetTargetType(), isource.GetSource(), isource.GetExt(), isource.GetOperator(), isource.GetErrorMsg())
if err != nil {
log.Fatalln(err)
}

View File

@ -2,13 +2,11 @@ package main
import (
"testing"
"github.com/474420502/hunter"
)
func TestStoreInsert(t *testing.T) {
ht := hunter.NewHunter(openrecRanking)
ht.Execute()
// ht := hunter.NewHunter(openrecRanking)
// ht.Execute()
}
func TestStoreInsertCase1(t *testing.T) {