TODO: 正则Regexp 获取Tags错误 (the regexp that extracts Tags returns wrong results)

This commit is contained in:
eson 2020-07-15 19:23:45 +08:00
parent b63e180499
commit 13ae890171
5 changed files with 62 additions and 40 deletions

2
extractor/openrec/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.html
screenlog.*

View File

@ -96,6 +96,10 @@ func TestExtractor(t *testing.T) {
collect := intimate.NewExtractorStore() collect := intimate.NewExtractorStore()
store := intimate.NewSourceStore("source_openrec") store := intimate.NewSourceStore("source_openrec")
source, err := store.Pop(string(intimate.TTOpenrecRanking), 100) source, err := store.Pop(string(intimate.TTOpenrecRanking), 100)
if err != nil {
log.Println(err)
return
}
anchorId := source.GetSource().String anchorId := source.GetSource().String
@ -180,11 +184,11 @@ func TestExtractor(t *testing.T) {
// MovieToolbar__Views-g5e6ic-13 iDRGyA // MovieToolbar__Views-g5e6ic-13 iDRGyA
livejson := m["user_live"] livejson := m["user_live"]
// f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm) f, err := os.OpenFile("./test.html", os.O_CREATE|os.O_TRUNC|os.O_RDWR, os.ModePerm)
// if err != nil { if err != nil {
// panic(err) panic(err)
// } }
// f.WriteString(livejson.String()) f.WriteString(livejson.String())
extractor = hunter.NewExtractor([]byte(livejson.Str)) extractor = hunter.NewExtractor([]byte(livejson.Str))
// xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]") // xr, err := extractor.XPathResult("//h1[ contains(@class, 'MovieTitle__Title')]")
@ -228,6 +232,9 @@ func TestExtractor(t *testing.T) {
} }
} }
matheslist := regexp.MustCompile(`TagButton__Button.+>([^<]+)<`).FindAllStringSubmatch(livejson.Str, 0)
t.Error(matheslist)
LiveUrl := "https://www.openrec.tv/live/" + anchorId LiveUrl := "https://www.openrec.tv/live/" + anchorId
ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true}) ai.SetLiveUrl(sql.NullString{String: LiveUrl, Valid: true})

View File

@ -12,7 +12,7 @@ type ISetAnchorInfo interface {
SetAnchorName(string) // SetAnchorName(string) //
SetLiveUrl(sql.NullString) // SetLiveUrl(sql.NullString) //
SetChannel(sql.NullString) // SetChannel(sql.NullString) //
SetShowType(sql.NullString) // SetTags(interface{}) //
SetExt(interface{}) // SetExt(interface{}) //
SetUpdateTime(time.Time) // SetUpdateTime(time.Time) //
} }
@ -24,7 +24,7 @@ type IGetAnchorInfo interface {
GetAnchorName() string // GetAnchorName() string //
GetLiveUrl() sql.NullString // GetLiveUrl() sql.NullString //
GetChannel() sql.NullString // GetChannel() sql.NullString //
GetShowType() sql.NullString // GetTags() interface{}
GetExt() interface{} // GetExt() interface{} //
GetUpdateTime() time.Time // GetUpdateTime() time.Time //
} }
@ -58,11 +58,21 @@ type AnchorInfo struct {
AnchorName string // AnchorName string //
LiveUrl sql.NullString // LiveUrl sql.NullString //
Channel sql.NullString // Channel sql.NullString //
ShowType sql.NullString // Tags interface{}
Ext interface{} // Ext interface{} //
UpdateTime time.Time // UpdateTime time.Time //
} }
// GetTags returns the value held in the Tags field of this AnchorInfo.
// The result is an interface{}, so the caller receives whatever value was
// stored, without any conversion.
func (ai *AnchorInfo) GetTags() interface{} {
return ai.Tags
}
// SetTags stores Tags in the Tags field of this AnchorInfo, replacing any
// previous value. The argument is assigned as-is; nothing is validated here.
func (ai *AnchorInfo) SetTags(Tags interface{}) {
ai.Tags = Tags
}
// GetUpdateTime Get return UpdateTime time.Time // GetUpdateTime Get return UpdateTime time.Time
func (ai *AnchorInfo) GetUpdateTime() time.Time { func (ai *AnchorInfo) GetUpdateTime() time.Time {
return ai.UpdateTime return ai.UpdateTime
@ -83,16 +93,6 @@ func (ai *AnchorInfo) SetExt(Ext interface{}) {
ai.Ext = Ext ai.Ext = Ext
} }
// GetShowType returns the ShowType field of this AnchorInfo as a
// sql.NullString.
func (ai *AnchorInfo) GetShowType() sql.NullString {
return ai.ShowType
}
// SetShowType stores ShowType in the ShowType field of this AnchorInfo,
// replacing any previous value.
func (ai *AnchorInfo) SetShowType(ShowType sql.NullString) {
ai.ShowType = ShowType
}
// GetChannel Get return Channel sql.NullString // GetChannel Get return Channel sql.NullString
func (ai *AnchorInfo) GetChannel() sql.NullString { func (ai *AnchorInfo) GetChannel() sql.NullString {
return ai.Channel return ai.Channel
@ -167,6 +167,7 @@ type IGetCollectLog interface {
GetShowStartTime() sql.NullTime // GetShowStartTime() sql.NullTime //
GetShowEndTime() sql.NullTime // GetShowEndTime() sql.NullTime //
GetUpdateTime() sql.NullTime // GetUpdateTime() sql.NullTime //
GetTags() interface{} //
GetExt() interface{} // GetExt() interface{} //
GetErrorMsg() sql.NullString // GetErrorMsg() sql.NullString //
} }
@ -185,6 +186,7 @@ type ISetCollectLog interface {
SetShowStartTime(sql.NullTime) // SetShowStartTime(sql.NullTime) //
SetShowEndTime(sql.NullTime) // SetShowEndTime(sql.NullTime) //
SetUpdateTime(sql.NullTime) // SetUpdateTime(sql.NullTime) //
SetTags(interface{}) //
SetExt(interface{}) // SetExt(interface{}) //
SetErrorMsg(sql.NullString) // SetErrorMsg(sql.NullString) //
} }
@ -237,10 +239,21 @@ type CollectLog struct {
ShowStartTime sql.NullTime // ShowStartTime sql.NullTime //
ShowEndTime sql.NullTime // ShowEndTime sql.NullTime //
UpdateTime sql.NullTime // UpdateTime sql.NullTime //
Tags interface{}
Ext interface{} // Ext interface{} //
ErrorMsg sql.NullString // ErrorMsg sql.NullString //
} }
// GetTags returns the value held in the Tags field of this CollectLog.
// The result is an interface{}, so the caller receives whatever value was
// stored, without any conversion.
func (cl *CollectLog) GetTags() interface{} {
return cl.Tags
}
// SetTags stores Tags in the Tags field of this CollectLog, replacing any
// previous value. The argument is assigned as-is; nothing is validated here.
func (cl *CollectLog) SetTags(Tags interface{}) {
cl.Tags = Tags
}
// GetErrorMsg Get return Error sql.NullString // GetErrorMsg Get return Error sql.NullString
func (cl *CollectLog) GetErrorMsg() sql.NullString { func (cl *CollectLog) GetErrorMsg() sql.NullString {
return cl.ErrorMsg return cl.ErrorMsg

View File

@ -17,7 +17,6 @@ CREATE TABLE IF NOT EXISTS `anchor_info` (
KEY `anchor_id_idx` (`anchor_id`), KEY `anchor_id_idx` (`anchor_id`),
KEY `anchor_name_idx` (`anchor_name`), KEY `anchor_name_idx` (`anchor_name`),
KEY `channel_idx` (`channel`), KEY `channel_idx` (`channel`),
KEY `show_type_idx` (`show_type`),
KEY `update_time_idx` (`update_time`) KEY `update_time_idx` (`update_time`)
); );
@ -38,6 +37,7 @@ CREATE TABLE IF NOT EXISTS `collect_log` (
`show_start_time` timestamp NULL DEFAULT NULL, `show_start_time` timestamp NULL DEFAULT NULL,
`show_end_time` timestamp NULL DEFAULT NULL, `show_end_time` timestamp NULL DEFAULT NULL,
`update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP, `update_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
`tags` json DEFAULT NULL,
`ext` json DEFAULT NULL, `ext` json DEFAULT NULL,
`error_msg` text DEFAULT NULL, `error_msg` text DEFAULT NULL,

View File

@ -198,7 +198,7 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
return uid, nil return uid, nil
} }
result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, show_type, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetShowType(), isource.GetExt()) result, err := tx.Exec("insert into "+AnchorTable+"(platform, anchor_id, anchor_name, live_url, channel, tags, ext) values(?,?,?,?,?,?,?);", isource.GetPlatform(), isource.GetAnchorId(), isource.GetAnchorName(), isource.GetLiveUrl(), isource.GetChannel(), isource.GetTags(), isource.GetExt())
log.Println(result.LastInsertId()) log.Println(result.LastInsertId())
if err != nil { if err != nil {
@ -249,8 +249,8 @@ func (store *ExtractorStore) InsertAnchorInfo(isource IGetAnchorInfo) (Uid int64
// InsertCollectLog CollectLog表插入数据 // InsertCollectLog CollectLog表插入数据
func (store *ExtractorStore) InsertCollectLog(isource IGetCollectLog) error { func (store *ExtractorStore) InsertCollectLog(isource IGetCollectLog) error {
_, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)", _, err := store.db.Exec("insert into "+CollectLogTable+"(uid, platform, anchor_id, is_showing, is_error, followers, views, giver, gratuity, show_title, show_start_time, show_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetExt(), isource.GetErrorMsg(), isource.GetUid(), isource.GetPlatform(), isource.GetAnchorId(), isource.GetIsShowing(), isource.GetIsError(), isource.GetFollowers(), isource.GetViews(), isource.GetGiver(), isource.GetGratuity(), isource.GetShowTitle(), isource.GetShowStartTime(), isource.GetShowEndTime(), isource.GetUpdateTime(), isource.GetTags(), isource.GetExt(), isource.GetErrorMsg(),
) )
store.errorAlarm(err) store.errorAlarm(err)
return err return err