3b7e8e94ea
nimo 数据测试提取成功
488 lines
15 KiB
Go
488 lines
15 KiB
Go
package intimate
|
|
|
|
import (
|
|
"crypto/md5"
|
|
"database/sql"
|
|
"fmt"
|
|
"log"
|
|
"strings"
|
|
"time"
|
|
|
|
_ "github.com/go-sql-driver/mysql"
|
|
)
|
|
|
|
// OperatorFlag 标志
|
|
type OperatorFlag int32
|
|
|
|
const (
|
|
// OperatorOK 等待被处理
|
|
OperatorOK OperatorFlag = 100
|
|
// OperatorExtractorOK 提取数据完成
|
|
OperatorExtractorOK OperatorFlag = 200
|
|
// OperatorWait 等待被处理
|
|
OperatorWait OperatorFlag = 1000
|
|
// OperatorError 错误标志
|
|
OperatorError OperatorFlag = 10000
|
|
)
|
|
|
|
type ISet interface {
|
|
Set(string, interface{})
|
|
}
|
|
|
|
type IGet interface {
|
|
Get(string) interface{}
|
|
}
|
|
|
|
type IGetSet interface {
|
|
ISet
|
|
IGet
|
|
}
|
|
|
|
// SourceStore 储存
|
|
type StoreSource struct {
|
|
table string
|
|
db *sql.DB
|
|
|
|
popCount int
|
|
errorCount int
|
|
errorLimit int
|
|
}
|
|
|
|
func (store *StoreSource) PopCount() int {
|
|
return store.popCount
|
|
}
|
|
|
|
func (store *StoreSource) Close() error {
|
|
return store.db.Close()
|
|
}
|
|
|
|
// NewSourceStore 创建一个存储实例
|
|
func NewStoreSource(table string) *StoreSource {
|
|
db, err := sql.Open("mysql", InitConfig.Database.SourceURI)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return &StoreSource{table: table, db: db}
|
|
}
|
|
|
|
func (store *StoreSource) errorAlarm(err error) {
|
|
if err != nil {
|
|
log.Println("store error: ", err)
|
|
// 报警. 如果数据插入有问题
|
|
store.errorCount++
|
|
if store.errorCount >= store.errorLimit {
|
|
// 数据库频繁操作初问题 报警, 减少没意义的请求
|
|
}
|
|
} else {
|
|
if store.errorCount > 0 {
|
|
store.errorCount--
|
|
}
|
|
}
|
|
}
|
|
|
|
// Insert 插入数据
|
|
func (store *StoreSource) Insert(isource IGet) {
|
|
_, err := store.db.Exec("insert into "+store.table+"(url, target_type, source, ext, operator, error_msg, streamer_id) values(?,?,?,?,?,?,?)", isource.Get("Url"), isource.Get("Target"), isource.Get("Source"), isource.Get("Ext"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("StreamerId"))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// Deduplicate 去重
|
|
func (store *StoreSource) Deduplicate(target Target, field string) {
|
|
sql := `DELETE FROM ` + store.table + ` WHERE uid NOT IN (SELECT MAX(s.uid) FROM (SELECT uid, ` + field + ` FROM ` + store.table + ` force index(target_type_idx) WHERE target_type = "` + string(target) + `" ) s GROUP BY s.` + string(field) + `) ;`
|
|
_, err := store.db.Exec(sql)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// Update 更新数据
|
|
func (store *StoreSource) Update(isource IGet) {
|
|
_, err := store.db.Exec("update "+store.table+" set ext = ?, pass_gob = ?, operator = ?, error_msg = ? where uid = ?", isource.Get("Ext"), isource.Get("PassGob"), isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// UpdateOperator 更新数据操作标志位
|
|
func (store *StoreSource) UpdateOperator(isource IGet) {
|
|
_, err := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// UpdateError 更新错误数据
|
|
func (store *StoreSource) UpdateError(isource IGetSet, err error) {
|
|
isource.Set("Operator", int32(OperatorError)+isource.Get("Operator").(int32))
|
|
isource.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
|
|
_, dberr := store.db.Exec("update "+store.table+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
|
if dberr != nil {
|
|
// email tell owner to deal with
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// Restore 恢复Operator数据状态
|
|
func (store *StoreSource) Restore(isource IGet) {
|
|
_, dberr := store.db.Exec("update "+store.table+" set operator = ? where uid = ?", isource.Get("LastOperator"), isource.Get("Uid"))
|
|
if dberr != nil {
|
|
// email tell owner to deal with
|
|
panic(dberr)
|
|
}
|
|
}
|
|
|
|
// Pop 弹出一条未处理的数据
|
|
func (store *StoreSource) Pop(targetType Target, operators ...int32) (*Source, error) {
|
|
|
|
tx, err := store.db.Begin()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var args = []interface{}{string(targetType)}
|
|
selectSQL := `select uid, url, target_type, source, ext, operator, update_time, streamer_id from ` + store.table + ` where target_type = ?`
|
|
if len(operators) == 0 {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, 0)
|
|
} else {
|
|
for _, operator := range operators {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, operator)
|
|
}
|
|
}
|
|
|
|
// log.Println(selectSQL + ` limit 1 for update`)
|
|
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
|
|
|
|
defer func() {
|
|
err := tx.Commit()
|
|
if err != nil {
|
|
log.Println(err)
|
|
err = tx.Rollback()
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
}
|
|
store.popCount++
|
|
}()
|
|
|
|
s := &Source{}
|
|
// uid, url, target_type, source, ext, operator
|
|
err = row.Scan(&s.Uid, &s.Url, &s.Target, &s.Source, &s.Ext, &s.Operator, &s.UpdateTime, &s.StreamerId)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
s.Set("LastOperator", s.Operator)
|
|
_, err = tx.Exec("update "+store.table+" set operator = ? where uid = ?", OperatorWait, s.Uid)
|
|
return s, nil
|
|
}
|
|
|
|
// StreamerTable 主播表名称
|
|
const StreamerTable string = "streamer"
|
|
|
|
// CollectLogTable 采集日志表
|
|
const CollectLogTable string = "collect_log"
|
|
|
|
// StreamerListTable 主播表名称
|
|
const StreamerListTable string = "streamer_list"
|
|
|
|
type StoreExtractor struct {
|
|
db *sql.DB
|
|
|
|
popCount int
|
|
errorCount int
|
|
errorLimit int
|
|
}
|
|
|
|
func (store *StoreExtractor) PopCount() int {
|
|
return store.popCount
|
|
}
|
|
|
|
func (store *StoreExtractor) Close() error {
|
|
return store.db.Close()
|
|
}
|
|
|
|
func (store *StoreExtractor) errorAlarm(err error) {
|
|
if err != nil {
|
|
log.Println("store error: ", err)
|
|
// 报警. 如果数据插入有问题
|
|
store.errorCount++
|
|
if store.errorCount >= store.errorLimit {
|
|
// 数据库频繁操作初问题 报警, 减少没意义的请求
|
|
}
|
|
} else {
|
|
if store.errorCount > 0 {
|
|
store.errorCount--
|
|
}
|
|
}
|
|
}
|
|
|
|
// NewStoreExtractor 生成一个extractor库的相关链接
|
|
func NewStoreExtractor() *StoreExtractor {
|
|
db, err := sql.Open("mysql", InitConfig.Database.ExtractorURI)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return &StoreExtractor{db: db}
|
|
}
|
|
|
|
// PopNoWait 弹出一个不用按时间间隔更新的主播信息, 主要用来测试.
|
|
func (store *StoreExtractor) PopNoWait(platform Platform, condition string, operators ...int32) (*Streamer, error) {
|
|
tx, err := store.db.Begin()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var args = []interface{}{string(platform)}
|
|
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and ` + condition
|
|
if len(operators) == 0 {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, 0)
|
|
} else {
|
|
for _, operator := range operators {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, operator)
|
|
}
|
|
}
|
|
|
|
defer func() {
|
|
err := tx.Commit()
|
|
if err != nil {
|
|
log.Println(err)
|
|
err = tx.Rollback()
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
}
|
|
store.popCount++
|
|
}()
|
|
|
|
// log.Println(selectSQL + ` limit 1 for update`)
|
|
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
|
|
|
|
s := &Streamer{}
|
|
// uid, url, target_type, source, ext, operator
|
|
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s.Set("LastOperator", s.Operator)
|
|
_, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid)
|
|
return s, nil
|
|
}
|
|
|
|
// Pop 弹出一条未处理的数据
|
|
func (store *StoreExtractor) Pop(platform Platform, operators ...int32) (*Streamer, error) {
|
|
|
|
tx, err := store.db.Begin()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
var args = []interface{}{string(platform)}
|
|
selectSQL := `select uid, update_time, user_id, tags, live_url, update_url, is_update_streamer, update_interval from ` + StreamerTable + ` where platform = ? and TIMESTAMPDIFF(MINUTE , update_time, CURRENT_TIMESTAMP()) >= update_interval`
|
|
if len(operators) == 0 {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, 0)
|
|
} else {
|
|
for _, operator := range operators {
|
|
selectSQL += " and operator = ?"
|
|
args = append(args, operator)
|
|
}
|
|
}
|
|
|
|
defer func() {
|
|
err := tx.Commit()
|
|
if err != nil {
|
|
log.Println(err)
|
|
err = tx.Rollback()
|
|
if err != nil {
|
|
log.Println(err)
|
|
}
|
|
}
|
|
store.popCount++
|
|
}()
|
|
|
|
// log.Println(selectSQL + ` limit 1 for update`)
|
|
row := tx.QueryRow(selectSQL+` limit 1 for update`, args...)
|
|
|
|
s := &Streamer{}
|
|
// uid, url, target_type, source, ext, operator
|
|
err = row.Scan(&s.Uid, &s.UpdateTime, &s.UserId, &s.Tags, &s.LiveUrl, &s.UpdateUrl, &s.IsUpdateStreamer, &s.UpdateInterval)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s.Set("LastOperator", s.Operator)
|
|
_, err = tx.Exec("update "+StreamerTable+" set operator = ? where uid = ?", OperatorWait, s.Uid)
|
|
return s, nil
|
|
}
|
|
|
|
// UpdateStreamerList streamerlist表, 更新数据
|
|
func (store *StoreExtractor) UpdateStreamerList(streamer IGet, fieldvalues ...interface{}) {
|
|
updateSQL := "UPDATE " + StreamerListTable + " SET "
|
|
var values []interface{}
|
|
for i := 0; i < len(fieldvalues); i += 2 {
|
|
field := fieldvalues[i]
|
|
values = append(values, fieldvalues[i+1])
|
|
updateSQL += field.(string) + " = ? "
|
|
}
|
|
updateSQL += "WHERE urlhash = ?"
|
|
values = append(values, streamer.Get("UrlHash"))
|
|
_, err := store.db.Exec(updateSQL, values...)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// InsertStreamer streamerlist表, 插入数据
|
|
func (store *StoreExtractor) InsertStreamerList(streamerlist IGet) (isExists bool) {
|
|
urlstr := streamerlist.Get("Url").(string)
|
|
|
|
_, err := store.db.Exec("insert into streamer_list(urlhash, url, platform, label, serialize, update_interval, error_msg, operator) values(?,?,?,?,?,?,?,?)",
|
|
fmt.Sprintf("%x", md5.Sum([]byte(urlstr))),
|
|
urlstr,
|
|
streamerlist.Get("Platform"),
|
|
streamerlist.Get("Label"),
|
|
streamerlist.Get("Serialize"),
|
|
streamerlist.Get("UpdateInterval"),
|
|
streamerlist.Get("ErrorMsg"),
|
|
streamerlist.Get("Operator"),
|
|
)
|
|
|
|
if err != nil {
|
|
if !strings.HasPrefix(err.Error(), "Error 1062") {
|
|
log.Println(err)
|
|
}
|
|
return true
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// InsertStreamer Streamer表, 插入数据
|
|
func (store *StoreExtractor) InsertStreamer(streamer *Streamer) (isExists bool) {
|
|
// select uid from table where platform = ? and user_id = ?
|
|
// selectSQL := "SELECT is_update_url, uid FROM " + StreamerTable + " WHERE platform = ? AND user_id = ?"
|
|
tx, err := store.db.Begin()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
defer func() {
|
|
err = tx.Commit()
|
|
if err != nil {
|
|
rerr := tx.Rollback()
|
|
if rerr != nil {
|
|
log.Println(rerr)
|
|
}
|
|
panic(err)
|
|
}
|
|
}()
|
|
|
|
streamer.UpdateTime = sql.NullTime{Time: time.Now().Add(-time.Hour * 100000), Valid: true}
|
|
_, err = tx.Exec("INSERT IGNORE INTO "+StreamerTable+"(platform, user_id, user_name, live_url, update_url, tags, update_time) VALUES(?,?,?,?,?,?,?);",
|
|
streamer.Platform,
|
|
streamer.UserId,
|
|
streamer.UserName,
|
|
streamer.LiveUrl,
|
|
streamer.UpdateUrl,
|
|
streamer.Tags,
|
|
streamer.UpdateTime,
|
|
)
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
return false
|
|
}
|
|
|
|
// UpdateError 更新错误数据
|
|
func (store *StoreExtractor) UpdateError(isource IGetSet, err error) {
|
|
isource.Set("Operator", int32(OperatorError)+isource.Get("Operator").(int32))
|
|
isource.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true})
|
|
_, dberr := store.db.Exec("update "+StreamerTable+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
|
if dberr != nil {
|
|
// email tell owner to deal with
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// UpdateStreamerLog 只更新Streamer的关联日志和时间戳
|
|
func (store *StoreExtractor) UpdateStreamerLog(latestUid int64, streamerUid int64) {
|
|
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET latest_log_uid = ?, update_time = CURRENT_TIMESTAMP() WHERE uid = ?", latestUid, streamerUid)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// UpdateOperator Streamer表, 插入数据
|
|
func (store *StoreExtractor) UpdateOperator(isource IGet) {
|
|
_, err := store.db.Exec("update "+StreamerTable+" set operator = ?, error_msg = ? where uid = ?", isource.Get("Operator"), isource.Get("ErrorMsg"), isource.Get("Uid"))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// UpdateStreamer Streamer表, 插入数据
|
|
func (store *StoreExtractor) UpdateStreamer(streamer IGet) {
|
|
// log.Printf("UPDATE "+StreamerTable+" SET user_name = %v, live_url = %v, channel = %v, latest_log_uid = %v, tags = %v, ext = %v, operator = %v, update_time = %v, update_interval = %v WHERE uid = %v", streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
|
|
_, err := store.db.Exec("UPDATE "+StreamerTable+" SET user_name = ?, live_url = ?, channel = ?, latest_log_uid = ?, tags = ?, ext = ?, operator = ?, update_time = ?, update_interval = ? WHERE uid = ?",
|
|
streamer.Get("UserName"), streamer.Get("LiveUrl"), streamer.Get("Channel"), streamer.Get("LatestLogUid"), streamer.Get("Tags"), streamer.Get("Ext"), streamer.Get("Operator"), streamer.Get("UpdateTime"), streamer.Get("UpdateInterval"), streamer.Get("Uid"))
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// Update Streamer表, 更新指定的字段
|
|
func (store *StoreExtractor) Update(streamer IGet, fieldvalues ...interface{}) {
|
|
updateSQL := "UPDATE " + StreamerTable + " SET "
|
|
var values []interface{}
|
|
for i := 0; i < len(fieldvalues); i += 2 {
|
|
field := fieldvalues[i]
|
|
values = append(values, fieldvalues[i+1])
|
|
updateSQL += field.(string) + " = ?,"
|
|
}
|
|
updateSQL = updateSQL[0 : len(updateSQL)-1]
|
|
updateSQL += "WHERE uid = ?"
|
|
values = append(values, streamer.Get("Uid"))
|
|
_, err := store.db.Exec(updateSQL, values...)
|
|
if err != nil {
|
|
log.Println(updateSQL)
|
|
panic(err)
|
|
}
|
|
}
|
|
|
|
// InsertClog CollectLog表插入数据
|
|
func (store *StoreExtractor) InsertClog(clog IGet) int64 {
|
|
tx, err := store.db.Begin()
|
|
|
|
defer func() {
|
|
if err := recover(); err != nil {
|
|
tx.Rollback()
|
|
log.Panic(err)
|
|
}
|
|
}()
|
|
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
result, err := tx.Exec("insert into "+CollectLogTable+"(streamer_uid, platform, user_id, is_live_streaming, is_error, followers, views, giver, gratuity, live_title, live_start_time, live_end_time, update_time, tags, ext, error_msg) values(?,?,?,?,?,?,?,?,?,?,?,?,?,?,?,?)",
|
|
clog.Get("StreamerUid"), clog.Get("Platform"), clog.Get("UserId"), clog.Get("IsLiveStreaming"), clog.Get("IsError"), clog.Get("Followers"), clog.Get("Views"), clog.Get("Giver"), clog.Get("Gratuity"), clog.Get("LiveTitle"), clog.Get("LiveStartTime"), clog.Get("LiveEndTime"), clog.Get("UpdateTime"), clog.Get("Tags"), clog.Get("Ext"), clog.Get("ErrorMsg"),
|
|
)
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
logUid, err := result.LastInsertId()
|
|
if err != nil {
|
|
panic(err)
|
|
}
|
|
|
|
_, err = tx.Exec("update "+StreamerTable+" set latest_log_uid = ? where uid = ?", logUid, clog.Get("StreamerUid"))
|
|
if err = tx.Commit(); err != nil {
|
|
panic(err)
|
|
}
|
|
return logUid
|
|
}
|