package main import ( "database/sql" "encoding/json" "intimate" "log" "os" "os/signal" "regexp" "strconv" "strings" "sync/atomic" "syscall" "time" "github.com/tidwall/gjson" ) // OpenrecExtractor 提取方法 type OpenrecExtractor struct { user *intimate.ExtractorSource userLive *intimate.ExtractorSource supporters *intimate.ExtractorSource } func (oe *OpenrecExtractor) Execute() { var loop int32 = 1 go func() { signalchan := make(chan os.Signal) signal.Notify(signalchan, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP) log.Println("accept stop command:", <-signalchan) atomic.StoreInt32(&loop, 0) }() collect := intimate.NewExtractorStore() store := intimate.NewSourceStore("source_openrec") var lasterr error = nil for atomic.LoadInt32(&loop) > 0 { source, err := store.Pop(string(intimate.TTOpenrecRanking), 100) if err != nil { if err != lasterr { log.Println(err, lasterr) lasterr = err } time.Sleep(time.Second * 2) continue } source.SetOperator(int32(intimate.OperatorError)) anchorId := source.GetSource().String ai := &intimate.AnchorInfo{} ai.SetAnchorId(anchorId) ai.SetPlatform(string(intimate.Popenrec)) sdata := source.GetExt().([]byte) if gjson.ValidBytes(sdata) { result := gjson.ParseBytes(sdata) datamap := result.Map() oe.user = intimate.NewExtractorSource(datamap["user"]) oe.user.CreateExtractor() oe.userLive = intimate.NewExtractorSource(datamap["user_live"]) oe.userLive.CreateExtractor() oe.supporters = intimate.NewExtractorSource(datamap["supporters"]) clog := &intimate.CollectLog{} log.Println(anchorId) oe.extractFollowers(clog) oe.extractAnchorName(ai) oe.extractViewsAndLiveStreaming(clog) oe.extractGiversAndGratuity(clog) oe.extractLive(clog) oe.extractTags(clog) ai.Set("UpdateTime", source.GetUpdateTime()) LiveUrl := "https://www.openrec.tv/live/" + anchorId ai.Set("LiveUrl", sql.NullString{String: LiveUrl, Valid: true}) Uid, err := collect.InsertAnchorInfo(ai) if err != nil { log.Println(err) source.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true}) store.UpdateOperator(source) return } clog.Set("Uid", Uid) clog.Set("Platform", string(intimate.Popenrec)) clog.Set("AnchorId", anchorId) clog.Set("UpdateTime", source.GetUpdateTime()) if err = collect.InsertCollectLog(clog); err != nil { source.SetErrorMsg(sql.NullString{String: err.Error(), Valid: true}) store.UpdateOperator(source) return } source.SetOperator(int32(intimate.OperatorExtractorOK)) store.UpdateOperator(source) } else { log.Println("data is not json:\n", string(sdata)) } } } func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()") if err != nil { log.Println(err) } if !xp.NodeIter().Next() { log.Println("不存在粉丝数") } followers := strings.ReplaceAll(xp.String(), ",", "") followersInt, err := strconv.ParseInt(followers, 10, 64) if err != nil { log.Println(err) } clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true}) } func (oe *OpenrecExtractor) extractAnchorName(ai intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()") if xp.NodeIter().Next() { anchorName := xp.String() ai.Set("AnchorName", anchorName) } else { log.Println(err) } } func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) { extractor := oe.user.GetExtractor() // c-contents xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()") if err != nil { log.Println(err) } if xp.NodeIter().Next() { views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) views = strings.ReplaceAll(views, ",", "") viewsint, err := strconv.Atoi(views) if err != nil { log.Println(err) } clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true}) clog.Set("IsLiveStreaming", int32(1)) } } func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) { // extractor := oe.user.GetExtractor() giverjson := oe.supporters.GetSource() var givers []interface{} var gratuity int64 = 0 for _, v := range giverjson.Array() { giverSource := gjson.Parse(v.String()) for _, item := range giverSource.Get("data.items").Array() { givers = append(givers, item.Map()) gratuity += item.Get("total_yells").Int() } } giversbytes, err := json.Marshal(givers) if err != nil { log.Println(err) clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true}) } else { clog.Set("Giver", giversbytes) } clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true}) } func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) { extractor := oe.userLive.GetExtractor() mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})").FindStringSubmatch(oe.userLive.GetSource().Str) if len(mathes) == 2 { clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true}) content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") if err != nil { log.Println(err) } iter := content.NodeIter() if iter.Next() { tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) if err != nil { log.Println(err) } clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true}) duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") if err != nil { log.Println(err) } diter := duration.NodeIter() if diter.Next() { dt, err := intimate.ParseDuration(diter.Node().NodeValue()) if err != nil { log.Println(err) } endtm := tm.Add(dt) clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true}) } } } } func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) { var tags []string matheslist := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1) for _, m := range matheslist { tags = append(tags, m[1]) } log.Println(tags) tagsBytes, err := json.Marshal(tags) if err != nil { log.Println(err) } clog.Set("Tags", tagsBytes) }