package main import ( "database/sql" "encoding/json" "intimate" "log" "regexp" "strconv" "strings" "time" "github.com/474420502/extractor" "github.com/tidwall/gjson" ) var estore = intimate.NewStoreExtractor() var sstore = intimate.NewStoreSource(string(intimate.STOpenrec)) // OpenrecExtractor 提取方法 type OpenrecExtractor struct { user *intimate.ExtractorSource userLive *intimate.ExtractorSource supporters *intimate.ExtractorSource } type UserInfo struct { UserName string `exp:"//p[ contains(@class, 'c-global__user__profile__list__name__text')]"` Followers int `exp:"//p[@class='c-global__user__count__row__right js-userCountFollowers']" mth:"r:ParseNumber"` Views int `exp:"//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']" mth:"r:ExtractNumber"` } func (oe *OpenrecExtractor) Execute() { ps := intimate.NewPerfectShutdown() var lasterr error = nil for !ps.IsClose() { var err error source, err := sstore.Pop(intimate.TOpenrecUser, 0) if err != nil { if err != lasterr { log.Println(err, lasterr) lasterr = err } time.Sleep(time.Second * 5) continue } sdata := source.Ext.([]byte) datamap := gjson.ParseBytes(sdata).Map() source.Operator = int32(intimate.OperatorError) userId := datamap["var_user_id"].String() streamer := &intimate.Streamer{} streamer.UserId = userId // streamer.Platform = intimate.Popenrec htmlUser := datamap["html_user"] oe.user = intimate.NewExtractorSource(&htmlUser) oe.user.CreateExtractor() userEtor := extractor.ExtractHtmlString(htmlUser.String()) log.Println(userEtor.GetObjectByTag(UserInfo{})) htmlLive := datamap["html_live"] oe.userLive = intimate.NewExtractorSource(&htmlLive) oe.userLive.CreateExtractor() jsonSupporters := datamap["json_supporters"] oe.supporters = intimate.NewExtractorSource(&jsonSupporters) clog := &intimate.CollectLog{} // log.Println(anchorId) oe.extractFollowers(clog) oe.extractUserName(streamer) oe.extractViewsAndLiveStreaming(clog) oe.extractGiversAndGratuity(clog) oe.extractLive(clog) oe.extractTags(clog) streamer.Uid = source.StreamerId.Int64 streamer.UpdateTime = source.UpdateTime streamer.Tags = clog.Tags clog.Platform = intimate.Popenrec clog.UserId = userId clog.UpdateTime = source.UpdateTime logUid := estore.InsertClog(clog) LiveUrl := "https://www.openrec.tv/live/" + userId streamer.LiveUrl = sql.NullString{String: LiveUrl, Valid: true} streamer.LatestLogUid = logUid // streamer.Operator = 0 log.Println(streamer.UserId) estore.Update(streamer, "user_name", streamer.UserName, "user_id", streamer.UserId, "live_url", streamer.LiveUrl, "latest_log_uid", streamer.LatestLogUid, "update_time", streamer.UpdateTime, "tags", streamer.Tags, ) source.Operator = int32(intimate.OperatorExtractorOK) sstore.UpdateOperator(source) } } func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()") if err != nil { log.Println(err) } if !xp.NodeIter().Next() { log.Println("不存在粉丝数") } followers := strings.ReplaceAll(xp.String(), ",", "") followersInt, err := strconv.ParseInt(followers, 10, 64) if err != nil { log.Println(err) } clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true}) } func (oe *OpenrecExtractor) extractUserName(streamer intimate.ISet) { extractor := oe.user.GetExtractor() xp, err := extractor.XPathResult("//p[ contains(@class, 'c-global__user__profile__list__name__text')]/text()") if err != nil { log.Println(err) } else { if xp.NodeIter().Next() { userName := xp.String() streamer.Set("UserName", sql.NullString{String: userName, Valid: true}) } } } func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) { extractor := oe.user.GetExtractor() // c-contents xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()") if err != nil { log.Println(err) } if xp.NodeIter().Next() { views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) views = strings.ReplaceAll(views, ",", "") viewsint, err := strconv.Atoi(views) if err != nil { log.Println(err) } clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true}) clog.Set("IsLiveStreaming", true) } } func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) { // extractor := oe.user.GetExtractor() giverjson := oe.supporters.GetSource() var givers []interface{} var gratuity int64 = 0 for _, v := range giverjson.Array() { giverSource := gjson.Parse(v.String()) for _, item := range giverSource.Get("data.items").Array() { givers = append(givers, item.Map()) gratuity += item.Get("total_yells").Int() } } giversbytes, err := json.Marshal(givers) if err != nil { log.Println(err) clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true}) } else { clog.Set("Giver", giversbytes) } clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true}) } func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) { extractor := oe.userLive.GetExtractor() mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})").FindStringSubmatch(oe.userLive.GetSource().Str) if len(mathes) == 2 { clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true}) content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") if err != nil { log.Println(err) } iter := content.NodeIter() if iter.Next() { tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) if err != nil { log.Println(err) } // log.Println(iter.Node().NodeValue(), tm.Local()) clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true}) duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") if err != nil { log.Println(err) } diter := duration.NodeIter() if diter.Next() { dt, err := intimate.ParseDuration(diter.Node().NodeValue()) if err != nil { log.Println(err) } endtm := tm.Add(dt) clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true}) } } } } func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) { var tags []string matheslist := regexp.MustCompile(`<[^>]+TagButton[^>]+>([^<]{1,100})<`).FindAllStringSubmatch(oe.userLive.GetSource().Str, -1) for _, m := range matheslist { tags = append(tags, m[1]) } tagsBytes, err := json.Marshal(tags) if err != nil { log.Println(err) } clog.Set("Tags", tagsBytes) }