diff --git a/extractor/openrec/openrec_extractor.go b/extractor/openrec/openrec_extractor.go new file mode 100644 index 0000000..7e846ba --- /dev/null +++ b/extractor/openrec/openrec_extractor.go @@ -0,0 +1,215 @@ +package main + +import ( + "database/sql" + "encoding/json" + "intimate" + "log" + "regexp" + "strconv" + "strings" + "time" + + "github.com/tidwall/gjson" +) + +// OpenrecExtractor 提取方法 +type OpenrecExtractor struct { + user *intimate.ExtractorSource + userLive *intimate.ExtractorSource + supporters *intimate.ExtractorSource +} + +func (oe *OpenrecExtractor) extractFollowers(clog intimate.ISet) { + extractor := oe.user.GetExtractor() + xp, err := extractor.XPathResult("//p[@class='c-global__user__count__row__right js-userCountFollowers']/text()") + if err != nil { + log.Println(err) + } + if !xp.NodeIter().Next() { + log.Println("不存在粉丝数") + } + + followers := strings.ReplaceAll(xp.String(), ",", "") + followersInt, err := strconv.ParseInt(followers, 10, 64) + if err != nil { + log.Println(err) + } + + clog.Set("Followers", sql.NullInt64{Int64: followersInt, Valid: true}) +} + +func (oe *OpenrecExtractor) extractAnchorName(ai intimate.ISet) { + extractor := oe.user.GetExtractor() + xp, err := extractor.XPathResult("//p[@class='c-global__user__profile__list__name__text official-icon--after']/text()") + if xp.NodeIter().Next() { + anchorName := xp.String() + ai.Set("AnchorName", anchorName) + } else { + log.Println(err) + } +} + +func (oe *OpenrecExtractor) extractViewsAndLiveStreaming(clog intimate.ISet) { + extractor := oe.user.GetExtractor() + // c-contents + xp, err := extractor.XPathResult("//ul[@class='c-contents']//p[@class='c-thumbnailVideo__footer__liveCount']/text()") + if err != nil { + log.Println(err) + } + if xp.NodeIter().Next() { + views := regexp.MustCompile(`[0-9,]+`).FindString(xp.String()) + views = strings.ReplaceAll(views, ",", "") + viewsint, err := strconv.Atoi(views) + if err != nil { + log.Println(err) + } + + clog.Set("Views", sql.NullInt64{Int64: int64(viewsint), Valid: true}) + clog.Set("IsLiveStreaming", int32(1)) + } +} + +func (oe *OpenrecExtractor) extractGiversAndGratuity(clog intimate.ISet) { + // extractor := oe.user.GetExtractor() + giverjson := oe.supporters.GetSource() + var givers []interface{} + var gratuity int64 = 0 + + for _, v := range giverjson.Array() { + giverSource := gjson.Parse(v.String()) + for _, item := range giverSource.Get("data.items").Array() { + givers = append(givers, item.Map()) + gratuity += item.Get("total_yells").Int() + } + } + + giversbytes, err := json.Marshal(givers) + if err != nil { + log.Println(err) + clog.Set("ErrorMsg", sql.NullString{String: err.Error(), Valid: true}) + } else { + clog.Set("Giver", giversbytes) + } + + clog.Set("Gratuity", sql.NullInt64{Int64: gratuity, Valid: true}) +} + +func (oe *OpenrecExtractor) extractLive(clog intimate.ISet) { + extractor := oe.userLive.GetExtractor() + mathes := regexp.MustCompile("MovieTitle__Title[^>]+>(.{1,50})").FindStringSubmatch(oe.userLive.GetSource().Str) + if len(mathes) == 2 { + + clog.Set("LiveTitle", sql.NullString{String: mathes[1], Valid: true}) + + content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") + if err != nil { + log.Println(err) + } + + iter := content.NodeIter() + if iter.Next() { + tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) + if err != nil { + log.Println(err) + } + clog.Set("LiveStartTime", sql.NullTime{Time: tm.Local(), Valid: true}) + + duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") + if err != nil { + log.Println(err) + } + + diter := duration.NodeIter() + if diter.Next() { + + dt, err := intimate.ParseDuration(diter.Node().NodeValue()) + if err != nil { + log.Println(err) + } + endtm := tm.Add(dt) + clog.Set("LiveEndTime", sql.NullTime{Time: endtm.Local(), Valid: true}) + } + } + } +} + +func (oe *OpenrecExtractor) extractTags(clog intimate.ISet) { + var tags []string + matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})]+>(.{1,50})").FindStringSubmatch(livejson.Str) - if len(mathes) == 2 { - - clog.SetShowTitle(sql.NullString{String: mathes[1], Valid: true}) - - content, err := extractor.XPathResult("//meta[@itemprop='uploadDate']/@content") - if err != nil { - t.Error(err) - } - - iter := content.NodeIter() - if iter.Next() { - tm, err := time.ParseInLocation("2006-01-02T15:04:05Z07:00", iter.Node().NodeValue(), time.Local) - if err != nil { - t.Error(err) - } - clog.SetShowStartTime(sql.NullTime{Time: tm.Local(), Valid: true}) - - duration, err := extractor.XPathResult("//meta[@itemprop='duration']/@content") - if err != nil { - t.Error(err) - } - - diter := duration.NodeIter() - if diter.Next() { - - dt, err := intimate.ParseDuration(diter.Node().NodeValue()) - if err != nil { - log.Println(err) - } - endtm := tm.Add(dt) - clog.SetShowEndTime(sql.NullTime{Time: endtm.Local(), Valid: true}) - } - } - } - - var tags []string - matheslist := regexp.MustCompile(`TagButton__Button[^>]+>(.{1,100})