package main import ( "intimate" "time" "github.com/474420502/extractor" "github.com/474420502/focus/compare" "github.com/474420502/focus/tree/heap" "log" "testing" "github.com/474420502/requests" ) // sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting)) // estore 解析存储连接实例 var estore *intimate.StoreExtractor = intimate.NewStoreExtractor() func TestMain(t *testing.T) { searchurl := "https://twitcasting.tv/rankingindex.php" queuedict := make(map[string]bool) queue := heap.New(compare.String) queue.Put(searchurl) queuedict[searchurl] = true for surl, ok := queue.Pop(); ok; surl, ok = queue.Pop() { ses := requests.NewSession() resp, err := ses.Get(surl.(string)).Execute() if err != nil { panic(err) } etor := extractor.ExtractXml(resp.Content()) // doc, err := libxml2.ParseHTML(resp.Content()) // if err != nil { // panic(err) // } // defer doc.Free() result, err := etor.XPath("//*[contains(@class, 'tag')]/@href") if err != nil { panic(err) } // result, err := doc.Find("//*[contains(@class, 'tag')]/@href") // if err != nil { // panic(err) // } // defer result.Free() iter := result.NodeIter() for iter.Next() { wurl := "https://twitcasting.tv" + iter.Node().NodeValue() if ok := queuedict[wurl]; !ok { log.Println(wurl) sl := &intimate.StreamerList{} sl.Platform = intimate.Ptwitcasting sl.Url = wurl sl.Operator = 0 sl.UpdateInterval = 120 sl.UpdateTime = time.Now() estore.InsertStreamerList(sl) queue.Put(wurl) queuedict[wurl] = true } } // doc.Find("//div[@class='tw-search-result-row']") xps, err := etor.XPaths("//div[@class='tw-search-result-row']") if err != nil { log.Println(surl, err) continue } // xps.ForEachTag(SearchProfile{}) // texts, errs := xps.ForEachText(".//span[@class='username']") // if len(errs) > 0 { // t.Error(errs) // } var splist = xps.ForEachTag(SearchProfile{}) for _, isp := range splist { sp := isp.(*SearchProfile) sp.UserId = sp.LiveUrl[1:] // log.Println(sp.(SearchProfile)) } for _, isp := range splist { log.Println(isp.(*SearchProfile)) } log.Println("finish remain", queue.Size()) } } type SearchProfile struct { UserName string `exp:".//span[@class='username']" method:"Text"` UserId string // `exp:".//span[@class='fullname']" method:"Text"` LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"` }