2018-12-18 10:45:04 +00:00
|
|
|
package parser
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2018-12-19 17:31:26 +00:00
|
|
|
"log"
|
2018-12-19 18:46:28 +00:00
|
|
|
"regexp"
|
2018-12-19 19:28:58 +00:00
|
|
|
"strconv"
|
2018-12-19 18:46:28 +00:00
|
|
|
"strings"
|
2018-12-18 10:45:04 +00:00
|
|
|
"testing"
|
2018-12-19 17:38:57 +00:00
|
|
|
|
2018-12-19 19:36:52 +00:00
|
|
|
"github.com/tidwall/gjson"
|
|
|
|
|
2018-12-19 17:38:57 +00:00
|
|
|
"github.com/davecgh/go-spew/spew"
|
2018-12-18 10:45:04 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestParser(t *testing.T) {
|
2018-12-19 09:51:39 +00:00
|
|
|
a := NewADParser(12)
|
2018-12-18 10:45:04 +00:00
|
|
|
data, err := json.Marshal(a)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
t.Error(string(data))
|
|
|
|
}
|
2018-12-19 09:51:39 +00:00
|
|
|
|
|
|
|
type Toutiao struct {
|
|
|
|
Parser
|
|
|
|
// Url string // "amqp://aso:Wtu(!Ft559W%>mHK~i@172.19.30.60:5672/test_adspider"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tt *Toutiao) GetSpiderID() int {
|
|
|
|
return 1000073
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tt *Toutiao) ToDoParser(adstring string) (string, error) {
|
2018-12-19 17:31:26 +00:00
|
|
|
|
2018-12-19 18:03:30 +00:00
|
|
|
// adstring = strconv.QuoteToASCII(adstring)
|
|
|
|
// adstring = strings.Replace(adstring, "\\", "", -1)
|
|
|
|
// log.Println(adstring)
|
2018-12-19 17:51:32 +00:00
|
|
|
|
2018-12-19 18:03:30 +00:00
|
|
|
var adlist []string
|
2018-12-19 17:34:51 +00:00
|
|
|
if err := json.Unmarshal([]byte(adstring), &adlist); err != nil {
|
2018-12-19 17:31:26 +00:00
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
2018-12-19 17:38:57 +00:00
|
|
|
spew.Dump(adlist)
|
2018-12-19 17:31:26 +00:00
|
|
|
|
2018-12-19 18:03:30 +00:00
|
|
|
for _, data := range adlist {
|
2018-12-19 19:38:21 +00:00
|
|
|
|
2018-12-19 19:18:44 +00:00
|
|
|
data = strings.Trim(data, "\"")
|
2018-12-19 19:42:59 +00:00
|
|
|
unq, _ := strconv.Unquote("\"" + data + "\"")
|
|
|
|
data = unq
|
|
|
|
unq, _ = strconv.Unquote("\"" + data + "\"")
|
2018-12-19 19:28:58 +00:00
|
|
|
data = unq
|
2018-12-19 19:38:21 +00:00
|
|
|
var gv interface{}
|
|
|
|
gjson.Unmarshal([]byte(data), &gv)
|
2018-12-19 19:40:21 +00:00
|
|
|
spew.Dump(gv)
|
2018-12-19 19:38:21 +00:00
|
|
|
|
2018-12-19 19:27:01 +00:00
|
|
|
var v interface{}
|
|
|
|
json.Unmarshal([]byte(data), &v)
|
2018-12-19 19:40:21 +00:00
|
|
|
// spew.Dump(v)
|
2018-12-19 18:46:28 +00:00
|
|
|
regexp.Compile(``)
|
2018-12-19 18:03:30 +00:00
|
|
|
}
|
|
|
|
|
2018-12-19 09:51:39 +00:00
|
|
|
adparser := NewADParser(tt.GetSpiderID())
|
|
|
|
data, err := adparser.ToJSON()
|
|
|
|
if err != nil {
|
2018-12-19 10:32:06 +00:00
|
|
|
return "", err
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
2018-12-19 10:32:06 +00:00
|
|
|
// log.Println(string(data))
|
2018-12-19 10:55:10 +00:00
|
|
|
return string(data), nil
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestParserToutiao(t *testing.T) {
|
2018-12-19 17:54:48 +00:00
|
|
|
log.SetFlags(log.Llongfile)
|
|
|
|
|
2018-12-19 09:51:39 +00:00
|
|
|
tt := Toutiao{}
|
2018-12-19 10:29:44 +00:00
|
|
|
tt.ConfigLogDB("logdb.yaml")
|
2018-12-19 09:51:39 +00:00
|
|
|
tt.ConfigQueue("queue.yaml")
|
2018-12-19 10:29:44 +00:00
|
|
|
ADParserServer(&tt)
|
2018-12-19 10:12:01 +00:00
|
|
|
|
|
|
|
t.Error("")
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestMQ(t *testing.T) {
|
|
|
|
|
|
|
|
var l []interface{}
|
|
|
|
|
|
|
|
data := make(map[string]interface{})
|
|
|
|
data["fuck"] = "123"
|
|
|
|
|
|
|
|
l = append(l, data)
|
|
|
|
pjson, err := json.Marshal(&l)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
t.Error(string(pjson))
|
|
|
|
|
|
|
|
que := NewQueue("amqp://spider:spider@172.16.6.109:5672/test_adspider", "ad_process", "CN")
|
|
|
|
que.Push(pjson)
|
|
|
|
|
|
|
|
// msgs, _, err := ch.Get("ad_process:CN", true)
|
|
|
|
// if err != nil {
|
|
|
|
// panic(err)
|
|
|
|
// }
|
|
|
|
// log.Println(string(msgs.Body))
|
|
|
|
|
|
|
|
}
|