2018-12-18 10:45:04 +00:00
|
|
|
package parser
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
2018-12-19 17:31:26 +00:00
|
|
|
"log"
|
2018-12-19 18:46:28 +00:00
|
|
|
"regexp"
|
2018-12-19 19:28:58 +00:00
|
|
|
"strconv"
|
2018-12-19 18:46:28 +00:00
|
|
|
"strings"
|
2018-12-18 10:45:04 +00:00
|
|
|
"testing"
|
2018-12-19 17:38:57 +00:00
|
|
|
|
2018-12-19 19:36:52 +00:00
|
|
|
"github.com/tidwall/gjson"
|
2018-12-18 10:45:04 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
func TestParser(t *testing.T) {
|
2018-12-19 09:51:39 +00:00
|
|
|
a := NewADParser(12)
|
2018-12-18 10:45:04 +00:00
|
|
|
data, err := json.Marshal(a)
|
|
|
|
if err != nil {
|
|
|
|
t.Error(err)
|
|
|
|
}
|
|
|
|
t.Error(string(data))
|
|
|
|
}
|
2018-12-19 09:51:39 +00:00
|
|
|
|
|
|
|
type Toutiao struct {
|
|
|
|
Parser
|
|
|
|
// Url string // "amqp://aso:Wtu(!Ft559W%>mHK~i@172.19.30.60:5672/test_adspider"
|
|
|
|
}
|
|
|
|
|
|
|
|
func (tt *Toutiao) GetSpiderID() int {
|
|
|
|
return 1000073
|
|
|
|
}
|
|
|
|
|
2018-12-20 02:42:47 +00:00
|
|
|
func (tt *Toutiao) ToDoParser(adstring string) (presult string, err error) {
|
2018-12-19 17:31:26 +00:00
|
|
|
|
2018-12-20 02:42:47 +00:00
|
|
|
defer func() {
|
|
|
|
if err = recover().(error); err != nil {
|
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
}()
|
2018-12-19 17:51:32 +00:00
|
|
|
|
2018-12-19 18:03:30 +00:00
|
|
|
var adlist []string
|
2018-12-20 02:42:47 +00:00
|
|
|
if err = json.Unmarshal([]byte(adstring), &adlist); err != nil {
|
2018-12-19 17:31:26 +00:00
|
|
|
log.Println(err)
|
|
|
|
}
|
|
|
|
|
2018-12-19 18:03:30 +00:00
|
|
|
for _, data := range adlist {
|
2018-12-20 02:34:56 +00:00
|
|
|
log.Println("test json -----------------------")
|
2018-12-20 02:26:18 +00:00
|
|
|
var v interface{}
|
|
|
|
|
2018-12-20 02:38:04 +00:00
|
|
|
// log.Println(gjson.Unmarshal([]byte(data), &v), gjson.Parse(data).Get("app").Exists())
|
2018-12-19 19:38:21 +00:00
|
|
|
|
2018-12-19 19:18:44 +00:00
|
|
|
data = strings.Trim(data, "\"")
|
2018-12-19 19:42:59 +00:00
|
|
|
unq, _ := strconv.Unquote("\"" + data + "\"")
|
2018-12-20 02:32:56 +00:00
|
|
|
log.Println(gjson.Unmarshal([]byte(data), &v), gjson.Parse(data).Get("app").Exists())
|
2018-12-19 19:42:59 +00:00
|
|
|
data = unq
|
2018-12-20 02:32:56 +00:00
|
|
|
log.Println(gjson.Unmarshal([]byte(data), &v), gjson.Parse(data).Get("app").Exists())
|
2018-12-19 19:49:05 +00:00
|
|
|
|
|
|
|
result := gjson.Parse(data)
|
2018-12-20 02:33:44 +00:00
|
|
|
// log.Println(result.String())
|
2018-12-19 19:51:05 +00:00
|
|
|
if result.Get("mixed").Exists() {
|
2018-12-19 19:58:16 +00:00
|
|
|
log.Println("this is mixed")
|
2018-12-20 02:30:02 +00:00
|
|
|
log.Println(result.Get("mixed.track_url").Exists())
|
2018-12-19 19:49:05 +00:00
|
|
|
}
|
2018-12-19 19:38:21 +00:00
|
|
|
|
2018-12-19 19:27:01 +00:00
|
|
|
json.Unmarshal([]byte(data), &v)
|
2018-12-19 19:40:21 +00:00
|
|
|
// spew.Dump(v)
|
2018-12-19 18:46:28 +00:00
|
|
|
regexp.Compile(``)
|
2018-12-19 18:03:30 +00:00
|
|
|
}
|
|
|
|
|
2018-12-19 09:51:39 +00:00
|
|
|
adparser := NewADParser(tt.GetSpiderID())
|
|
|
|
data, err := adparser.ToJSON()
|
|
|
|
if err != nil {
|
2018-12-19 10:32:06 +00:00
|
|
|
return "", err
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
2018-12-19 10:32:06 +00:00
|
|
|
// log.Println(string(data))
|
2018-12-19 10:55:10 +00:00
|
|
|
return string(data), nil
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestParserToutiao(t *testing.T) {
|
2018-12-19 17:54:48 +00:00
|
|
|
log.SetFlags(log.Llongfile)
|
|
|
|
|
2018-12-19 09:51:39 +00:00
|
|
|
tt := Toutiao{}
|
2018-12-19 10:29:44 +00:00
|
|
|
tt.ConfigLogDB("logdb.yaml")
|
2018-12-19 09:51:39 +00:00
|
|
|
tt.ConfigQueue("queue.yaml")
|
2018-12-19 10:29:44 +00:00
|
|
|
ADParserServer(&tt)
|
2018-12-19 10:12:01 +00:00
|
|
|
|
|
|
|
t.Error("")
|
2018-12-19 09:51:39 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func TestMQ(t *testing.T) {
|
|
|
|
|
|
|
|
var l []interface{}
|
|
|
|
|
|
|
|
data := make(map[string]interface{})
|
|
|
|
data["fuck"] = "123"
|
|
|
|
|
|
|
|
l = append(l, data)
|
|
|
|
pjson, err := json.Marshal(&l)
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
t.Error(string(pjson))
|
|
|
|
|
|
|
|
que := NewQueue("amqp://spider:spider@172.16.6.109:5672/test_adspider", "ad_process", "CN")
|
|
|
|
que.Push(pjson)
|
|
|
|
|
|
|
|
// msgs, _, err := ch.Get("ad_process:CN", true)
|
|
|
|
// if err != nil {
|
|
|
|
// panic(err)
|
|
|
|
// }
|
|
|
|
// log.Println(string(msgs.Body))
|
|
|
|
|
|
|
|
}
|