2020-05-20 08:05:33 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"log"
|
|
|
|
"strings"
|
|
|
|
"testing"
|
|
|
|
|
|
|
|
"github.com/474420502/hunter"
|
|
|
|
)
|
|
|
|
|
|
|
|
type AreaCode struct {
|
|
|
|
hunter.PreCurlUrl
|
|
|
|
}
|
|
|
|
|
|
|
|
func (acode *AreaCode) Execute(cxt *hunter.TaskContext) {
|
|
|
|
content, err := cxt.Hunt()
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
extractor := hunter.NewExtractor(content.Content())
|
|
|
|
xp, err := extractor.XPath("//div[@class='ip']")
|
|
|
|
if err != nil {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
texts, errlist := xp.ForEachText("./h4")
|
|
|
|
if len(errlist) != 0 {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var provinces []*NameCode
|
|
|
|
provincesdict := make(map[string]*NameCode)
|
|
|
|
provincemap := make(map[string]string)
|
|
|
|
for _, t := range texts {
|
|
|
|
pcode := strings.Split(t, " ")
|
|
|
|
provincemap[pcode[1]] = pcode[0]
|
|
|
|
|
|
|
|
nc := &NameCode{Name: pcode[0], Code: pcode[1]}
|
|
|
|
provincesdict[pcode[1]] = nc
|
|
|
|
provinces = append(provinces, nc)
|
|
|
|
}
|
|
|
|
// log.Println(texts)
|
|
|
|
|
|
|
|
nxp, errlist := xp.ForEach("./ul//li")
|
|
|
|
if len(errlist) != 0 {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
texts, errlist = nxp.ForEachText("./h5")
|
|
|
|
if len(errlist) != 0 {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
var temp []string
|
|
|
|
// var areas []*NameCode
|
|
|
|
areasdict := make(map[string]*NameCode)
|
|
|
|
for _, t := range texts {
|
|
|
|
pcode := strings.Split(t, " ")
|
|
|
|
var name, code string
|
|
|
|
var nval string
|
|
|
|
switch pcode[0] {
|
|
|
|
case "市辖区":
|
|
|
|
name = provincemap[pcode[1][0:2]] + pcode[0]
|
|
|
|
code = pcode[1]
|
|
|
|
case "县":
|
|
|
|
name = provincemap[pcode[1][0:2]] + pcode[0] + "区"
|
|
|
|
code = pcode[1]
|
|
|
|
case "省直辖县级行政区划":
|
|
|
|
name = provincemap[pcode[1][0:2]] + pcode[0]
|
|
|
|
code = pcode[1]
|
|
|
|
default:
|
|
|
|
name = pcode[0]
|
|
|
|
code = pcode[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
nval = name + " " + code
|
|
|
|
temp = append(temp, nval)
|
|
|
|
|
|
|
|
nc := &NameCode{Name: name, Code: code}
|
|
|
|
areasdict[code] = nc
|
|
|
|
pnc := provincesdict[code[0:2]]
|
|
|
|
pnc.Child = append(pnc.Child, nc)
|
|
|
|
|
|
|
|
// areas = append(areas, nc)
|
|
|
|
}
|
|
|
|
|
|
|
|
log.Println(temp)
|
|
|
|
|
|
|
|
texts, errlist = nxp.ForEachText("./ul//li")
|
|
|
|
if len(errlist) != 0 {
|
|
|
|
panic(err)
|
|
|
|
}
|
|
|
|
log.Println(texts)
|
|
|
|
|
|
|
|
for _, t := range texts {
|
|
|
|
pcode := strings.Split(t, " ")
|
|
|
|
|
|
|
|
var name, code string
|
|
|
|
switch pcode[0] {
|
|
|
|
case "市辖区":
|
|
|
|
name = areasdict[pcode[1][0:4]].Name + pcode[0]
|
|
|
|
code = pcode[1]
|
|
|
|
default:
|
|
|
|
name = pcode[0]
|
|
|
|
code = pcode[1]
|
|
|
|
}
|
|
|
|
|
|
|
|
nc := &NameCode{Name: name, Code: code}
|
|
|
|
|
|
|
|
anc := areasdict[nc.Code[0:4]]
|
|
|
|
anc.Child = append(anc.Child, nc)
|
|
|
|
}
|
|
|
|
|
|
|
|
kl := &KeyList{}
|
|
|
|
for _, p := range provinces {
|
|
|
|
kl.AppendKey(p)
|
|
|
|
}
|
|
|
|
SaveData("./data/province.gob", kl)
|
|
|
|
}
|
|
|
|
|
2020-05-20 08:16:45 +00:00
|
|
|
func estHunterCountry(t *testing.T) {
|
2020-05-20 08:05:33 +00:00
|
|
|
|
|
|
|
// gob.Register(&NameCode{})
|
|
|
|
|
|
|
|
defer db.Close()
|
|
|
|
|
|
|
|
curlBash := `curl 'http://www.ip33.com/area_code.html'`
|
|
|
|
|
|
|
|
ht := hunter.NewHunter() // first params PreCurlUrl
|
|
|
|
curl := hunter.PreCurlUrl(curlBash)
|
|
|
|
ht.AddTask(&AreaCode{PreCurlUrl: curl})
|
|
|
|
|
|
|
|
ht.Execute()
|
|
|
|
}
|
|
|
|
|
|
|
|
func estLoadProvince(t *testing.T) {
|
|
|
|
kl := &KeyList{}
|
|
|
|
LoadGob("./data/province.gob", kl)
|
2020-05-20 08:51:40 +00:00
|
|
|
t.Error(GetRandomKeyBySlice(kl.GetKeys()).(NameCode))
|
2020-05-20 08:05:33 +00:00
|
|
|
}
|