data_workshop/province_test.go

141 lines
2.7 KiB
Go
Raw Normal View History

package main
import (
"log"
"strings"
"testing"
"github.com/474420502/hunter"
)
// AreaCode is a hunter task whose target request is supplied by the embedded
// hunter.PreCurlUrl. Its Execute method processes the fetched page.
type AreaCode struct {
	hunter.PreCurlUrl
}
// Execute fetches the page for this task, extracts a three-level tree of
// NameCode entries from it and stores the tree in ./data/province.gob.
//
// The page is expected to contain a //div[@class='ip'] element in which:
//   - every ./h4 text is a top-level "name code" pair,
//   - every ./ul//li/h5 text is a second-level "name code" pair whose code
//     starts with the 2-character code of its top-level parent,
//   - every nested ./ul//li text is a third-level "name code" pair whose code
//     starts with the 4-character code of its second-level parent.
//
// Generic second-level names ("市辖区", "县", "省直辖县级行政区划") are prefixed with
// the parent's name, and generic third-level names ("市辖区") likewise, so they
// stay distinguishable once detached from their parent.
//
// Execute panics when the fetch or any extraction step fails, or when an
// entry is malformed or refers to a parent that was not seen.
func (acode *AreaCode) Execute(cxt *hunter.TaskContext) {
	content, err := cxt.Hunt()
	if err != nil {
		panic(err)
	}

	extractor := hunter.NewExtractor(content.Content())
	xp, err := extractor.XPath("//div[@class='ip']")
	if err != nil {
		panic(err)
	}

	// splitNameCode breaks a "name code" text on single spaces and returns the
	// first two fields, panicking with the offending text when fewer than two
	// fields are present (instead of an anonymous index-out-of-range).
	splitNameCode := func(text string) (name, code string) {
		parts := strings.Split(text, " ")
		if len(parts) < 2 {
			log.Panicf("area code: entry %q is not in \"name code\" form", text)
		}
		return parts[0], parts[1]
	}

	// Level 1: top-level entries, kept in page order and indexed by code.
	texts, errlist := xp.ForEachText("./h4")
	if len(errlist) != 0 {
		// Report the collected errors; err is nil at this point.
		log.Panicf("area code: reading ./h4 entries: %v", errlist)
	}
	var provinces []*NameCode
	provincesdict := make(map[string]*NameCode)
	for _, t := range texts {
		name, code := splitNameCode(t)
		nc := &NameCode{Name: name, Code: code}
		provincesdict[code] = nc
		provinces = append(provinces, nc)
	}

	// Level 2: entries attached to the top-level entry sharing their 2-char prefix.
	nxp, errlist := xp.ForEach("./ul//li")
	if len(errlist) != 0 {
		log.Panicf("area code: selecting ./ul//li nodes: %v", errlist)
	}
	texts, errlist = nxp.ForEachText("./h5")
	if len(errlist) != 0 {
		log.Panicf("area code: reading ./h5 entries: %v", errlist)
	}
	var temp []string
	areasdict := make(map[string]*NameCode)
	for _, t := range texts {
		label, code := splitNameCode(t)
		if len(code) < 2 {
			log.Panicf("area code: code in entry %q is shorter than 2 characters", t)
		}
		pnc := provincesdict[code[0:2]]
		if pnc == nil {
			log.Panicf("area code: entry %q has no parent with code %q", t, code[0:2])
		}

		name := label
		switch label {
		case "市辖区", "省直辖县级行政区划":
			name = pnc.Name + label
		case "县":
			name = pnc.Name + label + "区"
		}

		temp = append(temp, name+" "+code)
		nc := &NameCode{Name: name, Code: code}
		areasdict[code] = nc
		pnc.Child = append(pnc.Child, nc)
	}
	log.Println(temp)

	// Level 3: entries attached to the second-level entry sharing their 4-char prefix.
	texts, errlist = nxp.ForEachText("./ul//li")
	if len(errlist) != 0 {
		log.Panicf("area code: reading nested ./ul//li entries: %v", errlist)
	}
	log.Println(texts)
	for _, t := range texts {
		label, code := splitNameCode(t)
		if len(code) < 4 {
			log.Panicf("area code: code in entry %q is shorter than 4 characters", t)
		}
		anc := areasdict[code[0:4]]
		if anc == nil {
			log.Panicf("area code: entry %q has no parent with code %q", t, code[0:4])
		}

		name := label
		if label == "市辖区" {
			name = anc.Name + label
		}
		anc.Child = append(anc.Child, &NameCode{Name: name, Code: code})
	}

	kl := &KeyList{}
	for _, p := range provinces {
		kl.AppendKey(p)
	}
	SaveData("./data/province.gob", kl)
}
// TestHunterCountry runs a single AreaCode task against the ip33.com area-code
// page through a hunter instance. The package-level db is closed when the
// test returns.
func TestHunterCountry(t *testing.T) {
	// gob.Register(&NameCode{})
	defer db.Close()

	crawler := hunter.NewHunter() // first params PreCurlUrl
	task := &AreaCode{
		PreCurlUrl: hunter.PreCurlUrl(`curl 'http://www.ip33.com/area_code.html'`),
	}
	crawler.AddTask(task)
	crawler.Execute()
}
// estLoadProvince loads ./data/province.gob into a KeyList, picks a random key
// from it and reports that key through t.Error. The name lacks the "Test"
// prefix, so `go test` does not run it.
func estLoadProvince(t *testing.T) {
	list := &KeyList{}
	LoadGob("./data/province.gob", list)

	picked := GetRandomKeyByList(list.GetKeys())
	t.Error(picked.(NameCode))
}