删除数据提取依赖

This commit is contained in:
huangsimin 2020-06-03 14:27:09 +08:00
parent 3bbcd53dd8
commit 3b11afe037
3 changed files with 60 additions and 60 deletions

89
base.go
View File

@ -4,13 +4,10 @@ import (
"bytes"
"compress/flate"
"encoding/gob"
fmt "fmt"
"log"
"math/rand"
"os"
"reflect"
"github.com/tecbot/gorocksdb"
)
type KeyKind struct {
@ -46,29 +43,29 @@ func CheckErrorPanic(err error) {
}
// OpenDB 打开rocksdb
func OpenDB() *gorocksdb.DB {
bbto := gorocksdb.NewDefaultBlockBasedTableOptions()
// func OpenDB() *gorocksdb.DB {
// bbto := gorocksdb.NewDefaultBlockBasedTableOptions()
bbto.SetBlockCache(gorocksdb.NewLRUCache(2 << 30))
bbto.SetCacheIndexAndFilterBlocksWithHighPriority(true)
// bbto.SetBlockCache(gorocksdb.NewLRUCache(2 << 30))
// bbto.SetCacheIndexAndFilterBlocksWithHighPriority(true)
opts := gorocksdb.NewDefaultOptions()
// opts := gorocksdb.NewDefaultOptions()
bbto.SetFilterPolicy(gorocksdb.NewBloomFilter(16))
// bbto.SetFilterPolicy(gorocksdb.NewBloomFilter(16))
opts.SetBlockBasedTableFactory(bbto)
opts.SetCreateIfMissing(true)
opts.SetCreateIfMissingColumnFamilies(true)
opts.SetCompression(gorocksdb.LZ4Compression)
// opts.SetBlockBasedTableFactory(bbto)
// opts.SetCreateIfMissing(true)
// opts.SetCreateIfMissingColumnFamilies(true)
// opts.SetCompression(gorocksdb.LZ4Compression)
db, err := gorocksdb.OpenDb(opts, ".rocksdb")
CheckErrorPanic(err)
return db
}
// db, err := gorocksdb.OpenDb(opts, ".rocksdb")
// CheckErrorPanic(err)
// return db
// }
var db = OpenDB()
var wopts = gorocksdb.NewDefaultWriteOptions()
var ropts = gorocksdb.NewDefaultReadOptions()
// var db = OpenDB()
// var wopts = gorocksdb.NewDefaultWriteOptions()
// var ropts = gorocksdb.NewDefaultReadOptions()
// IKeyList key list interface
type IKeyList interface {
@ -95,37 +92,37 @@ func SaveData(fname string, v interface{}) {
}
// SaveGob 用于解析rocksdb源数据, 爬取数据存储在rocksdb后取出 序列化存储
func SaveGob(fname string, datatype reflect.Type, kfs ...KeyKind) {
// func SaveGob(fname string, datatype reflect.Type, kfs ...KeyKind) {
// cl := &CountryList{}
kl := &KeyList{}
// // cl := &CountryList{}
// kl := &KeyList{}
i := 0
iter := db.NewIterator(ropts)
bkey := []byte(kfs[0].Key)
iter.Seek(bkey)
// i := 0
// iter := db.NewIterator(ropts)
// bkey := []byte(kfs[0].Key)
// iter.Seek(bkey)
for ; iter.ValidForPrefix(bkey); iter.Next() {
i++
first := iter.Value().Data()
data := reflect.New(datatype).Elem()
// for ; iter.ValidForPrefix(bkey); iter.Next() {
// i++
// first := iter.Value().Data()
// data := reflect.New(datatype).Elem()
data.FieldByName(kfs[0].FieldName).Set(reflect.ValueOf(first))
// data.FieldByName(kfs[0].FieldName).Set(reflect.ValueOf(first))
// country := &Country{}
// country.Name = name
for n := 1; n < len(kfs); n++ {
v, err := db.Get(ropts, []byte(fmt.Sprintf(kfs[n].Key, first)))
if err != nil {
panic(err)
}
fv := data.FieldByName(kfs[n].FieldName)
fv.Set(reflect.ValueOf(v.Data()))
}
kl.AppendKey(data.Interface())
}
SaveData(fname, kl)
}
// // country := &Country{}
// // country.Name = name
// for n := 1; n < len(kfs); n++ {
// v, err := db.Get(ropts, []byte(fmt.Sprintf(kfs[n].Key, first)))
// if err != nil {
// panic(err)
// }
// fv := data.FieldByName(kfs[n].FieldName)
// fv.Set(reflect.ValueOf(v.Data()))
// }
// kl.AppendKey(data.Interface())
// }
// SaveData(fname, kl)
// }
// LoadGob load gob from file
func LoadGob(fname string, v IKeyList) {

9
go.mod
View File

@ -4,16 +4,13 @@ go 1.14
require (
github.com/474420502/hunter v0.1.1
github.com/davecgh/go-spew v1.1.1
github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c // indirect
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 // indirect
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 // indirect
github.com/golang/protobuf v1.4.1
github.com/grpc-ecosystem/grpc-gateway v1.14.5
github.com/qedus/osmpbf v1.1.0
github.com/stretchr/testify v1.5.1 // indirect
github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c
google.golang.org/genproto v0.0.0-20200511104702-f5ebc3bea380
google.golang.org/grpc v1.29.1
google.golang.org/protobuf v1.22.0
gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7 // indirect
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 // indirect
launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 // indirect
)

22
go.sum
View File

@ -12,11 +12,14 @@ github.com/474420502/requests v1.5.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2
github.com/474420502/requests v1.5.1 h1:miv6O4RMbZ8I0ZdUTLf/EU5Dmewc/4IL/DmUMwtuv8M=
github.com/474420502/requests v1.5.1/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802 h1:1BDTz0u9nC3//pOCMdNH+CiXJVYJh5UQNCOBG7jbELc=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e h1:4ZrkT/RzpnROylmoQL57iVUL57wGKTR5O6KpVnbm2tA=
github.com/BurntSushi/xgbutil v0.0.0-20160919175755-f7c97cef3b4e/go.mod h1:uw9h2sd4WWHOPdJ13MQpwK5qYWKYDumDqxWWIknEQ+k=
github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8=
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
github.com/antihax/optional v0.0.0-20180407024304-ca021399b1a6/go.mod h1:V8iCPQYkqmusNa815XgQio277wI47sdRh1dUOLdyC6Q=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
github.com/blang/semver v3.5.1+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk=
@ -27,18 +30,14 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/elazarl/goproxy v0.0.0-20190711103511-473e67f1d7d2 h1:aZtFdDNWY/yH86JPR2WX/PN63635VsE/f/nXNPAbYxY=
github.com/elazarl/goproxy v0.0.0-20190711103511-473e67f1d7d2/go.mod h1:/Zj4wYkgs4iZTTu3o/KG3Itv/qCCa8VVMlb3i9OVuzc=
github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2 h1:dWB6v3RcOy03t/bUadywsbyrQwCqZeNIEX6M1OtSZOM=
github.com/elazarl/goproxy/ext v0.0.0-20190711103511-473e67f1d7d2/go.mod h1:gNh8nYJoAm43RfaxurUnxr+N1PwuFV3ZMl/efxlIlY8=
github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4=
github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98=
github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c=
github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c h1:8ISkoahWXwZR41ois5lSJBSVw4D0OV19Ht/JSTzvSv0=
github.com/facebookgo/ensure v0.0.0-20200202191622-63f1cf65ac4c/go.mod h1:Yg+htXGokKKdzcwhuNDwVvN+uBxDGXJ7G/VN1d8fa64=
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052 h1:JWuenKqqX8nojtoVVWjGfOF9635RETekkoH6Cc9SX0A=
github.com/facebookgo/stack v0.0.0-20160209184415-751773369052/go.mod h1:UbMTZqLaRiH3MsBH8va0n7s1pQYcu3uTb8G4tygF4Zg=
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4 h1:7HZCaLC5+BZpmbhCOZJ293Lz68O7PYrF2EzeiFMwCLk=
github.com/facebookgo/subset v0.0.0-20200203212716-c811ad88dec4/go.mod h1:5tD+neXqOorC30/tWg0LCSkrqj/AR6gu8yY8/fpw1q0=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
@ -92,10 +91,11 @@ github.com/stretchr/testify v1.5.1 h1:nOGnQDM7FYENwehXlg/kFVnos3rEvtKTjRvOWSzb6H
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/tebeka/selenium v0.9.9 h1:cNziB+etNgyH/7KlNI7RMC1ua5aH1+5wUlFQyzeMh+w=
github.com/tebeka/selenium v0.9.9/go.mod h1:5Fr8+pUvU6B1OiPfkdCKdXZyr5znvVkxuPd0NOdZCQc=
github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c h1:g+WoO5jjkqGAzHWCjJB1zZfXPIAaDpzXIEJ0eS6B5Ok=
github.com/tecbot/gorocksdb v0.0.0-20191217155057-f0fad39f321c/go.mod h1:ahpPrc7HpcfEWDQRZEmnXMzHY03mLDYMCxeDzy46i+8=
github.com/tidwall/gjson v1.3.2 h1:+7p3qQFaH3fOMXAJSrdZwGKcOO/lYdGS0HqGhPqDdTI=
github.com/tidwall/gjson v1.3.2/go.mod h1:P256ACg0Mn+j1RXIDXoss50DeIABTYK1PULOJHhxOls=
github.com/tidwall/match v1.0.1 h1:PnKP62LPNxHKTwvHHZZzdOAOCtsJTjo6dZLCwpKm5xc=
github.com/tidwall/match v1.0.1/go.mod h1:LujAq0jyVjBy028G1WhWfIzbpQfMO8bBZ6Tyb0+pL9E=
github.com/tidwall/pretty v1.0.0 h1:HsD+QiTn7sK6flMKIvNmpqz1qrpP3Ps6jOKIKMooyg4=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
@ -197,6 +197,8 @@ google.golang.org/protobuf v1.22.0 h1:cJv5/xdbk1NnMPR1VP9+HU6gupuG9MLBoH1r6RHZ2M
google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7 h1:zibSPXbkfB1Dwl76rJgLa68xcdHu42qmFTe6vAnU4wA=
gopkg.in/xmlpath.v1 v1.0.0-20140413065638-a146725ea6e7/go.mod h1:wo0SW5T6XqIKCCAge330Cd5sm+7VI6v85OrQHIk50KM=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.3 h1:fvjTMHxHEw/mxHbtzPi3JCcKXQRAnQTBRo6YCJSVHKI=
gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
@ -204,4 +206,8 @@ honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087 h1:Izowp2XBH6Ya6rv+hqbceQyw/gSGoXfH/UPoTGduL54=
launchpad.net/gocheck v0.0.0-20140225173054-000000000087/go.mod h1:hj7XX3B/0A+80Vse0e+BUHsHMTEhd0O4cpUHr/e/BUM=
launchpad.net/xmlpath v0.0.0-20130614043138-000000000004 h1:B8nNZBUrx8YufDCAJjvO/lVs4GxXMQHyrjwJdJzXMFg=
launchpad.net/xmlpath v0.0.0-20130614043138-000000000004/go.mod h1:vqyExLOM3qBx7mvYRkoxjSCF945s0mbe7YynlKYXtsA=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=