TODO: 重构XPath 的使用 shit
This commit is contained in:
parent
0bff7169ec
commit
23fa32b4ae
|
@ -1,3 +1,3 @@
|
||||||
database:
|
database:
|
||||||
source_uri: "root:@tcp(127.0.0.1:4000)/intimate_source?parseTime=true&loc=Local"
|
source_uri: "root:@tcp(127.0.0.1:4000)/intimate_source?parseTime=true&loc=Local&charset=utf8mb4&collation=utf8mb4_unicode_ci"
|
||||||
extractor_uri: "root:@tcp(127.0.0.1:4000)/intimate_extractor?parseTime=true&loc=Local"
|
extractor_uri: "root:@tcp(127.0.0.1:4000)/intimate_extractor?parseTime=true&loc=Local&charset=utf8mb4&collation=utf8mb4_unicode_ci"
|
3
go.mod
3
go.mod
|
@ -3,13 +3,14 @@ module intimate
|
||||||
go 1.14
|
go 1.14
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/474420502/extractor v0.2.2
|
github.com/474420502/extractor v0.4.1
|
||||||
github.com/474420502/focus v0.12.0
|
github.com/474420502/focus v0.12.0
|
||||||
github.com/474420502/gcurl v0.1.2
|
github.com/474420502/gcurl v0.1.2
|
||||||
github.com/474420502/hunter v0.3.4
|
github.com/474420502/hunter v0.3.4
|
||||||
github.com/474420502/requests v1.6.0
|
github.com/474420502/requests v1.6.0
|
||||||
github.com/go-sql-driver/mysql v1.5.0
|
github.com/go-sql-driver/mysql v1.5.0
|
||||||
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
|
github.com/lestrrat-go/libxml2 v0.0.0-20200215080510-6483566f52cb
|
||||||
|
github.com/stretchr/testify v1.6.1 // indirect
|
||||||
github.com/tebeka/selenium v0.9.9
|
github.com/tebeka/selenium v0.9.9
|
||||||
github.com/tidwall/gjson v1.6.0
|
github.com/tidwall/gjson v1.6.0
|
||||||
github.com/tidwall/pretty v1.0.1 // indirect
|
github.com/tidwall/pretty v1.0.1 // indirect
|
||||||
|
|
41
go.sum
41
go.sum
|
@ -2,16 +2,44 @@ cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMT
|
||||||
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||||
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
|
cloud.google.com/go v0.38.0/go.mod h1:990N+gfupTy94rShfmMCWGDn0LpTmnzTp2qbd1dvSRU=
|
||||||
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
|
cloud.google.com/go v0.41.0/go.mod h1:OauMR7DV8fzvZIl2qg6rkaIhD/vmgk4iwEw/h6ercmg=
|
||||||
github.com/474420502/extractor v0.2.2 h1:hGao2iZt5CEI8oqYjQW938osQdHKgNWL/bwRJQNgHTM=
|
github.com/474420502/extractor v0.3.0 h1:VURhjNFP2kG6DvPZfsRR/3JLYHURvsHazp/JazNYbME=
|
||||||
github.com/474420502/extractor v0.2.2/go.mod h1:OVFijdKLDghigpIYISHzlognL5q8eeVenT2fRhCyFns=
|
github.com/474420502/extractor v0.3.0/go.mod h1:thq0UAm30cMLY6+LJHPNRSw/H3ZrMGfmK0rk+HwycvE=
|
||||||
|
github.com/474420502/extractor v0.3.1 h1:IxOeJziOR3DPrZJhOcbOUzAc/UABmKUYGLdVgxSi9yk=
|
||||||
|
github.com/474420502/extractor v0.3.1/go.mod h1:thq0UAm30cMLY6+LJHPNRSw/H3ZrMGfmK0rk+HwycvE=
|
||||||
|
github.com/474420502/extractor v0.3.2 h1:KcgRC0+pNfK803uZjL76pgsfsnlKSMR1nQX6o6y8cVA=
|
||||||
|
github.com/474420502/extractor v0.3.2/go.mod h1:yQRtpUOeb37tMitCsenURnN2Yas9Jm/5HGFDCO+/20k=
|
||||||
|
github.com/474420502/extractor v0.3.3 h1:2/rCOEtTVkezGqz7E0D8KKN1QBKlQaihe+UMxNZcwNk=
|
||||||
|
github.com/474420502/extractor v0.3.3/go.mod h1:8cakB/mW3No6o2I7PtrVHQ35auIgHh0mGIfk1++UZm4=
|
||||||
|
github.com/474420502/extractor v0.3.4 h1:3lKV5oke46sDAxkiY4KGMeBiYI8hwNkiAa2Sf8B+xPY=
|
||||||
|
github.com/474420502/extractor v0.3.4/go.mod h1:+biDin5eKLuJQHNbW+HnPYCC+2LL090iCZNxQklB11Y=
|
||||||
|
github.com/474420502/extractor v0.3.5 h1:uq3SuPY51F1pYvAtnaJtcqtJ+yE7wcaq3LP9DWTtBnQ=
|
||||||
|
github.com/474420502/extractor v0.3.5/go.mod h1:pKjqYQCZquakvor/d9JJQYrTYInWKaVXjzAg+IM1/tY=
|
||||||
|
github.com/474420502/extractor v0.3.6 h1:Qsky2YYUCENz3BFzlFOOWykFyDOfigbkkCTnMAkKExE=
|
||||||
|
github.com/474420502/extractor v0.3.6/go.mod h1:rH+/kx0CS8xpzOBqraisQE1A9vfXAPZZ+091D8HYXvw=
|
||||||
|
github.com/474420502/extractor v0.3.7 h1:QDBd4mAjf6D+vH98LQ1SJByDTtLago9GDiEvN1oyDJ0=
|
||||||
|
github.com/474420502/extractor v0.3.7/go.mod h1:v0TAfUw1zNyFCYVqj5xyFVFpoqmqErvAd2SzMzR/yc8=
|
||||||
|
github.com/474420502/extractor v0.4.0 h1:h6MbrkCBPQ2/+VRAK741oVcZuDhZ2t4USt0MOIf/v2U=
|
||||||
|
github.com/474420502/extractor v0.4.0/go.mod h1:1oPuXIm7whY+/rU7hxDW3ick4hHc4AdiNqdk5vVWaXs=
|
||||||
|
github.com/474420502/extractor v0.4.1 h1:WqcwF7gyvGREBrXBAm3fLR7yqxP/P/arq/iHXZvt8Gg=
|
||||||
|
github.com/474420502/extractor v0.4.1/go.mod h1:1oPuXIm7whY+/rU7hxDW3ick4hHc4AdiNqdk5vVWaXs=
|
||||||
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
|
github.com/474420502/focus v0.12.0 h1:+icbmj7IEOefvTegHt5EpcHt6WFbe2miIrceUJx2Evo=
|
||||||
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
|
github.com/474420502/focus v0.12.0/go.mod h1:d0PMjtMxFz1a9HIhwyFPkWa+JF+0LgOrEUfd8iZka6s=
|
||||||
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
|
github.com/474420502/gcurl v0.1.2 h1:ON9Yz3IgAdtDlFlHfkAJ3aIEBDxH0RiViPE5ST5ohKg=
|
||||||
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
|
github.com/474420502/gcurl v0.1.2/go.mod h1:hws5q/Ao64bXLLDnldz9VyTQUndTWc/i5DzdEazFfoM=
|
||||||
github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
|
github.com/474420502/hunter v0.3.4 h1:fyLAgI84jWe3IcqsISC53j1w3CXI1FERxX//Potns0M=
|
||||||
github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
|
github.com/474420502/hunter v0.3.4/go.mod h1:pe4Xr/I+2agvq339vS/OZV+EiHAWtpXQs75rioSW9oA=
|
||||||
github.com/474420502/libxml2 v0.0.0-20200803084225-29e441d26406 h1:nLvl2D2y+hxCglLnRmLqwRGwmUsXQt8ga46zGySTU1I=
|
github.com/474420502/libxml2 v0.0.0-20200806111302-aa4be92ad592 h1:kgvx2MvoMhkrzLVjM6C6RIcshgI80fnq5/LqAnTOMxQ=
|
||||||
github.com/474420502/libxml2 v0.0.0-20200803084225-29e441d26406/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
github.com/474420502/libxml2 v0.0.0-20200806111302-aa4be92ad592/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807033034-1b43ad443d1d h1:MQduBAgnOCeGVUU+tawJxQLP1/Bgnn7119hGpVb9VFI=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807033034-1b43ad443d1d/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807033649-9731e0a44bf0 h1:EiO+pSoFk7TTv/TnVFCT/swjWQEeLAZ2wXeXsS+9+kY=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807033649-9731e0a44bf0/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807034854-eaa2a69a2790 h1:vzHGXv0e7MX+MSZcz4SjRJUfzoUpX96Qf0f48T6dkxk=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807034854-eaa2a69a2790/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807035356-cd2e51185f4b h1:q9qSCx9gm7gS6Xr2nmKqkiu2FApQJFkqvTsrAzcWXps=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807035356-cd2e51185f4b/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807040518-4ef6186ae68c h1:UZriMoPoXEA4Mq/yP+36sxwkOC3Jk3nqy2I7e3ZV470=
|
||||||
|
github.com/474420502/libxml2 v0.0.0-20200807040518-4ef6186ae68c/go.mod h1:bUbcte7hFuLijGG6/+gGxurW3XvxE/CBdfAAlsIWj34=
|
||||||
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
|
github.com/474420502/requests v1.6.0 h1:f4h4j40eT0P5whhg9LdkotD8CaKjtuDu/vz9iSUkCgY=
|
||||||
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
|
github.com/474420502/requests v1.6.0/go.mod h1:SLXrQ5dL9c7dkIeKNUCBAjOIt3J9KFCS2RQjWJecNwo=
|
||||||
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
|
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
|
||||||
|
@ -24,6 +52,9 @@ github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP
|
||||||
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
||||||
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
|
github.com/Pallinder/go-randomdata v1.2.0 h1:DZ41wBchNRb/0GfsePLiSwb0PHZmT67XY00lCDlaYPg=
|
||||||
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
github.com/Pallinder/go-randomdata v1.2.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y=
|
||||||
|
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
|
||||||
|
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||||
|
github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
|
||||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
|
||||||
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
|
||||||
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
|
github.com/blang/semver v3.5.1+incompatible h1:cQNTCjp13qL8KC3Nbxr/y2Bqb63oX6wdnnjpJbkM4JQ=
|
||||||
|
@ -40,6 +71,7 @@ github.com/go-sql-driver/mysql v1.5.0 h1:ozyZYNQW3x3HtqT1jira07DN2PArx2v7/mN66gG
|
||||||
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
|
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
|
||||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58=
|
||||||
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
|
||||||
|
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
|
||||||
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||||
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
github.com/golang/mock v1.2.0/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A=
|
||||||
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
|
github.com/golang/mock v1.3.1/go.mod h1:sBzyDLLjw3U8JLTeZvSv8jJB+tU5PVekmnlKIyFUx0Y=
|
||||||
|
@ -109,6 +141,7 @@ golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR
|
||||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
|
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e h1:3G+cUijn7XD+S4eJFddp53Pv7+slrESplyjG25HgL+k=
|
||||||
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
golang.org/x/net v0.0.0-20200324143707-d3edc9973b7e/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||||
|
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
||||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||||
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
|
||||||
|
|
2
store.go
2
store.go
|
@ -346,7 +346,7 @@ func (store *StoreExtractor) InsertStreamer(streamer IGet) (isExists bool) {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
_, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, update_time) VALUES(?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), time.Now().Add(-time.Minute*60))
|
_, err = tx.Exec("INSERT INTO "+StreamerTable+"(platform, user_id, update_url, tags, update_time) VALUES(?,?,?,?,?);", streamer.Get("Platform"), streamer.Get("UserId"), streamer.Get("UpdateUrl"), streamer.Get("Tags"), time.Now().Add(-time.Minute*60))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,7 +1,14 @@
|
||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"database/sql"
|
||||||
|
"encoding/json"
|
||||||
"intimate"
|
"intimate"
|
||||||
|
"net/http"
|
||||||
|
"net/url"
|
||||||
|
"os"
|
||||||
|
"os/signal"
|
||||||
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/474420502/extractor"
|
"github.com/474420502/extractor"
|
||||||
|
@ -11,9 +18,18 @@ import (
|
||||||
"log"
|
"log"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
_ "net/http/pprof"
|
||||||
|
|
||||||
"github.com/474420502/requests"
|
"github.com/474420502/requests"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func Test(t *testing.T) {
|
||||||
|
rawurl := "https://twitcasting.tv/你好"
|
||||||
|
u, _ := url.Parse(rawurl)
|
||||||
|
t.Error(u.EscapedPath())
|
||||||
|
t.Error(u.String())
|
||||||
|
}
|
||||||
|
|
||||||
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
// sstore 源存储实例, 为存储源数据的实现. 表格具体参考sql/intimate_source.sql
|
||||||
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))
|
var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwitcasting))
|
||||||
|
|
||||||
|
@ -21,44 +37,74 @@ var sstore *intimate.StoreSource = intimate.NewStoreSource(string(intimate.STTwi
|
||||||
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
var estore *intimate.StoreExtractor = intimate.NewStoreExtractor()
|
||||||
|
|
||||||
func TestMain(t *testing.T) {
|
func TestMain(t *testing.T) {
|
||||||
|
f, _ := os.OpenFile("./log", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm)
|
||||||
|
log.SetFlags(log.Llongfile | log.Ltime)
|
||||||
|
log.SetOutput(f)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
log.Println(http.ListenAndServe(":4040", nil))
|
||||||
|
}()
|
||||||
|
|
||||||
|
homeurl := "https://twitcasting.tv"
|
||||||
searchurl := "https://twitcasting.tv/rankingindex.php"
|
searchurl := "https://twitcasting.tv/rankingindex.php"
|
||||||
queuedict := make(map[string]bool)
|
queuedict := make(map[string]bool)
|
||||||
queue := heap.New(compare.String)
|
queue := heap.New(compare.String)
|
||||||
queue.Put(searchurl)
|
queue.Put(searchurl)
|
||||||
queuedict[searchurl] = true
|
queuedict[searchurl] = true
|
||||||
|
ses := requests.NewSession()
|
||||||
|
ses.Config().SetTimeout(15)
|
||||||
|
|
||||||
for surl, ok := queue.Pop(); ok; surl, ok = queue.Pop() {
|
var surl interface{}
|
||||||
|
var ok bool
|
||||||
|
var debugsp *SearchProfile
|
||||||
|
var content []byte
|
||||||
|
|
||||||
ses := requests.NewSession()
|
defer func() {
|
||||||
resp, err := ses.Get(surl.(string)).Execute()
|
if ierr := recover(); ierr != nil {
|
||||||
|
log.Println(surl, debugsp)
|
||||||
|
f, _ := os.OpenFile("./error.html", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm)
|
||||||
|
f.Write(content)
|
||||||
|
f.Close()
|
||||||
|
log.Panic(ierr)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
signalchan := make(chan os.Signal)
|
||||||
|
signal.Notify(signalchan, syscall.SIGINT, syscall.SIGKILL, syscall.SIGQUIT, syscall.SIGTERM, syscall.SIGSTOP)
|
||||||
|
log.Println("accept stop command:", <-signalchan)
|
||||||
|
f, _ := os.OpenFile("./error.html", os.O_TRUNC|os.O_CREATE|os.O_RDWR, os.ModePerm)
|
||||||
|
f.Write(content)
|
||||||
|
f.Close()
|
||||||
|
os.Exit(1)
|
||||||
|
}()
|
||||||
|
|
||||||
|
for surl, ok = queue.Pop(); ok; surl, ok = queue.Pop() {
|
||||||
|
u, err := url.Parse(surl.(string))
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
log.Println(err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
resp, err := ses.Get(u.String()).Execute()
|
||||||
|
if err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
log.Println(u.String(), surl)
|
||||||
|
continue
|
||||||
|
// log.Panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
content = resp.Content()
|
||||||
etor := extractor.ExtractXml(resp.Content())
|
etor := extractor.ExtractXml(resp.Content())
|
||||||
|
result, err := etor.XPath("//p[@class='taglist']/a[contains(@class, 'tag')]/@href")
|
||||||
// doc, err := libxml2.ParseHTML(resp.Content())
|
|
||||||
// if err != nil {
|
|
||||||
// panic(err)
|
|
||||||
// }
|
|
||||||
// defer doc.Free()
|
|
||||||
|
|
||||||
result, err := etor.XPath("//*[contains(@class, 'tag')]/@href")
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// result, err := doc.Find("//*[contains(@class, 'tag')]/@href")
|
|
||||||
// if err != nil {
|
|
||||||
// panic(err)
|
|
||||||
// }
|
|
||||||
// defer result.Free()
|
|
||||||
|
|
||||||
iter := result.NodeIter()
|
iter := result.NodeIter()
|
||||||
for iter.Next() {
|
for iter.Next() {
|
||||||
|
|
||||||
wurl := "https://twitcasting.tv" + iter.Node().NodeValue()
|
wurl := homeurl + iter.Node().NodeValue()
|
||||||
if ok := queuedict[wurl]; !ok {
|
if ok := queuedict[wurl]; !ok {
|
||||||
log.Println(wurl)
|
log.Println(wurl)
|
||||||
sl := &intimate.StreamerList{}
|
sl := &intimate.StreamerList{}
|
||||||
|
@ -67,7 +113,9 @@ func TestMain(t *testing.T) {
|
||||||
sl.Operator = 0
|
sl.Operator = 0
|
||||||
sl.UpdateInterval = 120
|
sl.UpdateInterval = 120
|
||||||
sl.UpdateTime = time.Now()
|
sl.UpdateTime = time.Now()
|
||||||
|
|
||||||
estore.InsertStreamerList(sl)
|
estore.InsertStreamerList(sl)
|
||||||
|
|
||||||
queue.Put(wurl)
|
queue.Put(wurl)
|
||||||
queuedict[wurl] = true
|
queuedict[wurl] = true
|
||||||
}
|
}
|
||||||
|
@ -80,21 +128,53 @@ func TestMain(t *testing.T) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
// xps.ForEachTag(SearchProfile{})
|
log.Println("extract tag")
|
||||||
|
|
||||||
// texts, errs := xps.ForEachText(".//span[@class='username']")
|
|
||||||
// if len(errs) > 0 {
|
|
||||||
// t.Error(errs)
|
|
||||||
// }
|
|
||||||
var splist = xps.ForEachTag(SearchProfile{})
|
var splist = xps.ForEachTag(SearchProfile{})
|
||||||
|
log.Println("finish extract tag")
|
||||||
for _, isp := range splist {
|
for _, isp := range splist {
|
||||||
sp := isp.(*SearchProfile)
|
sp := isp.(*SearchProfile)
|
||||||
|
if sp.LiveUrl == "" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
sp.UserId = sp.LiveUrl[1:]
|
sp.UserId = sp.LiveUrl[1:]
|
||||||
|
for i := 0; i < len(sp.TagUrl); i++ {
|
||||||
|
wurl := homeurl + sp.TagUrl[i]
|
||||||
|
sp.TagUrl[i] = wurl
|
||||||
|
if ok := queuedict[wurl]; !ok {
|
||||||
|
sl := &intimate.StreamerList{}
|
||||||
|
sl.Platform = intimate.Ptwitcasting
|
||||||
|
sl.Url = wurl
|
||||||
|
sl.Operator = 0
|
||||||
|
sl.UpdateInterval = 120
|
||||||
|
sl.UpdateTime = time.Now()
|
||||||
|
estore.InsertStreamerList(sl)
|
||||||
|
|
||||||
|
queue.Put(wurl)
|
||||||
|
queuedict[wurl] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
// log.Println(sp.(SearchProfile))
|
// log.Println(sp.(SearchProfile))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log.Println("find user:", len(splist))
|
||||||
for _, isp := range splist {
|
for _, isp := range splist {
|
||||||
log.Println(isp.(*SearchProfile))
|
sp := isp.(*SearchProfile)
|
||||||
|
// log.Println(sp)
|
||||||
|
streamer := &intimate.Streamer{}
|
||||||
|
streamer.Platform = intimate.Ptwitcasting
|
||||||
|
streamer.LiveUrl = sql.NullString{String: sp.LiveUrl, Valid: true}
|
||||||
|
if btags, err := json.Marshal(sp.Tag); err != nil {
|
||||||
|
log.Println(err)
|
||||||
|
} else {
|
||||||
|
streamer.Tags = btags
|
||||||
|
}
|
||||||
|
streamer.UpdateInterval = 120
|
||||||
|
streamer.UpdateTime = sql.NullTime{Time: time.Now(), Valid: true}
|
||||||
|
streamer.UserName = sql.NullString{String: sp.UserName, Valid: true}
|
||||||
|
streamer.UserId = sp.UserId
|
||||||
|
debugsp = sp
|
||||||
|
estore.InsertStreamer(streamer)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("finish remain", queue.Size())
|
log.Println("finish remain", queue.Size())
|
||||||
|
@ -102,7 +182,9 @@ func TestMain(t *testing.T) {
|
||||||
}
|
}
|
||||||
|
|
||||||
type SearchProfile struct {
|
type SearchProfile struct {
|
||||||
UserName string `exp:".//span[@class='username']" method:"Text"`
|
UserName string `exp:".//span[@class='username']" method:"Text"`
|
||||||
UserId string // `exp:".//span[@class='fullname']" method:"Text"`
|
UserId string // `exp:".//span[@class='fullname']" method:"Text"`
|
||||||
LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"`
|
LiveUrl string `exp:".//div[@class='usertext']/a[@href]" method:"Attribute,href Value"`
|
||||||
|
Tag []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Text"`
|
||||||
|
TagUrl []string `exp:".//a[contains(@class, 'tag tag-mini')]" method:"Attribute,href Value"`
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user