money-money/main.go

421 lines
11 KiB
Go
Raw Normal View History

2022-06-20 18:17:29 +00:00
package moneymoney
import (
"bytes"
"context"
"encoding/csv"
2022-06-21 05:38:38 +00:00
"encoding/gob"
2022-06-21 17:19:21 +00:00
"encoding/json"
"fmt"
2022-06-20 18:17:29 +00:00
"io/ioutil"
"log"
2022-06-21 17:19:21 +00:00
"net"
2022-06-21 05:38:38 +00:00
"os"
2022-06-20 18:17:29 +00:00
"regexp"
"strconv"
"time"
"github.com/474420502/gcurl"
2022-07-03 00:38:06 +00:00
"github.com/474420502/structure/tree/treelist"
2022-06-21 17:19:21 +00:00
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/devices"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
2022-06-21 05:38:38 +00:00
"github.com/tidwall/gjson"
2022-06-22 10:23:18 +00:00
"go.mongodb.org/mongo-driver/bson"
2022-06-20 18:17:29 +00:00
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
)
2022-06-22 10:23:18 +00:00
// DefaultPage caches the browser page created by GetDefaultPage so that later
// calls reuse it instead of launching another browser.
var DefaultPage *rod.Page

// GetDefaultPage returns the shared browser page, creating it on first use.
//
// The first call launches `google-chrome` with the "headless" flag removed, a
// remote debugging port taken from GetPort and the user data directory
// /tmp/money-money_rod, connects to it and opens a page that emulates the
// device described below. The page is stored in DefaultPage and returned as-is
// by every later call. The lazy initialisation is not guarded by any lock.
func GetDefaultPage() *rod.Page {
	if DefaultPage == nil {
		debugPort := GetPort()
		log.Println("get port:", debugPort)

		// Deleting the "headless" flag asks the launcher for a visible window.
		chrome := launcher.New().
			Bin(`google-chrome`).
			RemoteDebuggingPort(debugPort).
			Set("user-data-dir", fmt.Sprintf("/tmp/%s_rod", "money-money")).
			Delete("headless")

		// MustLaunch hands back the debug (control) URL of the new browser.
		controlURL := chrome.MustLaunch()
		fmt.Printf("debug url: %s\n", controlURL)

		// Emulated device: 1920x1080 at pixel ratio 1 with a desktop Chrome UA.
		laptop := devices.Device{
			Title:        "Laptop with MDPI screen",
			Capabilities: []string{"touch", "mobile"},
			UserAgent:    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
			Screen: devices.Screen{
				DevicePixelRatio: 1,
				Horizontal: devices.ScreenSize{
					Width:  1920,
					Height: 1080,
				},
			},
		}

		// Connect to the browser and open the page every caller will share.
		browser := rod.New().ControlURL(controlURL).MustConnect()
		DefaultPage = browser.DefaultDevice(laptop).MustPage()
	}
	return DefaultPage
}
2022-06-23 09:06:00 +00:00
var client *mongo.Client
2022-07-03 00:38:06 +00:00
var cstock *mongo.Collection
var DateStocks *treelist.Tree[int64]
2022-07-04 17:47:54 +00:00
var CountedDays map[int]bool
2022-06-23 09:06:00 +00:00
var err error
2022-06-21 05:38:38 +00:00
2022-06-23 09:06:00 +00:00
func init() {
2022-06-21 05:38:38 +00:00
log.SetFlags(log.Llongfile | log.LstdFlags)
2022-06-23 14:59:47 +00:00
client, err = mongo.Connect(context.TODO(), options.Client().ApplyURI("mongodb://localhost:27017"))
2022-06-22 10:23:18 +00:00
if err != nil {
panic(err)
}
2022-07-03 00:38:06 +00:00
cstock = client.Database("money").Collection("stock")
DateStocks = GetAll()
2022-07-04 17:47:54 +00:00
CountedDays = map[int]bool{}
for i := 1; i < 7; i++ {
CountedDays[1<<i] = true
}
2022-06-23 09:06:00 +00:00
}
// main downloads the history of every stock returned by GetStocks whose MCAP
// value is at least minMarketCap.
func main() {
	// Threshold compared against StockBase.MCAP.
	const minMarketCap = 50000000000

	pending := GetStocks()
	for _, stock := range pending {
		if stock.MCAP >= minMarketCap {
			DownloadDataFromCode(stock)
		}
	}
}
func GetStocks() []*StockBase {
// client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017"))
2022-06-22 10:23:18 +00:00
2022-06-23 09:06:00 +00:00
cur, err := client.Database("money").Collection("stock").Distinct(context.TODO(), "股票数字代码", bson.M{})
2022-06-21 05:38:38 +00:00
if err != nil {
panic(err)
}
2022-06-22 10:23:18 +00:00
2022-06-23 09:06:00 +00:00
var skipMap map[int64]bool = make(map[int64]bool)
2022-06-22 10:23:18 +00:00
2022-06-23 09:06:00 +00:00
for _, idoc := range cur {
var scode = idoc.(int64)
// err = cur.Decode(&doc)
2022-06-22 10:23:18 +00:00
if err == nil {
2022-06-23 09:06:00 +00:00
skipMap[scode] = true
2022-06-22 10:23:18 +00:00
} else {
log.Panic(err)
}
}
2022-06-21 05:38:38 +00:00
var stockCodesFile = "./stock_codes.gob"
2022-06-21 17:19:21 +00:00
var stockCodes []*StockBase
2022-06-21 05:38:38 +00:00
f, err := os.Open(stockCodesFile)
if err == nil {
err = gob.NewDecoder(f).Decode(&stockCodes)
if err != nil {
panic(err)
}
} else {
2022-06-21 17:19:21 +00:00
2022-06-23 14:59:47 +00:00
murl := `curl 'http://quotes.money.163.com/hs/service/diyrank.php?page=0&query=STYPE%3AEQA&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS&sort=PERCENT&order=asc&count=6000&type=query'`
2022-06-23 09:06:00 +00:00
tp := gcurl.Parse(murl).Temporary()
resp, err := tp.Execute()
2022-06-21 05:38:38 +00:00
if err != nil {
panic(err)
}
2022-06-23 09:06:00 +00:00
jr := gjson.ParseBytes(resp.Content())
2022-06-23 14:59:47 +00:00
log.Println(len(jr.Get("list").Array()))
2022-06-23 09:06:00 +00:00
for _, s := range jr.Get("list").Array() {
var stockCode StockBase
err = json.Unmarshal([]byte(s.String()), &stockCode)
if err != nil {
panic(err)
}
stockCodes = append(stockCodes, &stockCode)
2022-06-21 05:38:38 +00:00
}
2022-06-23 09:06:00 +00:00
// log.Println(jr.String())
}
f, err = os.OpenFile(stockCodesFile, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0664)
if err != nil {
panic(err)
}
err = gob.NewEncoder(f).Encode(&stockCodes)
if err != nil {
panic(err)
2022-06-21 05:38:38 +00:00
}
if f != nil {
err = f.Close()
if err != nil {
panic(err)
}
}
2022-06-23 09:06:00 +00:00
re, _ := regexp.Compile(`\d+`)
log.Println("stocks", len(stockCodes))
var result []*StockBase
2022-06-21 05:38:38 +00:00
for _, code := range stockCodes {
2022-06-23 09:06:00 +00:00
scode, err := strconv.ParseInt(re.FindString(code.SYMBOL), 10, 64)
if err != nil {
panic(err)
}
if _, ok := skipMap[scode]; ok {
2022-06-22 10:23:18 +00:00
continue
}
2022-06-23 09:06:00 +00:00
result = append(result, code)
// if code.MCAP >= 50000000000 {
// DownloadDataFromCode(client, code)
// }
2022-06-21 05:38:38 +00:00
}
2022-06-23 09:06:00 +00:00
return result
2022-06-21 05:38:38 +00:00
}
2022-06-23 09:06:00 +00:00
func DownloadDataFromCode(code *StockBase) {
2022-06-21 05:38:38 +00:00
// 300731
2022-06-22 10:23:18 +00:00
// durl := `curl 'http://quotes.money.163.com/service/chddata.html?code=${CODE}&start=20170101&end=20220621&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP' \
// -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
// -H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
// -H 'Connection: keep-alive' \
// -H 'Cookie: _ntes_nnid=07a59ac6cc3c3873093db99e3419a5c7,1652972918736; _ntes_nuid=07a59ac6cc3c3873093db99e3419a5c7; _antanalysis_s_id=1655737843219; ne_analysis_trace_id=1655740348110; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; s_n_f_l_n3=90474b666b6678eb1655828892281; pgr_n_f_l_n3=90474b666b6678eb16558271774556486; vinfo_n_f_l_n3=90474b666b6678eb.1.6.1655737842842.1655828160869.1655828914287' \
// -H 'Referer: http://quotes.money.163.com/trade/lsjysj_${SYMBOL}.html' \
// -H 'Upgrade-Insecure-Requests: 1' \
// -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' `
2022-06-21 17:19:21 +00:00
// http://quotes.money.163.com/0601988.html
2022-06-22 10:23:18 +00:00
page := GetDefaultPage()
stockurl := fmt.Sprintf("http://quotes.money.163.com/%s.html", code.CODE)
log.Println(stockurl)
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
page.Navigate(stockurl)
page.WaitNavigation(proto.PageLifecycleEventNameFirstContentfulPaint)()
time.Sleep(time.Millisecond * 100)
ele, err := page.ElementsX(fmt.Sprintf("//a[contains(@href,'/trade/lsjysj_%s')]/@href", code.SYMBOL))
2022-06-21 17:19:21 +00:00
if err != nil {
panic(err)
}
if iter := ele.First(); iter != nil {
2022-06-22 10:23:18 +00:00
iter.WaitEnabled()
2022-06-21 17:19:21 +00:00
urlpath, err := iter.HTML()
2022-06-22 10:23:18 +00:00
log.Println("click", urlpath)
2022-06-21 17:19:21 +00:00
if err != nil {
panic(err)
}
2022-06-23 04:00:19 +00:00
page.Navigate("http://quotes.money.163.com" + urlpath)
2022-06-21 17:19:21 +00:00
ahref := page.MustElementX("//a[@id='downloadData']")
2022-06-23 04:00:19 +00:00
log.Println("wait downloadData")
2022-06-22 10:23:18 +00:00
ahref.WaitEnabled()
2022-06-21 17:19:21 +00:00
2022-06-23 04:00:19 +00:00
time.Sleep(time.Millisecond * 500)
2022-06-21 17:19:21 +00:00
ahref.Click(proto.InputMouseButtonLeft)
2022-06-23 04:00:19 +00:00
2022-06-21 17:19:21 +00:00
e := page.MustElementX("//a[@class='blue_btn submit']")
2022-06-23 04:00:19 +00:00
log.Println("wait blue_btn submit")
2022-06-22 10:23:18 +00:00
2022-06-23 04:00:19 +00:00
e.WaitEnabled()
time.Sleep(time.Millisecond * 500)
2022-06-22 10:23:18 +00:00
w := page.Browser().MustWaitDownload()
2022-06-21 17:19:21 +00:00
e.Click(proto.InputMouseButtonLeft)
2022-06-22 10:23:18 +00:00
downloaddata := w()
log.Println(len(downloaddata))
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
// durl = strings.ReplaceAll(durl, `${SYMBOL}`, code.SYMBOL)
// durl = strings.ReplaceAll(durl, `${CODE}`, code.CODE)
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
// resp, err := gcurl.Parse(durl).Temporary().Execute()
// if err != nil {
// panic(err)
// }
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
reader := csv.NewReader(bytes.NewBuffer(downloaddata))
alls, err := reader.ReadAll()
if err != nil {
panic(err)
}
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
var jfield []string
for _, field := range alls[0] {
v, _ := GbkToUtf8([]byte(field))
jfield = append(jfield, string(v))
// log.Printf("%#v", string(v))
}
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
re, _ := regexp.Compile(`\d+`)
2022-06-20 18:17:29 +00:00
2022-06-23 09:06:00 +00:00
var stocks []mongo.WriteModel
2022-06-22 10:23:18 +00:00
for _, line := range alls[1:] {
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
var fields []string
for _, field := range line {
v, _ := GbkToUtf8([]byte(field))
fields = append(fields, string(v))
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
}
2022-06-23 09:06:00 +00:00
code, err := strconv.ParseInt(re.FindString(fields[1]), 10, 64)
if err != nil {
panic(err)
}
2022-06-20 18:17:29 +00:00
2022-07-03 00:38:06 +00:00
ts, err := time.ParseInLocation("2006-01-02", fields[0], time.Local)
if err != nil {
panic(err)
}
2022-06-23 09:06:00 +00:00
s := &Stock{
2022-07-03 00:38:06 +00:00
Date: ts,
2022-06-22 10:23:18 +00:00
CodeStr: fields[1],
Name: fields[2],
ClosingPrice: ToFloat(fields[3]),
MaxPrice: ToFloat(fields[4]),
MinPrice: ToFloat(fields[5]),
OpeningPrice: ToFloat(fields[6]),
PreviousClosingPrice: ToFloat(fields[7]),
UpsDowns: ToFloat(fields[8]),
UpsDownsRatio: ToFloat(fields[9]),
TurnoverRate: ToFloat(fields[10]),
Volume: ToFloat(fields[11]),
Turnover: ToFloat(fields[12]),
MarketValue: ToFloat(fields[13]),
CirculatingMarketValue: ToFloat(fields[14]),
Code: code,
}
2022-06-23 09:06:00 +00:00
stocks = append(stocks, &mongo.InsertOneModel{Document: s})
2022-06-20 18:17:29 +00:00
}
2022-06-23 04:00:19 +00:00
2022-06-23 09:06:00 +00:00
cstock := client.Database("money").Collection("stock")
r, err := cstock.BulkWrite(context.TODO(), stocks)
if err != nil {
log.Println(err)
}
log.Println(code.SYMBOL, r)
2022-06-23 04:00:19 +00:00
time.Sleep(time.Second * 1)
2022-06-20 18:17:29 +00:00
}
}
2022-06-23 09:06:00 +00:00
func SaveFromCSV(downloaddata []byte) {
reader := csv.NewReader(bytes.NewBuffer(downloaddata))
alls, err := reader.ReadAll()
if err != nil {
panic(err)
}
var jfield []string
for _, field := range alls[0] {
v, _ := GbkToUtf8([]byte(field))
jfield = append(jfield, string(v))
// log.Printf("%#v", string(v))
}
re, _ := regexp.Compile(`\d+`)
var stocks []mongo.WriteModel
for _, line := range alls[1:] {
var fields []string
for _, field := range line {
v, _ := GbkToUtf8([]byte(field))
fields = append(fields, string(v))
}
code, err := strconv.ParseInt(re.FindString(fields[1]), 10, 64)
if err != nil {
panic(err)
}
2022-07-03 00:38:06 +00:00
ts, err := time.ParseInLocation("2006-01-02", fields[0], time.Local)
if err != nil {
panic(err)
}
2022-06-23 09:06:00 +00:00
s := &Stock{
2022-07-03 00:38:06 +00:00
Date: ts,
2022-06-23 09:06:00 +00:00
CodeStr: fields[1],
Name: fields[2],
ClosingPrice: ToFloat(fields[3]),
MaxPrice: ToFloat(fields[4]),
MinPrice: ToFloat(fields[5]),
OpeningPrice: ToFloat(fields[6]),
PreviousClosingPrice: ToFloat(fields[7]),
UpsDowns: ToFloat(fields[8]),
UpsDownsRatio: ToFloat(fields[9]),
TurnoverRate: ToFloat(fields[10]),
Volume: ToFloat(fields[11]),
Turnover: ToFloat(fields[12]),
MarketValue: ToFloat(fields[13]),
CirculatingMarketValue: ToFloat(fields[14]),
Code: code,
}
stocks = append(stocks, &mongo.InsertOneModel{Document: s})
}
cstock := client.Database("money").Collection("stock")
r, err := cstock.BulkWrite(context.TODO(), stocks)
if err != nil {
log.Println(err)
}
log.Println(r)
}
2022-06-20 18:17:29 +00:00
// ToFloat converts a CSV cell to float64. The literal "None" maps to 0; any
// other text is parsed with strconv.ParseFloat and a parse failure panics.
func ToFloat(s string) float64 {
	if s != "None" {
		parsed, parseErr := strconv.ParseFloat(s, 64)
		if parseErr != nil {
			panic(parseErr)
		}
		return parsed
	}
	return 0
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}
2022-06-21 17:19:21 +00:00
// GetPort asks the operating system for a currently free TCP port and returns
// its number.
//
// It opens a listener on port 0 of all interfaces (":0"), reads the port the
// OS assigned and closes the listener again. The port is not reserved after
// the listener is closed, so another process may claim it before the caller
// uses it. GetPort panics if the listener cannot be opened or closed.
func GetPort() int {
	l, err := net.Listen("tcp", ":0")
	if err != nil {
		// Previously ignored, which turned a failed listen into a nil
		// pointer dereference on the next line.
		panic(fmt.Errorf("finding a free tcp port: %w", err))
	}
	port := l.Addr().(*net.TCPAddr).Port
	if err := l.Close(); err != nil {
		panic(err)
	}
	return port
}