money-money/main.go

372 lines
11 KiB
Go
Raw Normal View History

2022-06-20 18:17:29 +00:00
package moneymoney
import (
"bytes"
"context"
"encoding/csv"
2022-06-21 05:38:38 +00:00
"encoding/gob"
2022-06-21 17:19:21 +00:00
"encoding/json"
"fmt"
2022-06-20 18:17:29 +00:00
"io/ioutil"
"log"
2022-06-21 17:19:21 +00:00
"net"
2022-06-21 05:38:38 +00:00
"os"
2022-06-20 18:17:29 +00:00
"regexp"
"strconv"
"time"
"github.com/474420502/gcurl"
2022-06-21 17:19:21 +00:00
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/devices"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
2022-06-21 05:38:38 +00:00
"github.com/tidwall/gjson"
2022-06-22 10:23:18 +00:00
"go.mongodb.org/mongo-driver/bson"
2022-06-20 18:17:29 +00:00
"go.mongodb.org/mongo-driver/mongo"
"go.mongodb.org/mongo-driver/mongo/options"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
)
// Stock is one daily trading record of a single stock, built from one row of
// the downloaded history CSV. The json/bson tags are the Chinese column names
// under which each value is serialized.
type Stock struct {
	Date string `json:"日期" bson:"日期"` // trading date, kept as the raw CSV text
	CodeStr string `json:"股票代码" bson:"股票代码"` // stock code exactly as it appears in the CSV
	Name string `json:"名称" bson:"名称"` // stock name
	ClosingPrice float64 `json:"收盘价" bson:"收盘价"` // closing price
	MaxPrice float64 `json:"最高价" bson:"最高价"` // highest price
	MinPrice float64 `json:"最低价" bson:"最低价"` // lowest price
	OpeningPrice float64 `json:"开盘价" bson:"开盘价"` // opening price
	PreviousClosingPrice float64 `json:"前收盘" bson:"前收盘"` // previous close
	UpsDowns float64 `json:"涨跌额" bson:"涨跌额"` // price change amount
	UpsDownsRatio float64 `json:"涨跌幅" bson:"涨跌幅"` // price change ratio
	TurnoverRate float64 `json:"换手率" bson:"换手率"` // turnover rate
	Volume float64 `json:"成交量" bson:"成交量"` // traded volume
	Turnover float64 `json:"成交金额" bson:"成交金额"` // traded amount
	MarketValue float64 `json:"总市值" bson:"总市值"` // total market value
	CirculatingMarketValue float64 `json:"流通市值" bson:"流通市值"` // circulating market value
	Code int `json:"股票数字代码" bson:"股票数字代码"` // numeric stock code: the digits of CodeStr parsed as an int
}
2022-06-21 17:19:21 +00:00
// StockBase is one entry of the stock list: it is unmarshalled from the JSON
// objects in the quote service's "list" array and gob-encoded into the local
// stock-code cache file. The field names are the upper-case JSON keys and are
// also the gob field names, so they must not be renamed without invalidating
// existing cache files.
//
// Within this file CODE is used to build the quote page URL, SYMBOL to locate
// the trade-history link and to match already-stored stocks, and MCAP as the
// market-cap threshold for downloading.
type StockBase struct {
	// CodeStr string // with region code
	// Code string // without region code
	CODE        string  `json:"CODE"`
	FIVE_MINUTE float64 `json:"FIVE_MINUTE"`
	HIGH        float64 `json:"HIGH"`
	HS          float64 `json:"HS"`
	LB          float64 `json:"LB"`
	LOW         float64 `json:"LOW"`
	MCAP        float64 `json:"MCAP"`
	MFSUM       float64 `json:"MFSUM"`
	NAME        string  `json:"NAME"`
	OPEN        float64 `json:"OPEN"`
	PE          float64 `json:"PE"`
	PERCENT     float64 `json:"PERCENT"`
	PRICE       float64 `json:"PRICE"`
	SNAME       string  `json:"SNAME"`
	SYMBOL      string  `json:"SYMBOL"`
	TCAP        float64 `json:"TCAP"`
	TURNOVER    float64 `json:"TURNOVER"`
	UPDOWN      float64 `json:"UPDOWN"`
	VOLUME      float64 `json:"VOLUME"`
	WB          float64 `json:"WB"`
	YESTCLOSE   float64 `json:"YESTCLOSE"`
	ZF          float64 `json:"ZF"`
	NO          float64 `json:"NO"`
}
2022-06-22 10:23:18 +00:00
// DefaultPage caches the browser page created by the first GetDefaultPage
// call so that later calls reuse it; it is nil until then.
var DefaultPage *rod.Page
// GetDefaultPage returns the shared browser page, creating it on first use.
//
// On the first call it picks a free port with GetPort, launches the
// google-chrome binary with that remote-debugging port, a fixed user-data-dir
// and the headless flag removed, connects to it, opens a page emulating a
// 1920x1080 device and stores it in DefaultPage. Launch or connection
// failures panic (Must* helpers).
func GetDefaultPage() *rod.Page {
	if DefaultPage != nil {
		return DefaultPage
	}

	debugPort := GetPort()
	log.Println("get port:", debugPort)

	chrome := launcher.New()
	chrome = chrome.Bin(`google-chrome`)
	chrome = chrome.RemoteDebuggingPort(debugPort)
	chrome = chrome.Set("user-data-dir", fmt.Sprintf("/tmp/%s_rod", "money-money"))
	chrome = chrome.Delete("headless")

	// MustLaunch returns the debug (control) URL of the started browser.
	controlURL := chrome.MustLaunch()
	fmt.Printf("debug url: %s\n", controlURL)

	laptop := devices.Device{
		Title:        "Laptop with MDPI screen",
		Capabilities: []string{"touch", "mobile"},
		UserAgent:    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
		Screen: devices.Screen{
			DevicePixelRatio: 1,
			Horizontal: devices.ScreenSize{
				Width:  1920,
				Height: 1080,
			},
		},
	}

	// Connect to the browser and open the page with the emulated device.
	browser := rod.New().ControlURL(controlURL).MustConnect()
	DefaultPage = browser.DefaultDevice(laptop).MustPage()
	return DefaultPage
}
2022-06-20 18:17:29 +00:00
func main() {
2022-06-21 05:38:38 +00:00
log.SetFlags(log.Llongfile | log.LstdFlags)
2022-06-20 18:17:29 +00:00
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
2022-06-22 10:23:18 +00:00
// client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://localhost:27017"))
client, err := mongo.Connect(ctx, options.Client().ApplyURI("mongodb://root:6601502@localhost:27017"))
if err != nil {
panic(err)
}
cur, err := client.Database("money").Collection("stock").Aggregate(context.TODO(), bson.A{
bson.M{"$group": bson.M{"_id": "$股票数字代码"}},
})
2022-06-21 05:38:38 +00:00
if err != nil {
panic(err)
}
2022-06-22 10:23:18 +00:00
var skipMap map[string]bool = make(map[string]bool)
for cur.Next(context.TODO()) {
var doc bson.M
err = cur.Decode(&doc)
if err == nil {
skipMap[strconv.Itoa(int(doc["_id"].(int32)))] = true
} else {
log.Panic(err)
}
}
2022-06-21 05:38:38 +00:00
murl := `curl 'http://quotes.money.163.com/hs/service/diyrank.php?host=http%3A%2F%2Fquotes.money.163.com%2Fhs%2Fservice%2Fdiyrank.php&page=1&query=STYPE%3AEQA&fields=NO%2CSYMBOL%2CNAME%2CPRICE%2CPERCENT%2CUPDOWN%2CFIVE_MINUTE%2COPEN%2CYESTCLOSE%2CHIGH%2CLOW%2CVOLUME%2CTURNOVER%2CHS%2CLB%2CWB%2CZF%2CPE%2CMCAP%2CTCAP%2CMFSUM%2CMFRATIO.MFRATIO2%2CMFRATIO.MFRATIO10%2CSNAME%2CCODE%2CANNOUNMT%2CUVSNEWS&sort=PERCENT&order=desc&count=1000&type=query' \
-H 'Accept: application/json, text/javascript, */*; q=0.01' \
-H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
-H 'Connection: keep-alive' \
-H 'Cookie: _ntes_nnid=07a59ac6cc3c3873093db99e3419a5c7,1652972918736; _ntes_nuid=07a59ac6cc3c3873093db99e3419a5c7; _antanalysis_s_id=1655737843219; s_n_f_l_n3=90474b666b6678eb1655739716131; ne_analysis_trace_id=1655740348110; _ntes_stock_recent_=0601857%7C0601808; _ntes_stock_recent_=0601857%7C0601808; _ntes_stock_recent_=0601857%7C0601808; pgr_n_f_l_n3=90474b666b6678eb165574055174140; vinfo_n_f_l_n3=90474b666b6678eb.1.1.1655737842842.1655738334425.1655740555481' \
-H 'Referer: http://quotes.money.163.com/old/' \
-H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' \
2022-06-22 10:23:18 +00:00
-H 'X-Requested-With: XMLHttpRequest'`
2022-06-21 05:38:38 +00:00
tp := gcurl.Parse(murl).Temporary()
page := tp.QueryParam(`page=\d+`)
var stockCodesFile = "./stock_codes.gob"
2022-06-21 17:19:21 +00:00
var stockCodes []*StockBase
2022-06-21 05:38:38 +00:00
f, err := os.Open(stockCodesFile)
if err == nil {
err = gob.NewDecoder(f).Decode(&stockCodes)
if err != nil {
panic(err)
}
} else {
var i int64 = 0
var pagecount int64 = 100
for ; i < pagecount; i++ {
page.IntSet(i)
resp, err := tp.Execute()
if err != nil {
panic(err)
}
2022-06-21 17:19:21 +00:00
2022-06-21 05:38:38 +00:00
jr := gjson.ParseBytes(resp.Content())
pagecount = jr.Get("pagecount").Int()
2022-06-21 17:19:21 +00:00
for _, s := range jr.Get("list").Array() {
var stockCode StockBase
err = json.Unmarshal([]byte(s.String()), &stockCode)
if err != nil {
panic(err)
}
stockCodes = append(stockCodes, &stockCode)
2022-06-21 05:38:38 +00:00
}
2022-06-21 17:19:21 +00:00
2022-06-21 05:38:38 +00:00
// log.Println(jr.String())
}
f, err = os.OpenFile(stockCodesFile, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0664)
if err != nil {
panic(err)
}
err = gob.NewEncoder(f).Encode(&stockCodes)
if err != nil {
panic(err)
}
}
if f != nil {
err = f.Close()
if err != nil {
panic(err)
}
}
for _, code := range stockCodes {
2022-06-22 10:23:18 +00:00
if _, ok := skipMap[code.SYMBOL]; ok {
continue
}
2022-06-21 17:19:21 +00:00
if code.MCAP >= 50000000000 {
DownloadDataFromCode(client, code)
}
2022-06-21 05:38:38 +00:00
}
}
2022-06-21 17:19:21 +00:00
func DownloadDataFromCode(client *mongo.Client, code *StockBase) {
2022-06-21 05:38:38 +00:00
// 300731
2022-06-22 10:23:18 +00:00
// durl := `curl 'http://quotes.money.163.com/service/chddata.html?code=${CODE}&start=20170101&end=20220621&fields=TCLOSE;HIGH;LOW;TOPEN;LCLOSE;CHG;PCHG;TURNOVER;VOTURNOVER;VATURNOVER;TCAP;MCAP' \
// -H 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9' \
// -H 'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8' \
// -H 'Connection: keep-alive' \
// -H 'Cookie: _ntes_nnid=07a59ac6cc3c3873093db99e3419a5c7,1652972918736; _ntes_nuid=07a59ac6cc3c3873093db99e3419a5c7; _antanalysis_s_id=1655737843219; ne_analysis_trace_id=1655740348110; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; _ntes_stock_recent_=${CODE}%7C1300660%7C1300731%7C0601857%7C0601808; s_n_f_l_n3=90474b666b6678eb1655828892281; pgr_n_f_l_n3=90474b666b6678eb16558271774556486; vinfo_n_f_l_n3=90474b666b6678eb.1.6.1655737842842.1655828160869.1655828914287' \
// -H 'Referer: http://quotes.money.163.com/trade/lsjysj_${SYMBOL}.html' \
// -H 'Upgrade-Insecure-Requests: 1' \
// -H 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.0.0 Safari/537.36' `
2022-06-21 17:19:21 +00:00
// http://quotes.money.163.com/0601988.html
2022-06-22 10:23:18 +00:00
page := GetDefaultPage()
stockurl := fmt.Sprintf("http://quotes.money.163.com/%s.html", code.CODE)
log.Println(stockurl)
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
page.Navigate(stockurl)
page.WaitNavigation(proto.PageLifecycleEventNameFirstContentfulPaint)()
time.Sleep(time.Millisecond * 100)
ele, err := page.ElementsX(fmt.Sprintf("//a[contains(@href,'/trade/lsjysj_%s')]/@href", code.SYMBOL))
2022-06-21 17:19:21 +00:00
if err != nil {
panic(err)
}
if iter := ele.First(); iter != nil {
2022-06-22 10:23:18 +00:00
iter.WaitEnabled()
2022-06-21 17:19:21 +00:00
urlpath, err := iter.HTML()
2022-06-22 10:23:18 +00:00
log.Println("click", urlpath)
2022-06-21 17:19:21 +00:00
if err != nil {
panic(err)
}
page.MustNavigate("http://quotes.money.163.com" + urlpath)
ahref := page.MustElementX("//a[@id='downloadData']")
2022-06-22 10:23:18 +00:00
ahref.WaitEnabled()
2022-06-21 17:19:21 +00:00
ahref.Click(proto.InputMouseButtonLeft)
e := page.MustElementX("//a[@class='blue_btn submit']")
2022-06-22 10:23:18 +00:00
e.WaitEnabled()
w := page.Browser().MustWaitDownload()
2022-06-21 17:19:21 +00:00
e.Click(proto.InputMouseButtonLeft)
2022-06-22 10:23:18 +00:00
downloaddata := w()
log.Println(len(downloaddata))
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
// durl = strings.ReplaceAll(durl, `${SYMBOL}`, code.SYMBOL)
// durl = strings.ReplaceAll(durl, `${CODE}`, code.CODE)
2022-06-21 17:19:21 +00:00
2022-06-22 10:23:18 +00:00
// resp, err := gcurl.Parse(durl).Temporary().Execute()
// if err != nil {
// panic(err)
// }
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
reader := csv.NewReader(bytes.NewBuffer(downloaddata))
alls, err := reader.ReadAll()
if err != nil {
panic(err)
}
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
var jfield []string
for _, field := range alls[0] {
v, _ := GbkToUtf8([]byte(field))
jfield = append(jfield, string(v))
// log.Printf("%#v", string(v))
}
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
re, _ := regexp.Compile(`\d+`)
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
var stocks []mongo.WriteModel
for _, line := range alls[1:] {
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
var fields []string
for _, field := range line {
v, _ := GbkToUtf8([]byte(field))
fields = append(fields, string(v))
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
}
code, err := strconv.Atoi(re.FindString(fields[1]))
if err != nil {
panic(err)
}
2022-06-20 18:17:29 +00:00
2022-06-22 10:23:18 +00:00
s := &Stock{
Date: fields[0],
CodeStr: fields[1],
Name: fields[2],
ClosingPrice: ToFloat(fields[3]),
MaxPrice: ToFloat(fields[4]),
MinPrice: ToFloat(fields[5]),
OpeningPrice: ToFloat(fields[6]),
PreviousClosingPrice: ToFloat(fields[7]),
UpsDowns: ToFloat(fields[8]),
UpsDownsRatio: ToFloat(fields[9]),
TurnoverRate: ToFloat(fields[10]),
Volume: ToFloat(fields[11]),
Turnover: ToFloat(fields[12]),
MarketValue: ToFloat(fields[13]),
CirculatingMarketValue: ToFloat(fields[14]),
Code: code,
}
stocks = append(stocks, &mongo.InsertOneModel{Document: s})
2022-06-20 18:17:29 +00:00
}
2022-06-22 10:23:18 +00:00
cstock := client.Database("money").Collection("stock")
r, err := cstock.BulkWrite(context.TODO(), stocks)
2022-06-20 18:17:29 +00:00
if err != nil {
}
2022-06-22 10:23:18 +00:00
log.Println(code.CODE, r)
time.Sleep(time.Second * 2)
2022-06-20 18:17:29 +00:00
}
}
// ToFloat converts s to a float64. The literal "None" maps to 0; any other
// text that strconv.ParseFloat rejects causes a panic with the parse error.
func ToFloat(s string) float64 {
	if s != "None" {
		parsed, parseErr := strconv.ParseFloat(s, 64)
		if parseErr == nil {
			return parsed
		}
		panic(parseErr)
	}
	return 0
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}
2022-06-21 17:19:21 +00:00
// GetPort returns a TCP port that was free at the time of the call.
//
// It listens on ":0" (all interfaces, port chosen by the OS), reads the
// assigned port and closes the listener again. The port is released before
// returning, so another process could in principle take it before the caller
// binds it. A failure to listen or to close the listener panics.
func GetPort() int {
	l, err := net.Listen("tcp", ":0")
	if err != nil {
		// Previously ignored, which led to a nil dereference on l below.
		panic(err)
	}
	port := l.Addr().(*net.TCPAddr).Port
	if err := l.Close(); err != nil {
		panic(err)
	}
	return port
}