predict-coin/data.py
2021-04-02 17:22:46 +08:00

203 lines
5.7 KiB
Python

from keras.models import Sequential
from keras.layers import Dense, Dropout, Embedding
from keras.layers import InputLayer
from keras.layers import LSTM
# from keras import backend as K
import pymysql
import pickle
import os
import numpy
import time, datetime
def _group_rows_by_day(rows, ts_index):
    """Group ``rows`` by the calendar date (as ``str``) of the datetime at ``ts_index``."""
    grouped = {}
    for row in rows:
        grouped.setdefault(str(row[ts_index].date()), []).append(row)
    return grouped


def _fetch_collect_from_db():
    """Query the hourly pay and gift tables and return ``(collect_pay, collect_gift)``.

    Both results map a ``"YYYY-MM-DD"`` string to that day's rows, sorted by
    ``create_at``. Pay rows are ``(coin, extra_coins, pay_users, create_at)``
    and only days with exactly 24 rows are kept; gift rows are
    ``(coin, users, create_at)``.
    """
    # NOTE(review): host and credentials are hard-coded in source; they should
    # be moved to configuration / environment variables.
    db = pymysql.connect(host="sg-board1.livenono.com", port=3306,user="root",passwd="Nono-databoard",db="databoard",charset="utf8")
    try:
        cursor = db.cursor()
        try:
            today = time.strftime("%Y-%m-%d", time.localtime())
            cursor.execute(
                '''SELECT coin, extra_coins, pay_users, create_at from pay_items_hour pih where region = "all" and platform="all" and create_at <= %s''',
                (today,),
            )
            collect_pay = _group_rows_by_day(cursor.fetchall(), 3)
            print('共查找出', cursor.rowcount, '条数据')

            # Keep only complete days (one row per hour).
            collect_pay = {
                day: rows for day, rows in collect_pay.items() if len(rows) == 24
            }

            # Raises IndexError when no complete day exists; the caller
            # treats that like any other failure to build the data set.
            querydate = sorted(collect_pay)
            cursor.execute(
                '''SELECT coin, users, create_at from gift_items_hour pih where region = "all" and create_at >= %s and create_at <= %s''',
                # A bare date as the upper bound compares as midnight and would
                # drop every row of the last day except 00:00, so extend the
                # bound to the end of that day.
                (querydate[0], querydate[-1] + " 23:59:59"),
            )
            collect_gift = _group_rows_by_day(cursor.fetchall(), 2)
        finally:
            cursor.close()
    finally:
        db.close()

    for rows in collect_pay.values():
        rows.sort(key=lambda x: x[3])
    for rows in collect_gift.values():
        rows.sort(key=lambda x: x[2])
    return collect_pay, collect_gift


def get_collect():
    """Return the hourly pay/gift statistics, using ``./collect.pickle`` as a cache.

    If the cache file can be unpickled, its content is returned as-is.
    Otherwise the load error is printed, the data is fetched from MySQL,
    written back to the cache file and returned.

    Returns:
        dict: ``{"pay": {day: rows}, "gift": {day: rows}}`` on success. If the
        database fetch fails, the error is printed and an empty dict is
        returned (ordinary exceptions are never propagated to the caller;
        ``KeyboardInterrupt``/``SystemExit`` are).
    """
    collect = {}
    loadfile = "./collect.pickle"

    try:
        # NOTE(review): pickle can execute arbitrary code while loading; this
        # is only safe as long as the cache file is trusted.
        with open(loadfile, 'rb') as cache_file:
            return pickle.load(cache_file)
    except Exception as e:
        # Missing or unreadable cache: fall through and rebuild it.
        print(e)

    try:
        collect_pay, collect_gift = _fetch_collect_from_db()
        collect["pay"] = collect_pay
        collect["gift"] = collect_gift
        with open(loadfile, 'wb') as cache_file:
            pickle.dump(collect, cache_file)
    except Exception as e:
        # Best effort, as before: report the failure and return what we have.
        print(e)
    return collect
def load_pay_data(textNum = 80):
    """Build training and test arrays from the hourly payment statistics.

    Days are ordered by the timestamp of their first row. For every day after
    the first, one sample is produced per hourly row (paired with the rows of
    the preceding day in the data set, so the shorter of the two days decides
    how many samples are made). Each sample has three features:

    * position of the row within the day (0-based),
    * coins accumulated so far that day divided by the preceding day's total,
    * coins accumulated so far that day.

    "Coins" is ``row[0] + row[1]`` of each pay row. The label of every sample
    of a day is that day's accumulated coins after its last paired row.

    Args:
        textNum: number of trailing samples split off as the test set.

    Returns:
        tuple: ``(x_train, y_train, tx_train, ty_train, input_shape)`` where
        the ``x`` arrays have shape ``(samples, 3, 1)``, the ``y`` arrays are
        one-dimensional and ``input_shape`` is ``(3, 1)``.

    Raises:
        KeyError: if ``get_collect()`` returned no ``"pay"`` entry.
        IndexError: if fewer than two days of data are available.
        ZeroDivisionError: if the preceding day's coin total is zero.
    """
    collect = get_collect()
    # TODO: model the relationship between gift and pay fluctuations
    days = sorted(collect["pay"].values(), key=lambda rows: rows[0][3])

    x_train = []
    y_train = []
    for prev_day, cur_day in zip(days, days[1:]):
        last_total_coin = sum(row[0] + row[1] for row in prev_day)
        # Only rows that have a counterpart in the preceding day are used.
        hours = min(len(cur_day), len(prev_day))
        total_coin = 0
        for hour, row in enumerate(cur_day[:hours]):
            total_coin += row[0] + row[1]
            # Features: hour index, running total relative to the preceding
            # day's total, running total.
            x_train.append([hour, total_coin / last_total_coin, total_coin])
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([total_coin] * hours)

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # The last ``textNum`` samples become the test set.
    split = len(x_train) - textNum
    tx_train = x_train[split:]
    ty_train = y_train[split:]
    x_train = x_train[:split]
    y_train = y_train[:split]
    return x_train, y_train, tx_train, ty_train, input_shape
def load_gift_data(textNum = 80):
    """Build training and test arrays from the hourly gift statistics.

    Days are ordered by the timestamp of their first row. For every day after
    the first, one sample is produced per hourly row (paired with the rows of
    the preceding day in the data set, so the shorter of the two days decides
    how many samples are made). Each sample has three features:

    * position of the row within the day (0-based),
    * coins accumulated so far that day divided by the preceding day's total,
    * coins accumulated so far that day.

    "Coins" is ``row[0]`` of each gift row. The label of every sample of a
    day is that day's accumulated coins after its last paired row.

    Args:
        textNum: number of trailing samples split off as the test set.

    Returns:
        tuple: ``(x_train, y_train, tx_train, ty_train, input_shape)`` where
        the ``x`` arrays have shape ``(samples, 3, 1)``, the ``y`` arrays are
        one-dimensional and ``input_shape`` is ``(3, 1)``.

    Raises:
        KeyError: if ``get_collect()`` returned no ``"gift"`` entry.
        IndexError: if fewer than two days of data are available.
        ZeroDivisionError: if the preceding day's coin total is zero.
    """
    collect = get_collect()
    days = sorted(collect["gift"].values(), key=lambda rows: rows[0][2])

    x_train = []
    y_train = []
    for prev_day, cur_day in zip(days, days[1:]):
        last_total_coin = sum(row[0] for row in prev_day)
        # Only rows that have a counterpart in the preceding day are used.
        hours = min(len(cur_day), len(prev_day))
        total_coin = 0
        for hour, row in enumerate(cur_day[:hours]):
            total_coin += row[0]
            # Features: hour index, running total relative to the preceding
            # day's total, running total.
            x_train.append([hour, total_coin / last_total_coin, total_coin])
        # Every sample of the day is labelled with the day's final total.
        y_train.extend([total_coin] * hours)

    input_shape = (len(x_train[0]), 1)
    x_train = numpy.reshape(x_train, (len(x_train), input_shape[0], input_shape[1]))
    y_train = numpy.reshape(y_train, (len(y_train)))

    # The last ``textNum`` samples become the test set.
    split = len(x_train) - textNum
    tx_train = x_train[split:]
    ty_train = y_train[split:]
    x_train = x_train[:split]
    y_train = y_train[:split]
    return x_train, y_train, tx_train, ty_train, input_shape