predict-coin/testcase1.py

121 lines
4.2 KiB
Python
Raw Permalink Normal View History

2021-03-19 09:14:35 +00:00
import csv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
def api_dataset(path='api_access_fix.csv', threshold=500, period=1440, lookahead=6):
    """Load the access counts from a CSV file and patch low values in the first period.

    The third column (index 2) of every row is parsed as a number and
    truncated to ``int``. Rows that cannot be parsed (header lines, rows with
    fewer than three columns, non-numeric or non-finite cells) are skipped.

    Every sample in the first ``period`` rows whose value is ``<= threshold``
    is replaced by the truncated mean of the samples at the same offset in the
    following ``lookahead`` periods. When the file is too short to contain all
    of those later samples, the mean is taken over the ones that exist, and
    the value is left untouched if none exist.

    Args:
        path: CSV file to read (decoded as UTF-8, a leading BOM is ignored).
        threshold: Values at or below this are considered abnormally low.
        period: Number of samples per period (1440 by default).
        lookahead: Number of following periods used for the replacement mean.

    Returns:
        numpy array with one single-element row per parsed sample.
    """
    dataset = []
    # newline='' is the file mode the csv module documents for its readers.
    with open(path, encoding='utf-8-sig', newline='') as f:
        for item in csv.reader(f):
            try:
                dataset.append([int(float(item[2]))])
            except (IndexError, ValueError, OverflowError):
                # Deliberate best effort: unparsable rows are ignored.
                continue

    size = len(dataset)
    # Only indices inside the first period are ever patched, so stop there.
    for i in range(min(period, size)):
        if dataset[i][0] > threshold:
            continue
        later = [
            dataset[i + k * period][0]
            for k in range(1, lookahead + 1)
            if i + k * period < size
        ]
        if later:
            dataset[i][0] = int(sum(later) / len(later))
    return np.array(dataset)
def sc_fit_transform(nDlist):
    """Min-max normalise ``nDlist`` and return the fitted scaler with the result.

    A new ``MinMaxScaler`` with feature range (0, 1) is fitted on ``nDlist``
    and applied to it in one step.

    Args:
        nDlist: Data accepted by ``MinMaxScaler.fit_transform``.

    Returns:
        Tuple ``(scaler, scaled)``: the fitted scaler (kept so values can be
        mapped back with ``inverse_transform``) and the scaled data as a
        numpy array.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = np.array(scaler.fit_transform(X=nDlist))
    return scaler, scaled
###############################################################################
# Sliding-window length: the previous 60 samples are used to predict a later one
timestep = 60
# Number of leading samples of the dataset used for training
training_num = 8640
# Number of training epochs (passed to model.fit)
epoch = 10
# Number of samples per training batch (passed to model.fit)
batch_size = 100
###############################################################################
listDataset = api_dataset()
# Training inputs are the first `training_num` samples; training targets are
# the same series shifted forward by one sample.
xTrainDataset, yTrainDataset = (
    listDataset[:training_num],
    listDataset[1:training_num + 1],
)
# Normalise inputs and targets, each with its own scaler; the scalers are kept
# so that values can later be mapped back to the original scale.
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)
###############################################################################
# Build the training arrays for the LSTM.
# Each input is the window of `timestep` consecutive scaled samples ending just
# before index i; the matching target is the scaled target at index i.
# NOTE(review): yTrainDataset is the series shifted by one sample, so the
# target lies two raw samples after the window's last element -- confirm that
# this two-step-ahead forecast is intended.
xTrain = np.array(
    [xTrainDataset[i - timestep:i] for i in range(timestep, training_num)]
)
yTrain = np.array(
    [yTrainDataset[i] for i in range(timestep, training_num)]
)
###############################################################################
# Build the network as a Keras Sequential model: one LSTM layer followed by a
# single-unit Dense layer that outputs the predicted (scaled) value.
model = Sequential()
# return_sequences is left at its default, so the LSTM emits only its final
# output. As the first layer it must be told the input shape
# (window length, 1 feature).
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure training. This is a regression problem, so mean absolute error is
# tracked alongside the MSE loss; classification accuracy would be meaningless
# for continuous targets.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
# Persist the trained model to disk.
model.save('my_model.h5')
###############################################################################
# Held-out test data: the samples after the training range.
# The test inputs are scaled with the scaler fitted on the TRAINING inputs.
# Fitting a new scaler on the test data would present the model with a
# different scaling than it was trained on and would leak test-set statistics
# into the evaluation.
xTestDataset = scTrainDataseX.transform(listDataset[training_num:10080-2])
# Test targets stay in original units; they are the inputs shifted by one.
yTestDataset = listDataset[training_num+1:10080-1]
# Build the test windows for the LSTM, the same way as the training windows.
xTest = np.array(
    [xTestDataset[i - timestep:i] for i in range(timestep, len(xTestDataset))]
)
print(xTest.shape)
# Ground truth for each window, already in original units.
yTest = np.array(
    [yTestDataset[i] for i in range(timestep, len(xTestDataset))],
    dtype=float,
)
print(yTest.shape)
print(yTest)
###############################################################################
# Run the prediction
yPredictes = model.predict(x=xTest)
# The model outputs values in the scale of the training targets, so map them
# back to original units with the training-target scaler.
yPredictes = scTrainDataseY.inverse_transform(X=yPredictes)
print(yPredictes.shape)
print(yPredictes)
###############################################################################
# Compare the results in a chart: red is the real data, blue is the prediction.
for curve, colour, caption in (
    (yTest, 'red', 'Real'),
    (yPredictes, 'blue', 'Predict'),
):
    plt.plot(curve, color=colour, label=caption)
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='Api_access_num')
plt.legend()
plt.show()
# Evaluation metrics: MAE, RMSE and R^2 between the real and predicted values.
mae = mean_absolute_error(yTest, yPredictes)
# RMSE is computed as sqrt(MSE) instead of passing `squared=False`, which was
# deprecated in scikit-learn 1.4 and removed in 1.6.
rmse = np.sqrt(mean_squared_error(yTest, yPredictes))
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# Recorded outputs (mae, rmse, r2), presumably from earlier runs:
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917