import csv

import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

from keras.models import Sequential
from keras.layers import Dense, LSTM
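
# Environment note: the imports above assume a Keras 2.x-style API (for
# example the Keras bundled with TensorFlow) alongside scikit-learn, NumPy,
# and matplotlib.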


def api_dataset():
    # Read the per-minute API request counts from the third CSV column,
    # skipping the header and any rows that cannot be parsed as numbers.
    with open('api_access_fix.csv', encoding='utf-8-sig') as f:
        reader = csv.reader(f)
        dataset = []
        for item in reader:
            try:
                dataset.append([int(float(item[2]))])
            except (ValueError, IndexError):
                pass
    # Smooth abnormally low counts (<= 500) on the first day by replacing them
    # with the average of the same minute over the following six days
    # (1440 samples per day).
    for i in range(len(dataset)):
        if dataset[i][0] <= 500 and i < 1440:
            dataset[i][0] = int(sum(dataset[i + x * 1440][0] for x in range(1, 7)) / 6)
    return np.array(dataset)
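
# Optional sanity check (commented out): with one-minute samples and 1440
# samples per day, a full week of data gives api_dataset() a shape of roughly
# (10080, 1), a single column of request counts.
# print(api_dataset().shape)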


# Normalization helper
def sc_fit_transform(nDlist):
    # Scale all values into the 0-1 range
    sc = MinMaxScaler(feature_range=(0, 1))
    dataset_transform = sc.fit_transform(X=nDlist)
    # Return the fitted scaler together with the normalized data
    return sc, np.array(dataset_transform)
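
# Usage note: `sc, scaled = sc_fit_transform(data)` returns both the scaled
# array and the fitted MinMaxScaler, so the original scale can be recovered
# later with `sc.inverse_transform(scaled)`.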

###############################################################################
# The previous 60 observations are used to predict the next value
timestep = 60
# Size of the training set
training_num = 8640
# Number of training epochs
epoch = 10
# Number of samples drawn per batch
batch_size = 100
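# Note (inferred from the constants above): with 1440 one-minute samples per
# day, training_num = 8640 covers the first six days, and the remaining
# samples up to index 10080 (the seventh day) are held out for testing.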
###############################################################################
listDataset = api_dataset()
# print(listDataset.shape)
# Training inputs: the per-minute access counts
xTrainDataset = listDataset[0:training_num]
# Training targets: the access count at the next time step
yTrainDataset = listDataset[1:training_num + 1]

# Normalize the raw data
scTrainDatasetX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDatasetY, yTrainDataset = sc_fit_transform(yTrainDataset)

###############################################################################
# Build the training sequences the LSTM expects
xTrain = []
for i in range(timestep, training_num):
    xTrain.append(xTrainDataset[i - timestep:i])
xTrain = np.array(xTrain)
# print(xTrain.shape)

yTrain = []
for i in range(timestep, training_num):
    yTrain.append(yTrainDataset[i])
yTrain = np.array(yTrain)
# print(yTrain.shape)
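# Shape note: xTrain ends up as (training_num - timestep, timestep, 1), one
# window of the previous `timestep` scaled counts per sample, and yTrain as
# (training_num - timestep, 1), matching the LSTM input shape defined below.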

###############################################################################
# Build the network with the Keras Sequential model
model = Sequential()
# The LSTM is the first layer, so its input shape must be specified
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure training; MAE is tracked instead of accuracy, which is not a
# meaningful metric for a regression loss
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
model.save('my_model.h5')
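# Optional sketch (not part of the original pipeline): the saved model can be
# reloaded later for inference without retraining, along the lines of
#   from keras.models import load_model
#   model = load_model('my_model.h5')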
###############################################################################
# Build the test inputs and targets the same way as the training data
xTestDataset = listDataset[training_num:10080 - 2]
scTestDatasetX, xTestDataset = sc_fit_transform(xTestDataset)

yTestDataset = listDataset[training_num + 1:10080 - 1]
scTestDatasetY, yTestDataset = sc_fit_transform(yTestDataset)
# Build the test sequences the LSTM expects
xTest = []
for i in range(timestep, len(xTestDataset)):
    xTest.append(xTestDataset[i - timestep:i])
xTest = np.array(xTest)
print(xTest.shape)
yTest = []
for i in range(timestep, len(xTestDataset)):
    yTest.append(yTestDataset[i])
# Undo the normalization so the targets are back in raw request counts
yTest = scTestDatasetY.inverse_transform(X=np.array(yTest))
print(yTest.shape)
print(yTest)
###############################################################################
# Run the model on the test sequences
yPredictes = model.predict(x=xTest)
# Undo the normalization so the predictions are back in raw request counts
yPredictes = scTestDatasetY.inverse_transform(X=yPredictes)
print(yPredictes.shape)
print(yPredictes)
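
# At this point yTest and yPredictes are both back in the original
# request-count scale and aligned sample-for-sample, so they can be plotted
# and scored against each other directly.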

###############################################################################
# Compare the results: red is the real data, blue is the predicted data
plt.plot(yTest, color='red', label='Real')
plt.plot(yPredictes, color='blue', label='Predict')
plt.title(label='Prediction')
plt.xlabel(xlabel='Time')
plt.ylabel(ylabel='Api_access_num')
plt.legend()
plt.show()

# Evaluation metrics: MAE, RMSE, and the R^2 score
mae = mean_absolute_error(yTest, yPredictes)
rmse = np.sqrt(mean_squared_error(yTest, yPredictes))
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917