121 lines
4.2 KiB
Python
121 lines
4.2 KiB
Python
|
import csv
|
|||
|
import numpy as np
|
|||
|
import matplotlib.pyplot as plt
|
|||
|
from sklearn.preprocessing import MinMaxScaler
|
|||
|
from keras.models import Sequential
|
|||
|
from keras.layers import Dense, LSTM
|
|||
|
from sklearn.metrics import mean_absolute_error
|
|||
|
from sklearn.metrics import mean_squared_error
|
|||
|
from sklearn.metrics import r2_score
|
|||
|
|
|||
|
def api_dataset(path='api_access_fix.csv', *, period=1440, low_threshold=500,
                lookahead=6):
    """Load the access-count series from a CSV file and patch low early values.

    The third column (index 2) of every row is parsed with
    ``int(float(...))``. Rows that are too short, or whose third column is
    not a finite number (for example a header row), are skipped.

    Every sample among the first ``period`` entries whose value is
    ``<= low_threshold`` is replaced by the truncated mean of the samples at
    the same offset in the following ``lookahead`` periods. When the file
    holds fewer than ``lookahead`` later periods, only the samples that exist
    are averaged; if none exist the value is left untouched.

    Args:
        path: CSV file to read (decoded as ``utf-8-sig``).
        period: Number of samples per period. The default 1440 presumably
            means one sample per minute over a day -- TODO confirm.
        low_threshold: Values at or below this are treated as needing a patch.
        lookahead: How many following periods are averaged for the patch.

    Returns:
        ``numpy.ndarray`` built from a list of single-element rows, i.e. shape
        ``(n, 1)`` for ``n`` parsed rows (an empty array when nothing parsed).

    Raises:
        ValueError: If ``period`` or ``lookahead`` is smaller than 1.
    """
    if period < 1 or lookahead < 1:
        raise ValueError('period and lookahead must both be >= 1')

    dataset = []
    with open(path, encoding='utf-8-sig', newline='') as f:
        for row in csv.reader(f):
            try:
                dataset.append([int(float(row[2]))])
            except (IndexError, ValueError, OverflowError):
                # Short row, non-numeric text, NaN or infinity: skip the row.
                continue

    total = len(dataset)
    # Only the first period can be patched, so do not scan beyond it.
    for i in range(min(period, total)):
        if dataset[i][0] > low_threshold:
            continue
        # Same offset in each later period, clipped to the data that exists.
        last = min(total, i + lookahead * period + 1)
        later = [dataset[j][0] for j in range(i + period, last, period)]
        if later:
            dataset[i][0] = int(sum(later) / len(later))
    return np.array(dataset)
|
|||
|
|
|||
|
# Normalisation helper
def sc_fit_transform(nDlist):
    """Fit a fresh ``MinMaxScaler`` on ``nDlist`` and scale it into [0, 1].

    Returns:
        A ``(scaler, scaled)`` tuple: the fitted scaler, so callers can later
        invert the scaling, and the scaled data as a NumPy array.
    """
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = np.array(scaler.fit_transform(X=nDlist))
    return scaler, scaled
|
|||
|
|
|||
|
###############################################################################
# Hyper-parameters.
# Number of preceding samples used as the input window for one prediction.
timestep = 60
# Number of samples taken from the start of the series for training.
training_num = 8640
# Number of training epochs.
epoch = 10
# Number of samples per training batch.
batch_size = 100
###############################################################################
|
|||
|
# Load the complete access-count series.
listDataset = api_dataset()

# Training inputs are the first `training_num` samples; the training targets
# are the same series shifted forward by one sample.
xTrainDataset = listDataset[:training_num]
yTrainDataset = listDataset[1:training_num + 1]

# Scale inputs and targets, keeping each fitted scaler for later use.
scTrainDataseX, xTrainDataset = sc_fit_transform(xTrainDataset)
scTrainDataseY, yTrainDataset = sc_fit_transform(yTrainDataset)
|
|||
|
|
|||
|
###############################################################################
# Build the training arrays for the LSTM: each input is a sliding window of
# `timestep` consecutive scaled samples, paired with one scaled target.
# NOTE(review): the window covers samples i-timestep .. i-1 while the target
# yTrainDataset[i] is already shifted by one, so the target looks like it is
# two steps after the window rather than one -- confirm this is intended.
windowStarts = range(timestep, training_num)

xTrain = np.array([xTrainDataset[i - timestep:i] for i in windowStarts])

yTrain = np.array([yTrainDataset[i] for i in windowStarts])
|
|||
|
###############################################################################
# Build the network as a Sequential model: one LSTM layer feeding a single
# Dense output unit.
model = Sequential()
# As the first layer the LSTM must be told its input shape:
# (window length, 1 feature). return_sequences is left at its default, so
# only the last output of the sequence is passed on to the Dense layer.
model.add(LSTM(units=128, input_shape=[xTrain.shape[1], 1]))
model.add(Dense(1))
# Configure training. This is a regression problem, so track mean absolute
# error; classification 'accuracy' is not meaningful for continuous targets.
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['mae'])
model.fit(x=xTrain, y=yTrain, epochs=epoch, batch_size=batch_size)
model.save('my_model.h5')
|
|||
|
###############################################################################
# Build the evaluation set from the samples that follow the training range.
# NOTE(review): 10080 is hard-coded; presumably the total number of samples in
# the series -- confirm against the data file.
xTestRaw = listDataset[training_num:10080-2]
yTestRaw = listDataset[training_num+1:10080-1]

# Reuse the scalers fitted on the training data. The model learned its
# input/output mapping in that scale, so fitting new scalers on the test slice
# would feed it differently scaled inputs and mis-scale its predictions.
# The old scaler names are kept as aliases for backward compatibility.
scTesDatasetX = scTrainDataseX
scTestDataseY = scTrainDataseY
xTestDataset = scTesDatasetX.transform(xTestRaw)
yTestDataset = scTestDataseY.transform(yTestRaw)

# Sliding input windows for the LSTM, built the same way as the training set.
testStarts = range(timestep, len(xTestDataset))
xTest = np.array([xTestDataset[i - timestep:i] for i in testStarts])
print(xTest.shape)

# Ground-truth targets, converted back to the original value range.
yTest = [yTestDataset[i] for i in testStarts]
yTest = scTestDataseY.inverse_transform(yTest)
print(yTest.shape)
print(yTest)
###############################################################################
# Run the prediction.
yPredictes = model.predict(x=xTest)
# Undo the target scaling so predictions are in the original value range.
yPredictes = scTestDataseY.inverse_transform(yPredictes)
print(yPredictes.shape)
print(yPredictes)
|
|||
|
###############################################################################
# Compare the results on one chart: real values in red, predictions in blue.
plt.plot(yTest, label='Real', color='red')
plt.plot(yPredictes, label='Predict', color='blue')
plt.title('Prediction')
plt.xlabel('Time')
plt.ylabel('Api_access_num')
plt.legend()
plt.show()
|
|||
|
|
|||
|
# Evaluation metrics: MAE, RMSE and the R^2 score.
mae = mean_absolute_error(yTest, yPredictes)
# RMSE is taken as sqrt(MSE) instead of passing `squared=False`, which is
# deprecated and no longer accepted by newer scikit-learn releases.
rmse = np.sqrt(mean_squared_error(yTest, yPredictes))
r2 = r2_score(yTest, yPredictes)
print(mae, rmse, r2)
# Presumably outputs recorded from earlier runs, in the order printed above
# (mae rmse r2):
# 72.02636248234026 98.38626354602893 0.9791679689516253
# 45.70792188492153 74.77525176850149 0.9880226807229917
|