Notes: Tuning the Structure of a Multivariate Nonlinear Regression Network
Task Description
The input data is 2-dimensional and the output is 1-dimensional; the mapping between them is nonlinear.
In addition, later inspection found that the dataset is highly non-uniform, as a scatter plot of the raw data (drawn by the plotting block in the code below) made clear.
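One rough way to quantify that non-uniformity is to bin the two input columns and compare per-bin sample counts. A minimal sketch, assuming the same CSV layout as in the code below:

import numpy as np

data = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv", "rb"), delimiter=",", skiprows=0)
# Bin the two input columns; strongly unequal bin counts indicate non-uniform sampling
counts, _, _ = np.histogram2d(data[:, 0], data[:, 1], bins=10)
print("samples per bin: min =", counts.min(), " max =", counts.max())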
Initial Model Structure
'''The first three columns of the imported CSV file are the number of layers, the TiO2 layer thickness,
and the applied voltage; these are the inputs of the neural network.
The fourth column is the magnitude of the excited photocurrent; this is the network output (i.e. the quantity to predict).'''
import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.layers import Activation
import keras
from pylab import *
from mpl_toolkits.mplot3d import Axes3D
'''Load the CSV files'''
csv_train = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"), delimiter=",", skiprows=0)  # returns an ndarray
print('train.csv loaded!')
# Set the test set aside for now; if the model can't even converge well on the raw data, the test set is moot anyway (doge)
csv_test = np.loadtxt(open("./data/2D_in_without_N/U&T&I_1L.csv","rb"), delimiter=",", skiprows=0)  # returns an ndarray
print('test.csv loaded!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim     # rank of the ndarray
# [m, n] = data_shape          # number of rows and columns
# print("csv_data.dim = ", data_dim)
# print("csv_data.shape = ", data_shape)
# print("csv_data m = {0}, csv_data n = {1}".format(m, n))
'''Select the training and test sets'''
# Training set
X_train = csv_train[:,0:2]# [:,np.newaxis]
Y_train = csv_train[:,2]
# Test set
X_test = csv_test[:,0:2]
Y_test = csv_test[:,2]
'''Check the input shapes'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)
'''Plot the raw data'''
# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig, auto_add_to_figure=False)
# fig.add_axes(ax)
# X = csv_train[:, 0]
# Y = csv_train[:, 1]
# Z = csv_train[:, 2]
# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# # print(x.shape)
# # Build the mesh grid
# X, Y = np.meshgrid(X, Y)
# '''Shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")
# '''Plot'''
# ax.scatter(x, y, z)
# # Axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})
# plt.show()
'''Set up the network structure'''
# Define an RBF layer
class RBFLayer(Layer):
    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        # One trainable center mu_j per unit, with the same dimension as the input
        self.mu = self.add_weight(name='mu',
                                  shape=(int(input_shape[1]), self.units),
                                  initializer='uniform',
                                  trainable=True)
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        # Gaussian RBF: exp(-gamma * ||x - mu_j||^2) for every center mu_j
        diff = K.expand_dims(inputs) - self.mu
        l2 = K.sum(K.pow(diff, 2), axis=1)
        res = K.exp(-1 * self.gamma * l2)
        return res

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)
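# Usage sketch for the layer above (hypothetical gamma value; the layer ultimately
# stayed commented out in the experiments below):
#   model.add(RBFLayer(32, 0.5))
# adds 32 Gaussian units, each responding with exp(-gamma * ||x - mu_j||^2)
# around its own trainable center mu_j.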
model = Sequential()
'''First layer'''
model.add(Dense(units=128, input_dim=2, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# Adding LeakyReLU noticeably lowers the loss
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
'''Second layer'''
model.add(Dense(units=32, input_dim = 128, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))
'''Third layer'''
# model.add(RBFLayer(32, 0.5))
# Dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=8, input_dim=32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(Activation('tanh'))
'''Fourth layer'''
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
'''Output activation'''
# model.add(Activation('elu'))           # stage 1 converges at <cost = 120>
# model.add(Activation('selu'))          # stage 1 converges at <cost = 111>
# model.add(Activation('softplus'))      # stage 1 converges at <cost = 131>
# model.add(Activation('softsign'))      # stage 1 converges at <cost = 284>
# model.add(Activation('relu'))          # stage 1 converges at <cost = 131>
# model.add(Activation('tanh'))          # stage 1 converges at <cost = 281>
# model.add(Activation('sigmoid'))       # stage 1 converges at <cost = 293>
# model.add(Activation('hard_sigmoid'))  # stage 1 converges at <cost = 293>
# model.add(Activation('exponential'))   # stage 1 converges at <cost = 131>
model.add(Activation('linear'))          # stage 1 converges at <cost = 0.24>
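# For regression onto an unbounded photocurrent, a linear output is the natural choice:
# bounded activations (tanh, sigmoid, hard_sigmoid, softsign) saturate and cannot reach
# large target values, which is consistent with the higher costs recorded above.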
'''Show the network structure'''
model.summary()
'''Choose an optimizer'''
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)  # stage 2 converges at <cost = 10> around <step = 100000>
# SGD updates so frequently that the cost function oscillates badly
sgd = SGD(lr=5)  # NaN
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)  # stage 2 converges at <cost = 10000> around <step = 100000>
Adagrad = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)  # stage 2 converges at <cost = 50000> around <step = 100000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)  # stage 2 converges at <cost = 250> around <step = 70000>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)  # stage 2 converges at <cost = 22> around <step = 100000>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)  # stage 2 converges at <cost = 30> around <step = 110000>
# Set the optimizer and loss function
model.compile(optimizer=Adam, loss='mse')
'''Training'''
for step in range(50001):
    # Full-batch update: the entire training set is used as one batch
    loss = model.train_on_batch(X_train, Y_train)
    # loss, accuracy = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test, Y_test))
    if step % 500 == 0:
        print("The step is ", step, "......................................" + '[loss]:', loss)
        # "............." + '[cost, acc_train]:', train_cost_acc,
        # "............." + '[loss, acc_test]:', loss, accuracy)
'''Plot with matplotlib'''
# # 2D (X_train has only two columns, so two subplots suffice)
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1, 2, 1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)
# # plt.subplot(1, 2, 2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)
# # plt.show()
# 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)
# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig, auto_add_to_figure=False)
# fig.add_axes(ax)
# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 2]
# '''Slice the data (to reduce computation)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)
# # Build the mesh grid
# X, Y = np.meshgrid(X, Y)
# '''Shape check'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")
# '''Plot'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))
# # Axis labels
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})
# plt.show()
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})
# show()
'''Save the model'''
save_path = r'D:\anaconda\project\Spectral_analysis\model_save\3D\U&T&I_1L_weight.h5'
model.save_weights(save_path)
# model.save(save_path)  # creates the HDF5 file 'my_model.h5'
# model = load_model('my_model.h5')
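# Note: save_weights stores only the weights, not the architecture; to restore them,
# rebuild the same Sequential model first and then (a sketch):
# model.load_weights(save_path)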
# '''Model visualization (Graphviz)'''
# from keras.utils import plot_model
# plot_model(model, to_file='model.png')
Process
Around step = 100,000:
1. Added Dropout
The loss rose sharply.
2. Replaced LeakyReLU with plain ReLU
No obvious effect.
3. Added the RBFLayer
No obvious effect.
4. Added more layers
No obvious effect.
5. Switched optimizers
(1) RMSprop
The loss fluctuates strongly, but its overall trend converges, settling around 30 by the 100,000th step.
Having now exhaustively tried all of the above, the model has hit a bottleneck; presumably only a change of structure can yield further progress.
6. Deepened the network and increased the parameter count
From:
To:
In practice it did not converge…
7. Added tanh activation layers within the first few layers (see the sketch after this list)
After 130k steps the model converged to loss = 89; convergence efficiency dropped considerably, but whether it will end up in a local optimum as before remains to be seen.
After 200k steps it converged to loss = 30; there still seemed to be room to converge, so a third round of 100k steps was started.
After 250k steps the model converged to loss = 11.
After two 500k-step runs, it has essentially settled at about loss = 8.9.
8. Next, the network structure will be modified by adding some special-purpose layers.
Off to class now; to be continued.
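For reference, a sketch of what the change in step 7 might look like in the Sequential definition above. The exact placement of the tanh layers was not recorded, so this assumes one tanh after each of the first two LeakyReLU blocks:

model.add(Dense(units=128, input_dim=2, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
model.add(Activation('tanh'))  # assumed placement of the added tanh
model.add(Dense(units=32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
model.add(Activation('tanh'))  # assumed placement of the added tanh
model.add(Dense(units=8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(Dense(units=1, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(Activation('linear'))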