当前位置: 首页 > biancheng >正文

记录:多元非线性回归网络模型结构调整经历

任务描述

输入数据为2dim 输出为1dim, 映射关系为非线性的

此外在后续的观察中发现数据集具有较高的不均匀性, 绘制散点图如下:
在这里插入图片描述

初始模型结构

'''导入的CSV文件的前三列分别为层数、TiO2层厚度和外加电压, 此为神经网络的输入
   第四列为所激发的光电流大小,此为网络输入(或者说预测量)'''

import csv
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Layer
from keras import backend as K
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import SGD
from keras.optimizers import adam
from keras.layers import Activation
import keras

from pylab import *
from mpl_toolkits.mplot3d import Axes3D


# --- Load the CSV files ---------------------------------------------------
# Fix: the original passed open(...) handles straight to np.loadtxt and
# never closed them; `with` guarantees the descriptors are released.
with open("./data/2D_in_without_N/U&T&I_1L.csv", "rb") as f_train:
    csv_train = np.loadtxt(f_train, delimiter=",", skiprows=0)  # ndarray
print('train.csv导入成功!')
# The "test" set deliberately reloads the training file: the author defers
# a real held-out split until the model can fit the raw training data.
with open("./data/2D_in_without_N/U&T&I_1L.csv", "rb") as f_test:
    csv_test = np.loadtxt(f_test, delimiter=",", skiprows=0)  # ndarray
print('test.csv导入成功!')
# data_shape = csv_data.shape  # dimensions of the data
# data_dim = csv_data.ndim     # rank of the ndarray
# [m, n] = data_shape          # row and column counts
# print("cav_data.dim = ",data_dim)
# print("cav_data.shape = ",data_shape)
# print("cav_data m ={0}, cav_data m ={1}".format(m, n))

# --- Select training and test sets ----------------------------------------
# The first two columns are the network inputs, column 2 is the regression
# target (photocurrent).
# NOTE(review): the header docstring claims three input columns (layer
# count, TiO2 thickness, voltage) but only two are used here -- confirm
# the actual layout of this CSV against the caller's data files.
X_train = csv_train[:, 0:2]  # [:, np.newaxis]
Y_train = csv_train[:, 2]
X_test = csv_test[:, 0:2]
Y_test = csv_test[:, 2]

'''检查输入形状'''
# print("==============================================================================")
# print(X_train[:, 0].shape)
# print(X_train)
# print("==============================================================================")
# print(Y_train)
# print(Y_train.shape)

'''绘制原始数据图像'''
# # print(Z_pred.shape)

# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)

# X = csv_train[:, 0]
# Y = csv_train[:, 1]
# Z = csv_train[:, 2]

# '''数据切片(减少计算量)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)

# # 绘制网格
# X, Y = np.meshgrid(X, Y)

# '''形状检查'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")


# '''绘制'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))

# # 加标签(坐标轴)
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})

# plt.show()



'''设置网络结构'''
# 创建RBF层
class RBFLayer(Layer):
    """Radial-basis-function layer: out_j = exp(-gamma * ||x - mu_j||^2).

    Each of the `units` outputs measures the Gaussian similarity of the
    input vector to a learnable center `mu_j`.
    """

    def __init__(self, units, gamma, **kwargs):
        super(RBFLayer, self).__init__(**kwargs)
        self.units = units
        # Fixed (non-trainable) width of the Gaussian kernels.
        self.gamma = K.cast_to_floatx(gamma)

    def build(self, input_shape):
        # One learnable center per output unit: shape (input_dim, units).
        self.mu = self.add_weight(
            name='mu',
            shape=(int(input_shape[1]), self.units),
            initializer='uniform',
            trainable=True,
        )
        super(RBFLayer, self).build(input_shape)

    def call(self, inputs):
        # (batch, input_dim, 1) minus (input_dim, units) broadcasts to
        # (batch, input_dim, units); summing over axis 1 yields the squared
        # distance from each sample to every center.
        delta = K.expand_dims(inputs) - self.mu
        sq_dist = K.sum(K.square(delta), axis=1)
        return K.exp(-self.gamma * sq_dist)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.units)

# Build the regression network: 2 -> 128 -> 32 -> 8 -> 1, with batch
# normalisation and LeakyReLU on the early layers and a linear output.
model = Sequential()

'''第一层'''
# Layer 1: 2 inputs -> 128 units, small-stddev normal initialisation.
model.add(Dense(units=128, input_dim = 2, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
# model.add(Activation(keras.layers.advanced_activations.LeakyReLU(alpha=0.2)))
# Adding LeakyReLU noticeably lowered the loss
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))

'''第二层'''
# Layer 2: 128 -> 32, same init / batch-norm / LeakyReLU pattern.
model.add(Dense(units=32, input_dim = 128, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())
model.add(keras.layers.advanced_activations.LeakyReLU(alpha=0.2))
# model.add(Activation('tanh'))

'''第三层'''
# model.add(RBFLayer(32, 0.5))
# Dropout severely degraded the regression results
# model.add(keras.layers.Dropout(0.5))
model.add(Dense(units=8, input_dim=32, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
# model.add(Activation('tanh'))

'''第四层'''
# Output layer: 8 -> 1 scalar prediction.
model.add(Dense(units=1, input_dim = 8, kernel_initializer=keras.initializers.random_normal(stddev=0.01)))
model.add(keras.layers.normalization.BatchNormalization())

'''激活函数'''
# Output-activation experiment log (stage-one converged cost per choice):
# model.add(Activation('elu'))            # stage one converged at <cost = 120>
# model.add(Activation('selu'))           # stage one converged at <cost = 111>
# model.add(Activation('softplus'))       # stage one converged at <cost = 131>
# model.add(Activation('softsign'))       # stage one converged at <cost = 284>
# model.add(Activation('relu'))           # stage one converged at <cost = 131>
# model.add(Activation('tanh'))           # stage one converged at <cost = 281>
# model.add(Activation('sigmoid'))        # stage one converged at <cost = 293>
# model.add(Activation('hard_sigmoid'))   # stage one converged at <cost = 293>
# model.add(Activation('exponential'))    # stage one converged at <cost = 131>
model.add(Activation('linear'))           # stage one converged at <cost = 0.24>

'''绘制网络结构'''
# Print a textual summary of the layer stack and parameter counts.
model.summary()

'''选择优化器'''
# Optimizer experiment log; only Adam is actually used in compile() below,
# the others are kept as a record of the author's comparison runs.
Adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=True)  # stage two converged at <cost = 10> by <step = 100000>
# SGD updates very frequently, which causes severe oscillation of the cost function
sgd = SGD(lr=5)                                                                                         # NaN (diverged)
RMSprop = keras.optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)                          # stage two converged at <cost = 10000> by <step = 100000>
Adagrad = keras.optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)                                    # stage two converged at <cost = 50000> by <step = 100000>
Adadelta = keras.optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)                         # stage two converged at <cost = 250> by <step = 70000>
Adamax = keras.optimizers.Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)           # stage two converged at <cost = 22> by <step = 100000>
Nadam = keras.optimizers.Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=None, schedule_decay=0.004)  # stage two converged at <cost = 30> by <step = 110000>

# Set the optimizer and loss function (mean squared error for regression)
model.compile(optimizer=Adam, loss='mse')

'''训练'''
# Train for 50001 full-batch steps, logging the loss every 500 steps.
step = 0
while step <= 50000:
    # One gradient update on the entire training set (full-batch).
    loss = model.train_on_batch(X_train, Y_train)
    # loss,accuracy = model.evaluate(X_test, Y_test, verbose=0)
    # print(model.evaluate(X_test,Y_test))
    if step % 500 == 0:
        print("The step is ", step , "......................................" + '[loss]:', loss)
        # "............." + '[cost, acc_train]:', train_cost_acc,
        # "............." + '[lost, acc_test]:', loss, accuracy)
    step += 1





'''plt绘图'''
# # 2D
# # y_pred = model.predict(X_train)
# # plt.figure(figsize=(16, 9))
# # plt.subplot(1,3,1)
# # plt.scatter(X_train[:, 0], Y_train)
# # plt.plot(X_train[:, 0], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,2)
# # plt.scatter(X_train[:, 1], Y_train)
# # plt.plot(X_train[:, 1], y_pred, 'r-', lw=3)

# # plt.subplot(1,3,3)
# # plt.scatter(X_train[:, 2], Y_train)
# # plt.plot(X_train[:, 2], y_pred, 'r-', lw=3)
# # plt.show()

# 3D
# Z_pred = model.predict(X_train)
# # print(Z_pred.shape)

# fig = figure(figsize=(16, 9))
# # ax = Axes3D(fig)
# ax = Axes3D(fig,auto_add_to_figure=False)
# fig.add_axes(ax)

# X = X_train[:, 0]
# Y = X_train[:, 1]
# Z = csv_train[:, 2]

# '''数据切片(减少计算量)'''
# s = slice(0, 1420, 2)
# x = X[s]
# y = Y[s]
# z = Z[s]
# z_ = Z_pred[s]
# # print(x.shape)

# # 绘制网格
# X, Y = np.meshgrid(X, Y)

# '''形状检查'''
# # print("==============================================================================")
# # print(X.shape)
# # print(Y.shape)
# # print(Z.shape)
# # print("==============================================================================")


# '''绘制'''
# ax.scatter(x, y, z)
# # ax.plot_surface(x, y, z_, rstride=1, cstride=1, cmap=plt.get_cmap('rainbow'))

# # 加标签(坐标轴)
# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('U', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('Thickness', fontdict={'size': 15, 'color': 'green'})

# plt.show()

# ax.set_zlabel('I', fontdict={'size': 15, 'color': 'red'})
# ax.set_ylabel('Thickness', fontdict={'size': 15, 'color': 'blue'})
# ax.set_xlabel('layer', fontdict={'size': 15, 'color': 'green'})

# show()


'''保存模型'''
# Persist only the trained weights; the architecture is rebuilt by this
# script itself, so a full model file is not required.
weights_path = r'D:\anaconda\project\Spectral_analysis\model_save\3D\U&T&I_1L_weight.h5'
model.save_weights(weights_path)
# model.save(save_path)  # would create a full HDF5 model file instead
# model = load_model('my_model.h5')






# '''模型可视化(Graphviz)'''
# from keras.utils import plot_model
# plot_model(model, to_file='model.png')

过程

step=100000 左右

1. 加入Dropout

loss大幅上升

2. leakyrelu换为普通relu

无明显影响

3. 加入RBFLayer

无明显影响

4. 增加层数

无明显影响

5. 更换优化器

(1)RMSprop

loss值很大波动, 但其总趋势收敛, 在100000轮次收敛于30左右

穷举法都试了一遍, 发现模型进入瓶颈了, 猜测只能通过更换结构进行新的尝试

6. 加深网络结构, 加大参数量

由:
在这里插入图片描述
改为:
在这里插入图片描述
实际并未收敛…
在这里插入图片描述

7. 于前几层内加入tanh激活函数层

在这里插入图片描述130k轮后模型收敛至loss=89,收敛效率较大下降, 但最终是否会和往常一样陷入局部最优解尚未可知

200k轮后收敛于loss=30, 貌似还有收敛的空间, 继续进行第三轮100k轮训练,

250k轮后模型收敛于loss=11

跑了两个五十万轮, 基本上是确定收敛到8.9这样子了
在这里插入图片描述

8. 后续将对网络结构进行修改, 加入一些独特的层进行调整,

先去上课了, 回来继续

相关文章:

  • 牛客练习赛#84 F 莫比乌斯反演+杜教筛+技巧+斐波那契数列和gcd的结论+矩阵快速幂
  • ZZNUOJ_用C语言编写程序实现1342:支配值数目(附完整源码)
  • java毕业设计后勤管理系统餐饮评价监督系统(附源码、数据库)
  • 前端基础学习笔记
  • 【TS】联合类型--类型断言--类型推断
  • 谈笑风声的秘密
  • QT影城网上售票系统
  • NetCDF数据在ArcMap中的使用
  • 打怪升级(考验思路)
  • 持续精进,改变自己