~vonfry/cpipc-2020

467ae1c136dcf5fde49081d3e789ec39c6992902 — Vonfry 2 years ago 86acbc1 + e496808 finish
Merge branch 'release/finish'
3 files changed, 238 insertions(+), 30 deletions(-)

M 3optimization.py
A ShubinSong/R_S_Test.py
A ShubinSong/data_main.py
M 3optimization.py => 3optimization.py +52 -30
@@ 36,6 36,10 @@ norm_ron_loss_zero = (0 - df_ron_loss.mean()) / df_ron_loss.std()
df_s = df['_硫含量']
norm_s = (5 - df_s.mean()) / df_s.std()
norm_s_zero = (0 - df_s.mean()) / df_s.std()
df_std_s = df.std()['_硫含量']
df_mean_s = df.mean()['_硫含量']
df_std_ron_loss = df.std()['RON损失']
df_mean_ron_loss = df.mean()['RON损失']

def merge_feature_args(unmodified, modified):
    for i in range(0, len(unmodified_feature_index)):


@@ 54,8 58,8 @@ def fitness(fix_args, rowindex):
        if not (pred_s > norm_s_zero and pred_s <= norm_s):
            return 1
        # step3: compute RON LOSS RATE
        ron_loss = df_ron_loss_norm.iloc[rowindex]
        loss_reduce_rate = (ron_loss - pred_ron_loss ) / ron_loss
        ron_loss = df_ron_loss.iloc[rowindex]
        loss_reduce_rate = (ron_loss - (pred_ron_loss * df_std_ron_loss + df_mean_ron_loss) ) / ron_loss
        if loss_reduce_rate > 0.8 :
            return 0
        if loss_reduce_rate < 0.3:


@@ 73,7 77,7 @@ def calculate_minimum(rowindex):
    fix_args = row[unmodified_feature]
    fix_means = df[unmodified_feature].mean()
    fix_std = df[unmodified_feature].std()
    norm_fix_args = ((fix_args - fix_means) * fix_std).values.tolist()
    norm_fix_args = ((fix_args - fix_means) / fix_std).values.tolist()

    ga = GA(func=fitness(norm_fix_args, rowindex),
            size_pop=50, max_iter=800,


@@ 92,39 96,57 @@ def calculate_minimum(rowindex):
    calcount += 1
    return fix_args.tolist() + original_best_x

optimzation = list(map(calculate_minimum, range(0, len(df))))
optimization = list(map(calculate_minimum, range(0, len(df))))

df_original_best_x = pd.DataFrame(optimzation, columns = unmodified_feature + modified_feature)
df_original_best_x = pd.DataFrame(optimization, columns = unmodified_feature + modified_feature)

df_original_best_x.index = df.index

df_original_best_x.to_csv('./data/optimization.csv')

# df_original_best_x = pd.read_csv('./data/optimization.csv', index_col = 0)
# Normalise the optimised operating points with the mean/std of the ORIGINAL
# data set (df), i.e. the same scaling applied to df[feature] itself.
df_original_best_x_norm = (df_original_best_x[feature] - df[feature].mean()) / df[feature].std()
# The model returns two outputs per row; they are labelled 'ron_loss' and 's'
# below (column 0 and column 1 respectively).
optimization_ron_loss_and_s  = model.predict(df_original_best_x_norm[feature])
df_optimization_ron_loss_and_s = pd.DataFrame(optimization_ron_loss_and_s, columns = ['ron_loss', 's'])
# Undo the normalisation of both predictions using the std/mean of the
# 'RON损失' and '_硫含量' columns of df.
original_optimization_ron_loss = df_optimization_ron_loss_and_s['ron_loss'] * df_std_ron_loss + df_mean_ron_loss
original_optimization_s = df_optimization_ron_loss_and_s['s'] * df_std_s + df_mean_s
# Mean relative reduction of RON loss: (recorded - predicted) / recorded.
# NOTE(review): the prediction frame has a fresh 0..n-1 index while df_ron_loss
# keeps df's index; the subtraction aligns on labels - confirm they match.
optimization_analysis_ron_loss_rate_mean = ((df_ron_loss - original_optimization_ron_loss) / df_ron_loss).mean()
# One-row summary written next to the other optimisation outputs.
optimization_analysis = pd.DataFrame({
    'ron_loss_rate_mean': [optimization_analysis_ron_loss_rate_mean],
    's_mean': [original_optimization_s.mean()]
})
optimization_analysis.to_csv('./data/optimization_analysis.csv')

diff = df[feature] - df_original_best_x
diff.to_csv('./data/optimization-diff.csv')

diff_mean = diff.mean()
diff.to_csv('./data/optimization-diff-mean.csv')
diff_mean.to_csv('./data/optimization-diff-mean.csv')

diff_norm = (diff - diff_mean) / diff.std()

# df_original_best_x = pd.read_csv('./data/optimization.csv', index_col = 0)
df_original_best_x_norm = (df_original_best_x - df_original_best_x.mean()) / df_original_best_x.std()

df_norm = (df - df.mean()) / df.std()

df_133_ron_loss_norm = df_norm.iloc[132]['RON损失']
df_std_s = df.std()['_硫含量']
df_mean_s = df.mean()['_硫含量']
df_std_ron_loss = df.std()['RON损失']
df_mean_ron_loss = df.mean()['RON损失']
pred_opted_133 = model.pred(np.array([df_original_best_x_norm[feature].iloc[132]]))[0]
optimization_133_analysis = pd.DataFrame({
    'optimized_s': pred_opted_133[1],
    'optimized_ron_loss': pred_opted_133[0],
    'ron_loss_rate': (pred_opted_133[0]- df_133_ron_loss_norm) / df_133_ron_loss_norm
# Positional row (used with .iloc) of the single sample analysed in detail.
sample_index = 132
# Recorded RON loss of that sample, in original units and normalised.
df_sample_ron_loss = df.iloc[sample_index]['RON损失']
df_sample_ron_loss_norm = df_norm.iloc[sample_index]['RON损失']
# Predict on the sample's optimised, normalised features; the input is wrapped
# in a one-row array and [0] unwraps the single prediction row.
pred_opted_sample = model.predict(np.array([df_original_best_x_norm[feature].iloc[sample_index]]))[0]
# Output 1 is scaled back with the '_硫含量' statistics, output 0 with the
# 'RON损失' statistics.
optimization_sample_original_s = pred_opted_sample[1] * df_std_s + df_mean_s
optimization_sample_original_ron_loss = pred_opted_sample[0] * df_std_ron_loss + df_mean_ron_loss
# Relative reduction of RON loss: (recorded - predicted) / recorded.
optimization_sample_ron_loss_rate = (df_sample_ron_loss - optimization_sample_original_ron_loss) / df_sample_ron_loss
# Scalars are wrapped in lists so that DataFrame builds a single row.
optimization_sample_analysis = pd.DataFrame({
    'optimized_s': [optimization_sample_original_s],
    'optimized_ron_loss': [optimization_sample_original_ron_loss],
    'ron_loss_rate': [optimization_sample_ron_loss_rate]
})
optimization_133_analysis.to_csv('./data/optimization_133_analysis.csv')
optimization_sample_analysis.to_csv('./data/optimization_sample_analysis.csv')

optimization_and_original_sample = pd.DataFrame(
    np.array([optimization[sample_index], df[unmodified_feature + modified_feature].iloc[sample_index]]),
    columns = unmodified_feature + modified_feature,
    index = ['optimized', 'original']
)
optimization_and_original_sample.to_csv('./data/optimization-sample-both.csv')

import matplotlib.pyplot as plt



@@ 136,14 158,14 @@ axe_diff.boxplot(diff_norm.drop(['饱和烃'], axis=1).T,
axe_diff.hlines(0, 0.5, 16.5, colors='C8', zorder=3)
fig_diff.savefig('./output/optimization-diff.jpg')

fig_133, axe_133 = plt.subplots()
width_133 = 0.35
fig_sample, axe_sample = plt.subplots()
width_sample = 0.35
x = np.arange(1, len(feature) + 1)
rects1 = axe_133.bar(x - width_133/2, df_norm[feature].iloc[132].values, width_133, label='Original')
rects2 = axe_133.bar(x + width_133/2, df_original_best_x_norm[feature].iloc[132].values, width_133, label='Optimized')
axe_133.set_xticks(x)
axe_133.set_xticklabels([ 'D' + str(i + 1) for i in range(0, len(feature))])
axe_133.set_ylabel('norm values')
axe_133.legend()
fig_133.tight_layout()
fig_133.savefig('./output/optimization-133.jpg')
rects1 = axe_sample.bar(x - width_sample/2, df_norm[feature].iloc[sample_index].values, width_sample, label='Original')
rects2 = axe_sample.bar(x + width_sample/2, df_original_best_x_norm[feature].iloc[sample_index].values, width_sample, label='Optimized')
axe_sample.set_xticks(x)
axe_sample.set_xticklabels([ 'D' + str(i + 1) for i in range(0, len(feature))])
axe_sample.set_ylabel('norm values')
axe_sample.legend()
fig_sample.tight_layout()
fig_sample.savefig('./output/optimization-sample.jpg')

A ShubinSong/R_S_Test.py => ShubinSong/R_S_Test.py +84 -0
@@ 0,0 1,84 @@
import units as units
import pandas as pd
import tensorflow as tf
import numpy as np
from keras.models import load_model
import h5py
import matplotlib.pyplot as plt 

# Input locations: sample workbook, feature-list workbook, trained model,
# and the workbook describing each tag (range, deviation, unit).
data_path = '325.xlsx'
feature_path = 'feature.xlsx'
model_path = 'RON_AND_S.h5'
# Column of the feature workbook that lists the model's input features.
Label_MIX_name = 'RON_LOSS_AND_S'
info_file = '354.xlsx'

# Per-tag metadata.
info = pd.read_excel(info_file, sheet_name='Sheet1')

# Sample table without its two bookkeeping columns.
raw_samples = pd.read_excel(data_path, sheet_name='Sheet2')
data = raw_samples.drop(columns=['样本编号', '时间'])

# Feature names: the non-empty cells of the selected column, in sheet order.
feature_sheet = pd.read_excel(feature_path, sheet_name='Sheet1')
feature = feature_sheet[Label_MIX_name].dropna().tolist()

model = load_model(model_path)


# Row (label in `data`) of the sample whose operating point is perturbed.
SAMPLE_INDEX = 132
# Number of deviation-sized steps explored on each side of the recorded value.
MAX_STEPS = 80

# Sensitivity sweep: for every feature, move the sample's value in steps of the
# tag's deviation, re-normalise, predict, and plot predicted RON (left axis,
# blue) and sulphur (right axis, red) against the perturbed value.
for f in feature:
    if f == '饱和烃':
        # Excluded from the sweep (presumably it has no row in the info sheet -
        # confirm against 354.xlsx).
        continue

    spec = info[info['位号'] == f].iloc[0]
    # Scalars, not 1-element arrays, so x_dim holds plain numbers.
    step = float(spec['偏差'])
    lower_text, upper_text = spec['取值范围'].split('~')[:2]
    lower = float(lower_text)
    upper = float(upper_text)
    baseline = data.loc[SAMPLE_INDEX, f]

    # One working copy per feature is enough: only the cell (SAMPLE_INDEX, f)
    # changes between steps and it is overwritten on every iteration.
    work = data.copy()
    # Float column so that fractional perturbed values can be stored.
    work[f] = work[f].astype(float)

    x_dim = []
    _ron = []
    _s = []
    for i in range(-MAX_STEPS, MAX_STEPS + 1):
        temp = baseline + i * step
        # Keep only values inside the allowed range widened by two steps.
        if not (lower - 2 * step < temp < upper + 2 * step):
            continue

        # Single .loc call: the chained form `.loc[row][col] = v` may assign
        # into a temporary copy and leave the frame unchanged.
        work.loc[SAMPLE_INDEX, f] = temp

        # Statistics are recomputed because the perturbed cell changes them.
        mean = work.mean()
        std = work.std()
        normalized = (work - mean) / std

        # Only the perturbed sample's prediction is used, so only that row is
        # sent to the model.
        x_test = normalized.loc[[SAMPLE_INDEX], feature].values
        y = model.predict(x_test)

        x_dim.append(temp)
        _ron.append(y[0, 0] * std['_RON'] + mean['_RON'])
        _s.append(y[0, 1] * std['_硫含量'] + mean['_硫含量'])

    if not x_dim:
        # No step fell inside the window; there is nothing to plot.
        print(f, 'skipped: no value inside the admissible range')
        continue

    fig = plt.figure(f)
    ax1 = fig.add_subplot()
    ax1.plot(x_dim, _ron, 'b')
    ax1.set_ylabel('RON')
    ax1.set_title(f)
    # Dashed markers: recorded value (yellow) and range limits (gray).
    ax1.vlines(baseline, 85, 90, 'y', linestyles='dashed')
    ax1.vlines(upper, 85, 90, 'gray', linestyles='dashed')
    ax1.vlines(lower, 85, 90, 'gray', linestyles='dashed')
    ax2 = ax1.twinx()
    ax2.plot(x_dim, _s, 'r')
    ax2.set_xlim([x_dim[0], x_dim[-1]])
    ax2.set_xlabel('X')
    print(f)
    fig.savefig('./NO5_pic/' + f + '.png')
    # Release the figure; one figure per feature otherwise stays open.
    plt.close(fig)

# plt.show()


# print(data)









A ShubinSong/data_main.py => ShubinSong/data_main.py +102 -0
@@ 0,0 1,102 @@
import pandas as pd
import numpy as np
import math

# Source workbooks: the sample table and the per-tag description table.
data_sample = r'325.xlsx'
data_354_path = r'354.xlsx'

data_0 = pd.read_excel(data_sample, sheet_name='Sheet1')
data_354 = pd.read_excel(data_354_path, sheet_name='Sheet1')

# Columns removed from the sample table before the cleaning steps below.
dropped_columns = [
    '样本编号',
    '时间',
    '硫含量,μg/g',
    '辛烷值RON',
    '饱和烃,v%(烷烃+环烷烃)',
    '烯烃,v%',
    '芳烃,v%',
    '溴值,gBr/100g',
    '密度(20℃),kg/m³',
    '硫含量μg/g',
    '_辛烷值RON',
    'RON损失(不是变量)',
    '焦炭wt%',
    'Swt%',
    '焦炭,wt%',
    'S, wt%',
]
data = data_0.drop(columns=dropped_columns)

# Only the tag name, its allowed range and its deviation are needed.
tag_columns = ['位号', '取值范围', '偏差']
data_354 = data_354[tag_columns]

# Split every "low~high" range string into numeric bounds. The split is done
# column-wise, so the bounds keep data_354's own index and no positional
# re-alignment is needed.
range_parts = data_354['取值范围'].str.split('~', expand=True)
mi = range_parts[0].astype(float)
ma = range_parts[1].astype(float)

# assign() returns a new frame; writing columns onto the frame sliced from the
# workbook would raise SettingWithCopyWarning.
data_354 = data_354.assign(min=mi, max=ma)

# Tag-indexed tolerance and the allowed window widened by that tolerance.
tags = data_354['位号']
dlta = pd.Series(data_354['偏差'].values, index=tags)
during_max = pd.Series(data_354['max'].values, index=tags) + dlta
during_min = pd.Series(data_354['min'].values, index=tags) - dlta


# Per-column mean taken over the non-zero entries only: the column sum divided
# by the number of entries that are not 0. Columns consisting solely of zeros
# get a mean of 0. Computed column-wise instead of through
# DataFrame.iteritems(), which no longer exists in pandas 2.x.
nonzero_count = (data != 0).sum()
# skipna=False: a missing value makes the column mean NaN, as a plain running
# sum would.
column_total = data.sum(skipna=False)
x = (column_total / nonzero_count).where(nonzero_count != 0, 0)

# Replace every 0 reading with its column's non-zero mean from `x`.
# Done one column at a time: mask() upcasts an integer column when a fractional
# mean has to be stored, and it avoids a per-cell .loc write driven by
# Series.iteritems(), which was removed in pandas 2.x.
for index_x in data.columns:
    column = data[index_x]
    data[index_x] = column.mask(column == 0, x[index_x])
                
# Outlier threshold per column: three times sqrt(sum((x - value)^2) / (n - 1)),
# where x is the column mean computed above and n the number of rows.
# Computed column-wise; DataFrame.iteritems() is gone in pandas 2.x.
centre = x.astype(float)
squared_deviation = data.sub(centre, axis=1) ** 2
n = len(data)
# skipna=False keeps a NaN threshold for columns that contain missing values.
v_series = 3 * np.sqrt(squared_deviation.sum(skipna=False) / (n - 1))

# Flag every cell whose distance from its column mean exceeds the column's
# threshold, report the flagged cells, then drop the rows that contain any.
# astype(float) guards against `x` / `v_series` being object-dtype Series.
distance = data.sub(x.astype(float), axis=1).abs()
outlier_mask = distance.gt(v_series.astype(float), axis=1).to_numpy()

# argwhere walks the mask row by row, so cells are reported in the same order
# as a nested row/column loop would visit them.
cell_values = data.to_numpy()
for row_pos, col_pos in np.argwhere(outlier_mask):
    index_y = data.index[row_pos]
    index_x = data.columns[col_pos]
    row = cell_values[row_pos, col_pos]
    print(index_y, index_x, x[index_x], row, abs(x[index_x] - row), v_series[index_x])

# Labels of rows with at least one flagged cell, in table order.
del_list = data.index[outlier_mask.any(axis=1)].tolist()

print(del_list)
data.drop(index=del_list, inplace=True)

# Columns that are not checked against the allowed operating window.
unchecked_columns = {
    '时间',
    'S-ZORB.SIS_LT_1001.PV',
    'S-ZORB.FT_1204.TOTAL',
    'S-ZORB.AI_2903.PV',
}
checked_columns = [name for name in data.columns if name not in unchecked_columns]

# Window per checked column; a column missing from the tag table raises
# KeyError here. Assumes tag names are unique in the tag table - TODO confirm.
lower_bound = during_min.loc[checked_columns]
upper_bound = during_max.loc[checked_columns]

# A cell violates the window when it lies strictly below the lower or strictly
# above the upper bound.
checked = data[checked_columns]
below = checked.lt(lower_bound, axis=1)
above = checked.gt(upper_bound, axis=1)
violation_mask = (below | above).to_numpy()

# Report violations row by row, left to right.
cell_values = checked.to_numpy()
for row_pos, col_pos in np.argwhere(violation_mask):
    index_y = checked.index[row_pos]
    index_x = checked_columns[col_pos]
    row = cell_values[row_pos, col_pos]
    print(index_y, index_x, during_min[index_x], row, during_max[index_x])

# Labels of rows with at least one violation, in table order.
del_list_2 = checked.index[violation_mask.any(axis=1)].tolist()

print(del_list_2)
data.drop(index=del_list_2, inplace=True)

print(data)

# data1 = data.mean()
# data1.to_excel("output1.xlsx",index = True)
\ No newline at end of file