~vonfry/cpipc-2020

7954698270b8e1527e26b83f9eab6bb11d0f185d — Vonfry 2 years ago 1ebbd5f
update other files
2 files changed, 186 insertions(+), 0 deletions(-)

A ShubinSong/R_S_Test.py
A ShubinSong/data_main.py
A ShubinSong/R_S_Test.py => ShubinSong/R_S_Test.py +84 -0
@@ 0,0 1,84 @@
import units as units
import pandas as pd
import tensorflow as tf
import numpy as np
from keras.models import load_model
import h5py
import matplotlib.pyplot as plt 

# Input files read by this script, plus the name of the column in the
# feature workbook that lists the model's input features.
data_path = '325.xlsx'
feature_path = 'feature.xlsx'
model_path = 'RON_AND_S.h5'
Label_MIX_name = 'RON_LOSS_AND_S'
info_file = '354.xlsx'

# Per-tag table; the sweep below reads its '位号', '偏差', '取值范围'
# and '单位' columns.
info = pd.read_excel(info_file, sheet_name='Sheet1')

# Sample table with the two non-feature columns removed by label.
data = pd.read_excel(data_path, sheet_name='Sheet2')
data = data.drop(columns=['样本编号', '时间'])

# Feature names: the non-empty cells of the selected column, as a list.
feature_table = pd.read_excel(feature_path, sheet_name='Sheet1')
feature = feature_table[Label_MIX_name].dropna(axis=0, how='any').tolist()

# Trained Keras model loaded from disk.
model = load_model(model_path)


import os

# Sensitivity sweep.
#
# For every model input feature (except '饱和烃', which is skipped), the value
# of one sample row is moved in steps of that feature's '偏差' from `info`.
# Values that stay inside the feature's '取值范围' (widened by two steps on each
# side) are fed to the model, and the de-normalised predictions for that row
# are plotted against the perturbed value and saved as one PNG per feature.

SAMPLE_ROW = 132            # row label in `data`, and row position in the predictions
OUTPUT_DIR = './NO5_pic/'

# savefig() does not create missing directories; make sure the target exists
# before any of the (slow) model evaluations are run.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for f in feature:
    if f == '饱和烃':
        continue

    spec = info[info['位号'] == f]
    # Take scalars, not one-element arrays, so the comparisons and the plotted
    # x values below are plain numbers.
    dlta = spec['偏差'].values[0]
    bounds = spec['取值范围'].values[0].split('~')
    _min = float(bounds[0])
    _max = float(bounds[1])

    base = data.loc[SAMPLE_ROW, f]
    lower = _min - 2 * dlta
    upper = _max + 2 * dlta

    x_dim = []
    _ron = []
    _s = []
    for i in range(-80, 81):
        temp = base + i * dlta
        if not lower < temp < upper:
            continue

        _data = data.copy()
        # Single-step .loc assignment: the chained form `.loc[row][col] = v`
        # may write into a temporary row and leave `_data` unchanged.
        _data.loc[SAMPLE_ROW, f] = temp

        # Normalise with the statistics of the perturbed frame.
        mean = _data.mean()
        std = _data.std()
        _data = (_data - mean) / std
        x_test = _data[feature].values      # feature matrix

        y = model.predict(x_test)           # predictions for every row
        # Keep only the perturbed sample and map both outputs back to the
        # scale of the '_RON' and '_硫含量' columns.
        _ron.append(y[SAMPLE_ROW, 0] * std['_RON'] + mean['_RON'])
        _s.append(y[SAMPLE_ROW, 1] * std['_硫含量'] + mean['_硫含量'])
        x_dim.append(temp)

    if not x_dim:
        # Nothing fell inside the allowed window; there is nothing to plot and
        # x_dim[0] below would raise IndexError.
        print(f + ': no perturbed value inside the allowed range, plot skipped')
        continue

    fig = plt.figure(f)
    ax1 = fig.add_subplot()
    ax1.plot(x_dim, _ron, 'b')
    ax1.set_ylabel('RON')
    ax1.set_title(f)
    # Vertical markers: the unperturbed sample value and the range limits.
    ax1.vlines(base, 85, 90, 'y', linestyles='dashed')
    ax1.vlines(_max, 85, 90, 'gray', linestyles='dashed')
    ax1.vlines(_min, 85, 90, 'gray', linestyles='dashed')
    ax2 = ax1.twinx()
    ax2.plot(x_dim, _s, 'r')
    ax2.set_xlim([x_dim[0], x_dim[-1]])
    ax2.set_xlabel('X')
    print(f)
    fig.savefig(OUTPUT_DIR + f + '.png')
    # pyplot keeps every figure alive until it is closed; release it so memory
    # does not grow with the number of features.
    plt.close(fig)

# plt.show()


# print(data)









A ShubinSong/data_main.py => ShubinSong/data_main.py +102 -0
@@ 0,0 1,102 @@
import pandas as pd
import numpy as np
import math

# Input workbooks: the sample table and the per-tag table.
data_sample = r'325.xlsx'
data_354_path = r'354.xlsx'

data_0 = pd.read_excel(data_sample, sheet_name='Sheet1')
data_354 = pd.read_excel(data_354_path, sheet_name='Sheet1')

# Columns removed from the sample table; everything that remains in `data`
# goes through the cleaning steps below.
dropped_columns = [
    '样本编号',
    '时间',
    '硫含量,μg/g',
    '辛烷值RON',
    '饱和烃,v%(烷烃+环烷烃)',
    '烯烃,v%',
    '芳烃,v%',
    '溴值,gBr/100g',
    '密度(20℃),kg/m³',
    '硫含量μg/g',
    '_辛烷值RON',
    'RON损失(不是变量)',
    '焦炭wt%',
    'Swt%',
    '焦炭,wt%',
    'S, wt%',
]
data = data_0.drop(columns=dropped_columns)

# Only these three columns of the per-tag table are used from here on.
tag_columns = ['位号', '取值范围', '偏差']
data_354 = data_354[tag_columns]

# Parse every "low~high" text in '取值范围' into numeric bounds.
# Each range is split once; a malformed entry still fails loudly
# (IndexError / ValueError) rather than turning into a silent NaN.
range_parts = [text.split('~') for text in data_354['取值范围']]
minlist = [float(parts[0]) for parts in range_parts]
maxlist = [float(parts[1]) for parts in range_parts]

# assign() builds a new frame and places the values by position, so this does
# not write into a column slice of the original table and does not depend on
# the table having a default 0..n-1 index.
data_354 = data_354.assign(min=minlist, max=maxlist)

# Per-tag tolerance and the accepted interval, all indexed by '位号':
#   during_min = min - 偏差,  during_max = max + 偏差
limits = data_354.set_index('位号')
dlta = limits['偏差']
during_max = limits['max'] + dlta
during_min = limits['min'] - dlta


# Per-column mean that ignores zero entries: the sum of the whole column
# divided by the number of cells that are not 0. A column with no non-zero
# cell gets 0.
#
# DataFrame.iteritems() was removed in pandas 2.0; items() is the equivalent
# that exists in both old and new versions.
column_means = {}
for index_x, row in data.items():
    if index_x == '时间':
        continue
    nonzero_count = int((row != 0).sum())
    if nonzero_count == 0:
        column_means[index_x] = 0
    else:
        # skipna=False keeps the behaviour of the plain running sum: a NaN in
        # the column makes the column's mean NaN.
        column_means[index_x] = row.sum(skipna=False) / nonzero_count

# Build the Series in one go with an explicit dtype instead of growing an
# empty, dtype-less Series one label at a time.
x = pd.Series(column_means, dtype=float)

# Replace every 0 in `data` with the corresponding column value from `x`.
# Done one column at a time with a boolean mask instead of visiting each cell
# through iterrows() / Series.iteritems() (the latter was removed in
# pandas 2.0).
for index_x in data.columns:
    if index_x == '时间':
        continue
    column = data[index_x]
    data[index_x] = column.mask(column == 0, x[index_x])
                
# Three-sigma threshold per column:
#   3 * sqrt( sum((x[col] - value)^2) / (n - 1) )
# where x[col] is the column's reference value and n the number of rows.
#
# DataFrame.iteritems() was removed in pandas 2.0; items() works everywhere.
thresholds = {}
n = len(data)
for index_x, row in data.items():
    if index_x == '时间':
        continue
    squared = (x[index_x] - row) ** 2
    # skipna=False: a NaN residual makes the threshold NaN, as a plain sum
    # over the residuals would.
    dlta_1 = math.sqrt((1 / (n - 1)) * squared.sum(skipna=False))
    thresholds[index_x] = dlta_1 * 3

# Build the Series once, with an explicit dtype, instead of growing an empty
# dtype-less Series label by label.
v_series = pd.Series(thresholds, dtype=float)

# Three-sigma filter: a row is removed when any of its cells is further from
# the column's reference value x[col] than the column's threshold
# v_series[col]. Every offending cell is printed before the rows are dropped.
del_list = []

for index_y, item in data.iterrows():
    row_flagged = False
    # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
    for index_x, row in item.items():
        if index_x == '时间':
            continue
        deviation = abs(x[index_x] - row)
        if deviation > v_series[index_x]:
            print(index_y, index_x, x[index_x], row, deviation, v_series[index_x])
            row_flagged = True
    # Record each row label once, however many of its cells were flagged.
    if row_flagged and index_y not in del_list:
        del_list.append(index_y)

print(del_list)
data.drop(index=del_list, inplace=True)

# Operating-range filter: a row is removed when any checked cell lies outside
# [during_min[col], during_max[col]]. Every offending cell is printed before
# the rows are dropped, and the cleaned table is printed at the end.
del_list_2 = []

# Columns that are not compared against the range table.
unchecked_columns = {
    '时间',
    'S-ZORB.SIS_LT_1001.PV',
    'S-ZORB.FT_1204.TOTAL',
    'S-ZORB.AI_2903.PV',
}

for index_y, item in data.iterrows():
    row_flagged = False
    # Series.iteritems() was removed in pandas 2.0; items() is equivalent.
    for index_x, row in item.items():
        if index_x in unchecked_columns:
            continue
        if row < during_min[index_x] or row > during_max[index_x]:
            print(index_y, index_x, during_min[index_x], row, during_max[index_x])
            row_flagged = True
    # Record each row label once, however many of its cells were flagged.
    if row_flagged and index_y not in del_list_2:
        del_list_2.append(index_y)

print(del_list_2)
data.drop(index=del_list_2, inplace=True)

print(data)

# data1 = data.mean()
# data1.to_excel("output1.xlsx",index = True)
\ No newline at end of file