~vonfry/cpipc-2020

ref: 5286b92d10e02f4f3902279c0e912b9e8ea5f8fa cpipc-2020/2predict.r -rw-r--r-- 2.2 KiB
5286b92dVonfry R: add draw plot 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
library(openxlsx)
library(neuralnet)
library(MLmetrics)
library(ggplot2)
# Preprocessed samples; read once here and referenced as a global further down.
processed <- read.xlsx("./data/processed.xlsx")

# method 1, linear regression
# lm_result <- lm(RONL ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 +
#                   PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + PC19 +
#                   PC20 + PC21 + PC22 + PC23 + PC24,
#               processed)

# method 2: BP neural network
#' Train and evaluate a neural network that predicts `RONL`.
#'
#' The columns of `df` are centred and scaled with `scale()`, the target is
#' attached as column `RONL`, and the rows are split 80/20 into a training and
#' a test set using a fixed seed (2020). The fitted network is written to
#' `./data/bpnet_<name>.rds`; MSE and R2 on both sets are reported through
#' `message()`.
#'
#' @param df Data frame of numeric predictors, one row per sample.
#' @param name Suffix used in the file name of the saved network.
#' @param target Target values, one per row of `df` (a vector or a one-column
#'   data frame). Defaults to the "RON损失(不是变量)" column of the global
#'   `processed` table, which is what this function used to read implicitly.
#' @return Invisibly, a list with the fitted network (`net`) and a list of the
#'   four reported scores (`metrics`).
train_and_test <- function(df, name = "",
                           target = processed["RON损失(不是变量)"]) {
    # Guard against silent recycling when the target and the predictors do
    # not describe the same set of samples.
    stopifnot(NROW(target) == nrow(df))

    smp_size <- floor(0.8 * nrow(df))
    # Fixed seed so the random split is reproducible between runs.
    set.seed(2020)
    train_ind <- sample(seq_len(nrow(df)), size = smp_size)

    # NOTE: the scaling statistics are computed on all rows, i.e. before the
    # train/test split below. The target itself is left unscaled.
    scaled <- as.data.frame(scale(df, scale = TRUE))
    scaled[, "RONL"] <- target

    train <- scaled[train_ind, ]
    test <- scaled[-train_ind, ]

    net <- neuralnet(RONL ~ ., train,
                     hidden = 10, rep = 100, linear.output = TRUE)
    saveRDS(net, sprintf("./data/bpnet_%s.rds", name))

    # predict() replaces the deprecated compute(); like compute(), it uses the
    # first stored repetition by default.
    train_pred <- predict(net, train)
    test_pred <- predict(net, test)

    metrics <- list(
        train_mse = MSE(train_pred, train[["RONL"]]),
        train_r2 = R2_Score(train_pred, train[["RONL"]]),
        test_mse = MSE(test_pred, test[["RONL"]]),
        test_r2 = R2_Score(test_pred, test[["RONL"]])
    )
    message("train MSE:", metrics$train_mse)
    message("train R2: ", metrics$train_r2)
    message("test MSE: ", metrics$test_mse)
    message("test R2:  ", metrics$test_r2)

    invisible(list(net = net, metrics = metrics))
}

# First data set: the data after PCA, stored as CSV.
df <- read.csv("./data/data-after-pca.csv")
train_and_test(df, name = "pca_24")

# Another PCA data set: columns 3 to 25 of the named worksheet.
df2 <- read.xlsx(
    "./data/主成分数据(1).xlsx",
    sheet = "主成分数据表1",
    cols = 3:25
)
train_and_test(df2, name = "pca_k")

# draw plot with net
# c("S-ZORB.PC_3301.DACA",
#   "S-ZORB.FC_1102.PV",
#   "S-ZORB.TC_1607.DACA",
#   "S-ZORB.AT-0004.DACA.PV",
#   "S-ZORB.FT_3702.DACA",
#   "饱和烃",
#   "S-ZORB.AT-0003.DACA.PV",
#   "S-ZORB.TXE_2203A.DACA",
#   "S-ZORB.FT_2502.DACA",
#   "S-ZORB.AT_6201.DACA",
#   "S-ZORB.PC_3101.DACA",
#   "S-ZORB.FT_1001.PV",
#   "S-ZORB.AT-0001.DACA.PV",
#   "S-ZORB.AT-0009.DACA.PV",
#   "S-ZORB.TXE_2202A.DACA",
#   "S-ZORB.PDT_3601.DACA",
#   "S-ZORB.FC_2801.PV") -> feature
# df[, feature] -> df_
#
# colnames(df_) <- paste("D", seq(1,17), sep='')
# df_[, "RONL"] <- processed["RON损失(不是变量)"]
# net_ <- neuralnet(RONL ~ ., df_, hidden = 10, rep = 100, linear.output = T)
# plot(net_)