~vonfry/cpipc-2020

ref: 46e3745a8a5d85da7ad4dd5cb9fbd42632fe9b2f cpipc-2020/2predict.r -rw-r--r-- 1.5 KiB
46e3745aVonfry Merge tag 'finish' into develop 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
library(openxlsx)
# Load the preprocessed workbook; its "RON损失(不是变量)" column is used below
# as the regression response (RONL).
processed <- read.xlsx("./data/processed.xlsx")

# method 1, linear regression
# lm_result <- lm(RONL ~ PC1 + PC2 + PC3 + PC4 + PC5 + PC6 + PC7 + PC8 + PC9 + PC10 +
#                   PC11 + PC12 + PC13 + PC14 + PC15 + PC16 + PC17 + PC18 + PC19 +
#                   PC20 + PC21 + PC22 + PC23 + PC24,
#               processed)

# method 2: BP (back-propagation) neural network
# Train a neural network on the predictors in `df` and report its fit.
#
# Steps: standardise every column of `df`, attach the response as column
# `RONL`, split the rows 80/20 into train/test with a fixed seed, fit a
# `neuralnet` model (one hidden layer of 10 units, 100 repetitions), save the
# fitted object to "./data/bpnet_<name>.rds", and print MSE and R2 for both
# splits via message().
#
# Arguments:
#   df     Data frame of numeric predictor columns, one row per sample.
#   name   Suffix used in the file name of the saved model.
#   target Response values, one per row of `df` (a vector or a one-column
#          data frame). Defaults to the "RON损失(不是变量)" column of the
#          global `processed`, which is what the function always used before
#          this argument existed.
#
# Returns (invisibly) a list with the fitted network (`net`) and the four
# metrics (`metrics`).
train_and_test <- function(df, name = "",
                           target = processed[["RON损失(不是变量)"]]) {
    if (is.data.frame(target)) {
        target <- target[[1L]]
    }

    n_rows <- nrow(df)
    # Rows of `df` and `target` are matched purely by position, so a length
    # mismatch would silently misalign (or recycle) the response.
    if (is.null(target) || length(target) != n_rows) {
        stop(sprintf("`target` must supply one response per row of `df` (%d rows, %d responses)",
                     n_rows, length(target)),
             call. = FALSE)
    }

    smp_size <- floor(0.8 * n_rows)
    if (smp_size < 1) {
        stop("`df` needs at least 2 rows to form a train/test split",
             call. = FALSE)
    }

    # Fixed seed so the train/test split is reproducible between runs.
    set.seed(2020)
    train_ind <- sample(seq_len(n_rows), size = smp_size)
    # setdiff() instead of negative indexing: `x[-integer(0), ]` selects no
    # rows at all, which is never what is wanted for a hold-out set.
    test_ind <- setdiff(seq_len(n_rows), train_ind)

    # Standardisation uses the statistics of all rows (train and test
    # together), because it happens before the split.
    scaled <- as.data.frame(scale(df, scale = TRUE))
    scaled[["RONL"]] <- target

    train <- scaled[train_ind, ]
    test <- scaled[test_ind, ]

    net <- neuralnet::neuralnet(RONL ~ .,
                                train, hidden = 10, rep = 100,
                                linear.output = TRUE)
    saveRDS(net, sprintf("./data/bpnet_%s.rds", name))

    # NOTE(review): compute() is called without `rep`; per the neuralnet
    # documentation that evaluates the first stored repetition only. Confirm
    # whether the best of the 100 repetitions was intended instead.
    train_result <- neuralnet::compute(net, train)
    test_result <- neuralnet::compute(net, test)

    metrics <- list(
        train_mse = MLmetrics::MSE(train_result$net.result, train$RONL),
        train_r2 = MLmetrics::R2_Score(train_result$net.result, train$RONL),
        test_mse = MLmetrics::MSE(test_result$net.result, test$RONL),
        test_r2 = MLmetrics::R2_Score(test_result$net.result, test$RONL)
    )
    message("train MSE:", metrics$train_mse)
    message("train R2: ", metrics$train_r2)
    message("test MSE: ", metrics$test_mse)
    message("test R2:  ", metrics$test_r2)

    invisible(list(net = net, metrics = metrics))
}

# First PCA data set, stored as CSV.
df <- read.csv(file = "./data/data-after-pca.csv")
train_and_test(df, name = "pca_24")

# Second PCA data set: columns 3 to 25 of the sheet "主成分数据表1".
df2 <- read.xlsx(
    "./data/主成分数据(1).xlsx",
    sheet = "主成分数据表1",
    cols = 3:25
)
train_and_test(df2, name = "pca_k")