965-R-Learning-Repo/Group_Default-R/main.R

55 lines
1.6 KiB
R

# Default-R project by 965
# Dependencies
install.packages(c("caTools", "glm2", "caret", "randomForest", "pROC", "corrplot", "MASS", "car", "carData", "farver"))
library(caTools)
library(glm2)
library(caret)
library(randomForest)
library(pROC)
library(corrplot)
library(MASS)
library(car)
library(farver)
# Preprocessing data
weather_data <- read.csv("data.csv")
weather_data$rain_numeric <- ifelse(weather_data$Rain == "rain", 1, 0)
# str(weather_data)
# Split data sheet
set.seed(841524)
split <- sample.split(weather_data$rain_numeric, SplitRatio = 0.7)
train_data <- subset(weather_data, split == TRUE)
test_data <- subset(weather_data, split == FALSE)
# Constructing Model & Predict & Evaluate
'model <- glm(train_data$rain_numeric ~ train_data$Temperature +
train_data$Pressure + train_data$Cloud_Cover +
train_data$Wind_Speed + train_data$Humidity,
data = train_data, family = binomial)'
model <- randomForest(train_data$rain_numeric ~ ., data = train_data, class.weight = c(0, 2))
predictions <- predict(model, newdata = test_data, type = "response")
confusionMatrix(as.factor(ifelse(predictions > 0.5, 1, 0)),
as.factor(test_data$rain_numeric))
plot(model)
roc_obj <- roc(test_data$rain, predictions)
plot(roc_obj)
# Step Model & Extract Main factor
full_model <- lm(weather_data$rain_numeric ~ weather_data$Temperature +
weather_data$Pressure + weather_data$Cloud_Cover +
weather_data$Wind_Speed + weather_data$Humidity,
data = weather_data)
step_model <- step(full_model, direction = "both")
summary(step_model)
influencePlot(step_model)