Run Code
|
API
|
Code Wall
|
Misc
|
Feedback
|
Login
|
Theme
|
Privacy
|
Patreon
Monroe Week 9 Homework
### Step 1: Load the data
# Load your libraries
library("kernlab")
library("ggplot2")
library("e1071")
library("gridExtra")

# Let's go back and analyze the air quality dataset (if you remember, we used
# that previously, in the visualization lab). Remember to think about how to
# deal with the NAs in the data.
myairquality <- airquality

# Replace the missing Ozone and Solar.R readings with the rounded mean of the
# observed values of the same column.
myairquality$Ozone[is.na(myairquality$Ozone)] <-
  round(mean(myairquality$Ozone, na.rm = TRUE))
myairquality$Solar.R[is.na(myairquality$Solar.R)] <-
  round(mean(myairquality$Solar.R, na.rm = TRUE))
myairquality

### Step 2: Create train and test data sets, one for training and one for
### testing.
## 75% of the sample size
smp_size <- floor(0.75 * nrow(myairquality))

## set the seed to make your partition reproducible
set.seed(123)
train_ind <- sample(seq_len(nrow(myairquality)), size = smp_size)

train <- myairquality[train_ind, ]
test <- myairquality[-train_ind, ]
test

### Step 3: Build a Model using KSVM & visualize the results
# Build a model (using the ksvm function, trying to predict ozone). You can
# use all the possible attributes, or select the attributes that you think
# would be the most helpful.
#
# The model is fitted on the training rows only, so that the rows in `test`
# stay unseen and can be used for an honest evaluation.
modelKSVM <- ksvm(Ozone ~ ., data = train)
modelKSVM

# Cross-tabulate the predictions of a model against the observed Ozone values,
# print that table, and return a percentage computed from its top-left 2 x 2
# corner: (diagonal cells) / (all four cells) * 100.
#
# a            - fitted model object accepted by predict().
# myairquality - data frame passed to predict() as new data; must contain an
#                Ozone column.
#
# Returns the (unrounded) percentage as a single number.
#
# NOTE(review): Ozone is a numeric target, so the table rows are individual
# predicted values rather than classes and the 2 x 2 corner is not a confusion
# matrix. The returned figure should not be read as an accuracy; use the RMSE
# for the Ozone models instead.
predictOzone <- function(a, myairquality) {
  predictedOzone <- predict(a, myairquality)
  results1 <- table(predictedOzone, myairquality$Ozone)
  print(results1)

  corner <- results1[1:2, 1:2]
  sum(diag(corner)) / sum(corner) * 100
}

predictOzone(modelKSVM, myairquality)

# 1) Test the model on the testing dataset,
# 2) compute the Root Mean Squared Error,
# 3) plot the results. Use a scatter plot. Have the x-axis represent
#    temperature, the y-axis represent wind, the point size and color
#    represent the error (as defined by the actual ozone level minus the
#    predicted ozone level).
# Root mean squared error of a vector (or one-column matrix) of residuals.
#
# error - numeric residuals, actual minus predicted.
#
# Returns a single non-negative number.
root_square <- function(error) {
  sqrt(mean(error^2))
}

# Evaluate the KSVM Ozone model on the held-out rows in `test` (created in
# Step 2) rather than on the full data set.
modelKSVM.first <- predict(modelKSVM, test)
modelKSVM.error <- test$Ozone - modelKSVM.first
root_square(modelKSVM.error)

# Compute models and plot the results for svm (in the e1071 package) and lm.
# Generate similar charts for each model.
# Show all three results (charts) in one window, using the grid.arrange
# function.

### Step 4: Create a goodOzone variable
# This variable should be either 0 or 1. It should be 0 if the ozone is below
# the average for all the data observations, and 1 if it is equal to or above
# the average ozone observed.
#
# Vectorised: one comparison against the overall mean instead of a row-by-row
# loop that recomputed the mean on every iteration.
myairquality$goodOzone <- as.numeric(
  myairquality$Ozone >= mean(myairquality$Ozone)
)

# Percentage of rows whose goodOzone class (0 or 1) is predicted correctly.
# The 2 x 2 confusion matrix (predicted in rows, actual in columns) is printed
# as a side effect.
#
# m            - fitted model object accepted by predict().
# myairquality - data frame passed to predict() as new data; must contain a
#                goodOzone column holding 0 / 1.
#
# Returns the (unrounded) percentage of correct predictions.
predictGoodozone <- function(m, myairquality) {
  predictedGoodozone <- predict(m, myairquality)

  # goodOzone is stored as a number, so the model may return continuous
  # scores instead of classes; cut them at 0.5 to obtain a 0 / 1 label.
  if (is.numeric(predictedGoodozone)) {
    predictedGoodozone <- as.integer(predictedGoodozone >= 0.5)
  }

  # Fixing the levels guarantees a 2 x 2 table even when one class is never
  # predicted or never observed.
  classes <- c(0, 1)
  results1 <- table(
    predicted = factor(predictedGoodozone, levels = classes),
    actual = factor(myairquality$goodOzone, levels = classes)
  )
  print(results1)

  sum(diag(results1)) / sum(results1) * 100
}

# Ozone is left out of the predictors: goodOzone is computed directly from it,
# so including it would leak the answer into the model. The model is fitted on
# the training rows and scored on the remaining rows (train_ind comes from
# Step 2).
modelKSVM1 <- ksvm(
  goodOzone ~ Solar.R + Wind + Temp + Month + Day,
  data = myairquality[train_ind, ]
)
modelKSVM1
predictGoodozone(modelKSVM1, myairquality[-train_ind, ])

### Step 5: See if we can do a better job predicting good and bad days
# Build a model (using the ksvm function, trying to predict goodOzone). You
# can use all the possible attributes, or select the attributes that you think
# would be the most helpful.
# Predictors used by all three Ozone models. The formula is spelled out
# instead of using `Ozone ~ .` because myairquality also carries goodOzone by
# this point, and goodOzone is derived from Ozone itself.
ozone_formula <- Ozone ~ Solar.R + Wind + Temp + Month + Day

# Scatter plot of prediction errors: temperature on the x-axis, wind on the
# y-axis, point size and colour both showing the error.
#
# newdata - data frame with Temp and Wind columns, one row per error.
# errors  - actual minus predicted Ozone for the rows of newdata (vector or
#           one-column matrix).
# title   - plot title.
#
# Returns the ggplot object.
plot_model_error <- function(newdata, errors, title) {
  error_df <- data.frame(
    Wind = newdata$Wind,
    Temp = newdata$Temp,
    Error = as.numeric(errors) # drops matrix dims / names from predict()
  )
  ggplot(data = error_df, aes(x = Temp, y = Wind)) +
    geom_point(aes(size = Error, color = Error)) +
    ggtitle(title)
}

# Every model is fitted on `train` and evaluated on `test` (both created in
# Step 2), so the reported errors are out-of-sample.
#
# NOTE(review): predictOzone() cross-tabulates continuous predictions, so the
# percentage it returns is not a meaningful accuracy for these regression
# models; the calls are kept only to preserve the script's existing output.

## KSVM (kernlab)
modelKSVM <- ksvm(ozone_formula, data = train)
predictOzone(modelKSVM, test)
modelKSVM.first <- predict(modelKSVM, test)
modelKSVM.error <- test$Ozone - modelKSVM.first
root_square(modelKSVM.error)

## SVM (e1071)
modelSVM <- svm(ozone_formula, data = train)
predictOzone(modelSVM, test)
modelSVM.first <- predict(modelSVM, test)
modelSVM.error <- test$Ozone - modelSVM.first
root_square(modelSVM.error)

## Linear model
modelLM <- lm(ozone_formula, data = train)
modelLM
modelLM.first <- predict(modelLM, test)
modelLM.error <- test$Ozone - modelLM.first
root_square(modelLM.error)

# One chart per model, each built from that model's own errors.
plotdf <- plot_model_error(test, modelKSVM.error, "KSVM Model")
plotdf

plotdf1 <- plot_model_error(test, modelSVM.error, "SVM Model")
plotdf1

plotdf2 <- plot_model_error(test, modelLM.error, "LM Model")
plotdf2

grid.arrange(plotdf, plotdf1, plotdf2, ncol = 2)

# Test the model on the testing dataset, and compute the percent of goodOzone
# that was correctly predicted.
# Plot the results. Use a scatter plot. Have the x-axis represent temperature,
# the y-axis represent wind, the shape representing what was predicted (good
# or bad day), the color representing the actual value of goodOzone (i.e. if
# the actual ozone level was good) and the size represent if the prediction
# was correct (larger symbols should be the observations the model got wrong).
# Compute models and plot the results for svm (in the e1071 package) and nb (Naive Bayes, also in the e1071 package). # Show all three results (charts) in one window, using the grid.arrange function (have two charts in one row).
run
|
edit
|
history
|
help
0
Comparison between Normal and Laplace models for option pricing
Guess the next number
19-08-2020-SeidelSistema
Kruskal Wallis test
Regression
Quiz 2
Tablet-Sprite-image-Interval
Inferences for Two Population Means [Cement Hydration]
Outlier
oooo