The objective of this project is to use a neural network built with Keras to solve a regression problem in both Python and R. The project was originally done in Python in "Deep Learning with Keras" by Antonio Gulli and Sujit Pal (Keras regression example: predicting benzene levels in the air). Here we also implement it in R, with some improvisation.
This section executes the purple part of the project flow, shown in the figure below, in R.
The following are the links to the code and the generated report:
Relates to the purple part of the project (see figure above).
Relates to the blue part of the project (see figure above).
Courtesy of https://archive.ics.uci.edu/ml/datasets/Air+Quality
# Fetch the UCI Air Quality archive into a temporary file and read the CSV
# inside it: fields are separated by ";" and decimals are written with ",".
temp <- tempfile()
df_data <- tryCatch(
  {
    # mode = "wb" keeps the zip byte-exact; a text-mode transfer can corrupt
    # binary files on Windows.
    download.file(
      "https://archive.ics.uci.edu/ml/machine-learning-databases/00360/AirQualityUCI.zip",
      temp,
      mode = "wb"
    )
    read.table(unz(temp, "AirQualityUCI.csv"), sep = ";", dec = ",", header = TRUE)
  },
  # Remove the temporary archive even when the download or the parse fails.
  finally = unlink(temp)
)
# Column names exactly as they come out of the raw CSV.
names(df_data)
## [1] "Date" "Time" "CO.GT." "PT08.S1.CO."
## [5] "NMHC.GT." "C6H6.GT." "PT08.S2.NMHC." "NOx.GT."
## [9] "PT08.S3.NOx." "NO2.GT." "PT08.S4.NO2." "PT08.S5.O3."
## [13] "T" "RH" "AH" "X"
## [17] "X.1"
# https://github.com/rstudio/keras/issues/311
# Alternative kept for reference: read the same CSV from a local copy.
# df_data <- read.table("c:/ds_local/dataset/AirQualityUCI.csv", dec=",", sep=";", header=TRUE)
# Show the first six rows of the raw table.
head(df_data, n = 6L)
# visdat supplies vis_dat() and vis_miss(), both applied to the raw table
# below; each call is left at top level so its plot is printed.
library(visdat)
## Warning: package 'visdat' was built under R version 3.4.4
visdat::vis_dat(df_data)
## Warning: package 'bindrcpp' was built under R version 3.4.3
# Presumably a per-column view of missing values (see the clean-up that follows).
visdat::vis_miss(df_data)
# Drop the columns that are not kept for modelling:
#   X, X.1     - the last two columns, which have no data
#   Date, Time - considered to have no impact on the prediction
unused_cols <- c("X", "X.1", "Date", "Time")
df_data <- df_data[, setdiff(names(df_data), unused_cols), drop = FALSE]
# Remove every row that has an NA in any remaining column.
# NOTE(review): the UCI dataset description reportedly tags missing readings
# with -200; na.omit() does not remove those - confirm this is intended.
df_data <- na.omit(df_data)
# Column names before tidying.
names(df_data)
## [1] "CO.GT." "PT08.S1.CO." "NMHC.GT." "C6H6.GT."
## [5] "PT08.S2.NMHC." "NOx.GT." "PT08.S3.NOx." "NO2.GT."
## [9] "PT08.S4.NO2." "PT08.S5.O3." "T" "RH"
## [13] "AH"
# Tidy the names in two steps: replace every "." with "_", then strip the
# single trailing "_" that the first step can leave behind.
underscored <- gsub(".", "_", names(df_data), fixed = TRUE)
names(df_data) <- sub("_$", "", underscored)
# Save the cleaned table as CSV without a row-name column.
cleaned_csv_path <- "c:/ds_local/dataset/AirQualityUCI_cleaned.csv"
# write.csv() does not create missing folders, so make sure the target
# directory exists first (no-op, with the warning silenced, if it already does).
dir.create(dirname(cleaned_csv_path), recursive = TRUE, showWarnings = FALSE)
write.csv(df_data, file = cleaned_csv_path, row.names = FALSE)