Author: Brad Cable
Date: 2016-01-26
Illinois State University (IT 497 - Spring 2016)
# Download one year's hourly_44201 archive and return its CSV contents as a
# data frame.
#
# year: four-digit year used to build both the archive name and the name of
#       the CSV stored inside it.
#
# The archive is written to a temporary file that is removed when the
# function exits, including when the download or the read fails.
fetchHourlyOzone <- function(year){
  zipName <- paste0("hourly_44201_", year, ".zip")
  csvName <- paste0("hourly_44201_", year, ".csv")
  tempOzone <- tempfile()
  on.exit(unlink(tempOzone), add=TRUE)
  download.file(
    paste0("http://aqsdr1.epa.gov/aqsweb/aqstmp/airdata/", zipName),
    tempOzone,
    # zip archives are binary; request binary mode explicitly so the file is
    # not corrupted on platforms that default to text mode
    mode="wb"
  )
  read.csv(unz(tempOzone, filename=csvName))
}

# 2014 and 2015 hourly readings, loaded through the same code path.
ozone <- fetchHourlyOzone(2014)
ozone2015 <- fetchHourlyOzone(2015)
# Keep only the Illinois rows of each year and tag every row with the
# zero-based month index (0 = January) taken from its local date.
ozoneIL <- ozone[ozone[["State.Name"]] == "Illinois",]
ozoneIL[["Month"]] <- as.POSIXlt(ozoneIL[["Date.Local"]])$mon
ozone2015IL <- ozone2015[ozone2015[["State.Name"]] == "Illinois",]
ozone2015IL[["Month"]] <- as.POSIXlt(ozone2015IL[["Date.Local"]])$mon

# Mean measurement per county for the first year, highest mean first.
ozoneILcnty <- aggregate(
  Sample.Measurement ~ County.Name, data=ozoneIL, FUN=mean
)
ozoneILcnty <- ozoneILcnty[order(-ozoneILcnty[["Sample.Measurement"]]),]

# Top 5 and top 10 counties. The factor levels follow the ranking so that
# plots keep the counties in ranked order instead of alphabetical order.
ozoneILcntyHead5 <- transform(
  head(ozoneILcnty, n=5),
  County.Name=factor(County.Name, levels=County.Name)
)
ozoneILcntyHead10 <- transform(
  head(ozoneILcnty, n=10),
  County.Name=factor(County.Name, levels=County.Name)
)

# Rows belonging to the top 5 counties, with hour-of-day and day-of-month
# columns derived from the local time and date.
ozoneILpreagg <- ozoneIL[
  ozoneIL[["County.Name"]] %in% ozoneILcntyHead5[["County.Name"]],
]
ozoneILpreagg[["Hour"]] <- as.POSIXlt(
  ozoneILpreagg[["Time.Local"]], format="%H:%M"
)$hour
ozoneILpreagg[["Day"]] <- as.POSIXlt(ozoneILpreagg[["Date.Local"]])$mday

# English month names in calendar order (same values as base R's month.name).
monVect <- month.name
# Aggregate Sample.Measurement by county and month.
#
# ozoneAgg: data frame with County.Name, Sample.Measurement and a Month
#           column holding zero-based month indices (0 = January).
# FUN:      summary function passed on to aggregate().
#
# Returns one row per county/month combination present in the data, sorted
# by county and then month, with Month as a factor of English month names
# whose levels are in calendar order.
aggByMonth <- function(ozoneAgg, FUN){
  ozoneAgg <- aggregate(
    Sample.Measurement ~ County.Name + Month,
    data=ozoneAgg, FUN=FUN
  )
  ozoneAgg <- ozoneAgg[
    order(ozoneAgg$County.Name, ozoneAgg$Month),
  ]
  # Index the built-in English month names directly. Going through months()
  # on a constructed date yields locale-dependent names, which turn into NA
  # against English factor levels under a non-English locale.
  ozoneAgg$Month <- factor(
    month.name[ozoneAgg$Month + 1], levels=month.name
  )
  ozoneAgg
}
# Aggregate Sample.Measurement by county and hour of day.
#
# ozoneAgg: data frame with County.Name, Hour and Sample.Measurement.
# FUN:      summary function passed on to aggregate().
#
# Returns one row per county/hour present in the data, sorted by county and
# hour, with Hour converted to a factor whose levels are 0 through 23.
aggByHour <- function(ozoneAgg, FUN){
  hourly <- aggregate(
    Sample.Measurement ~ County.Name + Hour,
    data=ozoneAgg, FUN=FUN
  )
  rowOrder <- with(hourly, order(County.Name, Hour))
  hourly <- hourly[rowOrder,]
  hourly$Hour <- factor(hourly$Hour, levels=0:23)
  hourly
}
# Aggregate Sample.Measurement by county and day of month.
#
# ozoneAgg: data frame with County.Name, Day and Sample.Measurement.
# FUN:      summary function passed on to aggregate().
#
# Returns one row per county/day present in the data, sorted by county and
# day, with Day converted to a factor whose levels are 1 through 31.
aggByDay <- function(ozoneAgg, FUN){
  daily <- aggregate(
    Sample.Measurement ~ County.Name + Day,
    data=ozoneAgg, FUN=FUN
  )
  byCountyThenDay <- order(daily[["County.Name"]], daily[["Day"]])
  daily <- daily[byCountyThenDay,]
  daily[["Day"]] <- factor(daily[["Day"]], levels=1:31)
  daily
}
# Count the measurements per county and month, padding with zero counts so
# that every county has a row for each of the twelve months.
#
# ozoneAgg: data frame accepted by aggByMonth() (County.Name, Month,
#           Sample.Measurement).
#
# Returns a data frame sorted by county and month, with Month as a factor
# over monVect and Sample.Measurement as a numeric count.
# Relies on aggByMonth() and monVect defined elsewhere in this file.
aggLength <- function(ozoneAgg){
  counts <- aggByMonth(ozoneAgg, length)

  # Every county/month pair that should exist in the result.
  counties <- levels(factor(counts$County.Name))
  grid <- expand.grid(
    Month=monVect, County.Name=counties, stringsAsFactors=FALSE
  )

  # Find the pairs aggregate() did not produce and add them in one step as a
  # properly typed data frame. Appending a character vector row by row would
  # coerce the count column to character and copy the frame on every append.
  gridKey <- paste(grid$County.Name, grid$Month, sep="\n")
  countsKey <- paste(counts$County.Name, counts$Month, sep="\n")
  absent <- !(gridKey %in% countsKey)
  if(any(absent)){
    filler <- data.frame(
      County.Name=grid$County.Name[absent],
      Month=factor(grid$Month[absent], levels=monVect),
      Sample.Measurement=0,
      stringsAsFactors=FALSE
    )
    counts <- rbind(counts, filler)
  }

  counts <- counts[order(counts$County.Name, counts$Month),]
  # length() yields integers; callers receive the count as numeric.
  counts$Sample.Measurement <- as.numeric(counts$Sample.Measurement)
  counts
}
# Per-month measurement counts for every Illinois county, one table per year.
ozone2015ILaggLength <- aggLength(ozoneAgg=ozone2015IL)
ozoneILaggLength <- aggLength(ozoneAgg=ozoneIL)

# Mean measurement for the top five counties, broken down by day of month,
# hour of day and month.
ozoneILaggDayMean <- aggByDay(ozoneAgg=ozoneILpreagg, FUN=mean)
ozoneILaggHourMean <- aggByHour(ozoneAgg=ozoneILpreagg, FUN=mean)
ozoneILaggMonthMean <- aggByMonth(ozoneAgg=ozoneILpreagg, FUN=mean)

# Show the five highest-ranked counties.
ozoneILcntyHead5
## County.Name Sample.Measurement
## 18 Randolph 0.03418317
## 9 Jo Daviess 0.03231734
## 1 Adams 0.03179620
## 8 Jersey 0.03114063
## 7 Hamilton 0.03070671
library(ggplot2)

# Bar chart of the overall mean for the five highest-ranked counties.
g <- ggplot(ozoneILcntyHead5, aes(x=County.Name, y=Sample.Measurement)) +
  geom_bar(stat="identity")
g + labs(x="County Name", y="Ozone Emissions (PPM)")

# Monthly means as bars, one facet row per county.
g <- ggplot(ozoneILaggMonthMean, aes(x=Month, y=Sample.Measurement)) +
  geom_bar(stat="identity") +
  facet_grid(County.Name ~ .)
g + labs(x="Month", y="Ozone Emissions (PPM)")

# The same monthly means as one coloured line per county.
g <- ggplot(
  ozoneILaggMonthMean,
  aes(x=Month, y=Sample.Measurement, group=County.Name, colour=County.Name)
) + geom_path()
g + labs(x="Month", y="Ozone Emissions (PPM)")
Ozone levels are clearly higher during normal human waking hours.
# Mean by hour of day, one coloured line per county.
g <- ggplot(
  ozoneILaggHourMean,
  aes(x=Hour, y=Sample.Measurement, group=County.Name, colour=County.Name)
) + geom_path()
g + labs(x="Hour", y="Ozone Emissions (PPM)")

# Mean by day of month, one coloured line per county.
g <- ggplot(
  ozoneILaggDayMean,
  aes(x=Day, y=Sample.Measurement, group=County.Name, colour=County.Name)
) + geom_path()
g + labs(x="Day", y="Ozone Emissions (PPM)")
It is interesting to see how many Illinois counties have fewer observations in January through March and in November/December.
# Number of measurements recorded per month, one coloured line per county.
# ozoneILaggLength comes from aggLength(), so the y value is a row count and
# the axis is labelled as a count rather than as a concentration.
g <- ggplot(ozoneILaggLength, aes(
  x=Month, y=Sample.Measurement, group=County.Name, colour=County.Name
))
g <- g + geom_path()
g + xlab("Month") + ylab("Number of Measurements")
Looking at the 2015 data, we see a similar pattern for January through March, and the drop in November/December can be explained by the data not being collected yet for this particular year.
# Number of measurements recorded per month in the 2015 data, one coloured
# line per county. ozone2015ILaggLength comes from aggLength(), so the y
# value is a row count and the axis is labelled as a count rather than as a
# concentration.
g <- ggplot(ozone2015ILaggLength, aes(
  x=Month, y=Sample.Measurement, group=County.Name, colour=County.Name
))
g <- g + geom_path()
g + xlab("Month") + ylab("Number of Measurements")