Let us first load and inspect the San Francisco data set:
# Restore the saved object(s) from disk; this file supplies `incidents`.
load(file = "data/SFincidents2012.rda")
# Preview the leading rows to see which columns are available.
head(incidents, n = 6L)
## IncidntNum Category Descript DayOfWeek
## 1 120000499 ROBBERY ROBBERY, BODILY FORCE Sunday
## 2 120000938 NON-CRIMINAL DEATH REPORT, CAUSE UNKNOWN Sunday
## 3 120001936 SUICIDE SUICIDE BY JUMPING Sunday
## 4 120002235 VEHICLE THEFT STOLEN AUTOMOBILE Sunday
## 5 120003186 NON-CRIMINAL DEATH REPORT, CAUSE UNKNOWN Monday
## 6 120000041 ASSAULT BATTERY Sunday
## Date Time PdDistrict Resolution Location
## 1 2012-01-01 01:50 CENTRAL JUVENILE BOOKED VALLEJO ST / POWELL ST
## 2 2012-01-01 06:40 NORTHERN NONE 800 Block of OFARRELL ST
## 3 2012-01-01 14:45 NORTHERN NONE 1200 Block of GOUGH ST
## 4 2012-01-01 17:00 MISSION NONE 1600 Block of BRYANT ST
## 5 2012-01-02 08:30 CENTRAL NONE 1200 Block of STOCKTON ST
## 6 2012-01-01 00:25 SOUTHERN NONE 200 Block of MARKET ST
## X Y violent censusBlock
## 1 -122.4105 37.79837 TRUE 06075010700
## 2 -122.4183 37.78516 FALSE 06075012202
## 3 -122.4244 37.78436 FALSE 06075016000
## 4 -122.4105 37.76562 FALSE 06075017700
## 5 -122.4084 37.79679 FALSE 06075010700
## 6 -122.3974 37.79244 TRUE 06075011700
We defined the following crimes as violent: assault, robbery, rape, kidnapping, and purse snatching.
# Base-R approach: a named integer vector acts as a weekday -> offset lookup
# table (Friday = 0, ..., Thursday = 6).
DoW <- c(
  "Friday", "Saturday", "Sunday", "Monday",
  "Tuesday", "Wednesday", "Thursday"
)
DoWLookup <- setNames(0:6, DoW)
# Peek at the offsets looked up for the first few incidents.
head(DoWLookup[incidents$DayOfWeek])
## Sunday Sunday Sunday Sunday Monday Sunday
## 2 2 2 2 3 2
# Derive time-of-day and hour-of-week covariates from the "HH:MM" Time strings.

# Hour of the day as a two-character string (characters 1-2 of Time).
incidents$HrOfDay <- substr(incidents$Time, 1, 2)

# Fractional hour of the day: whole hours plus minutes / 60.
incidents$TimeOfDay <- as.numeric(incidents$HrOfDay) +
  as.numeric(substr(incidents$Time, 4, 5)) / 60

# Weekday -> day offset within the week; the week is counted from Friday (= 0).
DoW <- c(
  "Friday", "Saturday", "Sunday", "Monday",
  "Tuesday", "Wednesday", "Thursday"
)
DoWLookup <- setNames(0:6, DoW)

# Hours elapsed since the start of the (Friday-based) week.
# as.character() forces a lookup by *name*: were DayOfWeek ever a factor,
# `[` would index by its integer codes and silently return the wrong days.
# unname() keeps the per-row day names from being dragged into the column.
incidents$HourOfWeek <- incidents$TimeOfDay +
  unname(DoWLookup[as.character(incidents$DayOfWeek)]) * 24
Load and explore the library mgcv. Try to recreate a graph similar to the one below. (Hint: the bam function is much faster than gam.)
Using the commands table() and as.POSIXct(), build an hourly time series of counts of (all) crimes. (Hint: as.POSIXct("2012-01-02 08", format="%Y-%m-%d %H"))
# Build an hourly time series of crime counts, plot it, and keep it as a
# data frame (CrimeCounts) with one row per observed hour.

# Key every incident by "YYYY-MM-DD HH". paste() is vectorised, so there is no
# need for a row-wise apply(), which first coerces the data frame to a
# character matrix and then loops over every row.
Hour <- paste(incidents$Date, incidents$HrOfDay)
hour_start <- as.POSIXct(Hour, format = "%Y-%m-%d %H")

# Tabulate on explicit fixed-width labels. Tabulating the POSIXct values
# directly leaves the labels to as.character(), which in recent R versions can
# print midnight as a bare date; as.POSIXct(names(h)) then has to guess a
# format and may drop the hour. Fixed-width text also sorts chronologically.
hour_format <- "%Y-%m-%d %H:%M:%S"
h <- table(format(hour_start, hour_format))

# Parse the labels back once, with the same explicit format, and reuse them.
hour_axis <- as.POSIXct(names(h), format = hour_format)

plot(hour_axis, as.numeric(h),
  type = "l", xlab = "", ylab = "Num Crimes", col = "darkblue"
)
title("Hourly crimes in San Francisco")
grid()

CrimeCounts <- data.frame(date = hour_axis, NumCrimes = as.numeric(h))
# Interactive googleVis time line of the hourly counts, written to
# CrimeCounts.html. Shorthand for the same chart with default options:
#   gvisAnnotatedTimeLine(CrimeCounts, "date", "NumCrimes")
cht <- gvisAnnotatedTimeLine(
  CrimeCounts,
  datevar = "date",
  numvar = "NumCrimes",
  options = list(width = "900px")
)
print(cht, file = "CrimeCounts.html")
# Re-bind CrimeCounts as an xts series: the hourly counts in h, indexed by the
# timestamps recovered from the table labels.
# NOTE(review): as.POSIXct(names(h)) guesses the label format -- confirm that
# all labels share one layout under the R version in use.
CrimeCounts <- xts(x = as.numeric(h), order.by = as.POSIXct(names(h)))
plot(CrimeCounts)
# Interactive dygraph with a range selector attached.
# Plain variant without the selector: dygraph(CrimeCounts)
dygraph(CrimeCounts) %>%
  dyRangeSelector()
round(mean(x),2)
, which in pipe notation would then be written as mean(x) %>% round(2)
Using pipes, try to rewrite the chained command from above that we used to create the Hour variable:
# Pipe version of the Hour construction: the two-column subset is fed to
# apply() as its first argument, and each row is pasted into "Date HrOfDay".
Hour <- incidents[, c("Date", "HrOfDay")] %>%
  apply(MARGIN = 1, FUN = paste, collapse = " ")
head(Hour, n = 10)
## [1] "2012-01-01 01" "2012-01-01 06" "2012-01-01 14" "2012-01-01 17"
## [5] "2012-01-02 08" "2012-01-01 00" "2012-01-01 00" "2012-01-01 05"
## [9] "2012-01-02 03" "2012-01-01 21"