# Radius of the earth in meters
EARTH_RADIUS <- 6371000
# Use R's built-in full-precision constant instead of a 6-digit literal.
PI <- pi

# Timestamp layout shared by the start/end date columns.
TIME_FORMAT <- "%m/%d/%Y %H:%M:%S"

# Extract one numeric field from colon-separated position strings.
#
# loc:   character vector (or factor) of "field1:field2:field3" strings.
# index: 1-based position of the field to extract.
# Returns a numeric vector with one value per element of `loc`; a missing or
# non-numeric field yields NA.
positionField <- function(loc, index) {
  # as.character() so factor columns (read.csv default before R 4.0) work too.
  fields <- strsplit(as.character(loc), ":", fixed = TRUE)
  vapply(fields, function(parts) as.numeric(parts[index]), numeric(1))
}

# Elevation change from loc1 to loc2: third field of loc2 minus third field
# of loc1. Positive means loc2 is higher. Vectorised over both arguments.
elevationDistance <- function(loc1, loc2) {
  positionField(loc2, 3) - positionField(loc1, 3)
}

# Approximate ground distance in meters between two positions, using the
# equirectangular approximation: the longitude difference is scaled by the
# cosine of the mean latitude, then combined with the latitude difference.
# Field 1 is read as latitude and field 2 as longitude, both in degrees.
# Vectorised over both arguments.
earthDistance <- function(loc1, loc2) {
  lat1 <- positionField(loc1, 1) * PI / 180.0
  lat2 <- positionField(loc2, 1) * PI / 180.0
  lon1 <- positionField(loc1, 2) * PI / 180.0
  lon2 <- positionField(loc2, 2) * PI / 180.0

  x <- (lon2 - lon1) * cos((lat1 + lat2) / 2)
  y <- lat2 - lat1
  sqrt(x * x + y * y) * EARTH_RADIUS
}

# Elapsed time in hours from time1 to time2 (negative if time2 is earlier).
# Both arguments are "%m/%d/%Y %H:%M:%S" strings parsed in the session's
# local time zone. Vectorised over both arguments.
timeDistance <- function(time1, time2) {
  start <- strptime(time1, format = TIME_FORMAT)
  end <- strptime(time2, format = TIME_FORMAT)
  as.double(difftime(end, start, units = "hours"))
}

# Time of day as fractional hours (e.g. 13:30 -> 13.5); seconds are ignored.
dayHour <- function(time) {
  parts <- unclass(strptime(time, format = TIME_FORMAT))
  parts$hour + parts$min / 60.0
}

# Enrich rides with derived columns ----

rides <- read.csv("data/cabi-sample-rides.filtered.csv")

# The helpers are vectorised, so whole columns are passed directly instead of
# going through apply(), which would coerce the data frame to a character
# matrix and call the helpers once per row.
rides$elevation <- elevationDistance(rides$startPos, rides$endPos)
rides$distance <- earthDistance(rides$startPos, rides$endPos)
rides$duration <- timeDistance(rides$startDate, rides$endDate)
rides$startHour <- dayHour(rides$startDate)

# Rides whose start and end positions differ (rows with NA distance are
# dropped by subset() as well).
nzRides <- subset(rides, distance > 0)

# NOTE(review): the ".cvs" extension looks like a typo for ".csv"; the paths
# are kept unchanged because other scripts may read these exact file names.
write.csv(nzRides, "data/cabi-nz-rides.ext.cvs")
write.csv(rides, "data/cabi-rides.ext.cvs")

# ggplot2 for the map ----
library(maps)
library(ggplot2)

stations <- read.csv("data/cabi-station-counts.csv")
# Fix the facet order of the time-of-day buckets.
stations$time <- factor(
  stations$time,
  levels = c("EARLYMORN", "LATEMORN", "AFTERNOON",
             "EVENING", "NIGHT", "LATENIGHT")
)

all_states <- map_data("state")
states <- subset(all_states, region %in% c("district of columbia"))

# NOTE(review): `p` is reassigned by each plot section below and is never
# printed in the code visible here; print(p) (or ggsave) is needed to render
# a plot when the script is run non-interactively.
p <- ggplot(stations)
# `group` must be mapped by name: an unnamed third argument to aes() is not
# treated as the group aesthetic, so polygon pieces would not be separated.
p <- p + geom_polygon(
  data = states,
  aes(x = long, y = lat, group = group)
)
p <- p +
  geom_point(
    data = stations,
    aes(x = long, y = lat, size = count),
    color = "gold2"
  ) +
  scale_size(name = "Bikes")
p <- p + facet_grid(type ~ time)

# ggplot2 smooth ----
p <- ggplot(rides)
p <- p + geom_smooth(aes(x = startHour, y = distance))
# Zoom the y axis without discarding data from the smoother's fit.
p <- p + coord_cartesian(ylim = c(1000, 2500))

# ggplot2 histogram ----
p <- ggplot(rides)
p <- p + geom_histogram(aes(x = duration), binwidth = .1)
p <- p + scale_y_sqrt()
p <- p + facet_grid(subscription ~ .)
p <- p + scale_x_continuous(limits = c(0, 4))