In this lab we will learn how to use the location data that Google records through our devices to do some quick mapping in R.

To download your JSON data: go to Google Takeout (https://takeout.google.com), deselect everything except Location History, and export it in JSON format.

Get your API key for basemaps: https://developers.google.com/maps/documentation/embed/get-api-key

To facilitate your use of R, we will use an integrated development environment (IDE) called RStudio, and we provide you with an R script.

We start our R script by installing a ‘library’ or a ‘package’. These are collections of pre-programmed functions written by the R community (you can write one too!) that help us carry out computations we would otherwise have had to code from scratch.

We can now initiate the R session by telling R to ‘attach’ certain libraries to this session, since we will be making use of functions in them.

knitr::opts_chunk$set(echo = TRUE)

# Install every package used in this lab.
# A single vectorized call to install.packages() is equivalent to one
# call per package, but shorter and uses consistent quoting.
# (Comment this out after the first run — you only need to install once.)
install.packages(c(
  "jsonlite",                     # parse the Google Takeout JSON file
  "dplyr", "plyr", "tidyr",       # data manipulation
  "ggplot2", "ggmap", "ggthemes", # plotting and basemaps
  "leaflet", "leaflet.extras",    # interactive web maps
  "viridis", "wesanderson",       # color palettes
  "geosphere",                    # great-circle distance calculations
  "maps", "mapproj"               # world basemap data and projections
))

# Attach the libraries for this session.
# NOTE: plyr is loaded BEFORE dplyr on purpose. Both packages define
# functions such as mutate() and summarise(); loading plyr second would
# mask the dplyr versions that this script relies on later (e.g. the
# mutate() call in the distance-calculation step).
library(jsonlite)
library(plyr)
library(dplyr)
library(ggplot2)
library(ggmap)
library(tidyr)
library(leaflet)
library(leaflet.extras)
library(viridis)
library(wesanderson)
library(geosphere)
library(maps)
library(mapproj)
library(ggthemes)

To start your analysis, make sure you set your working directory to whatever folder you have stored your JSON file in. Add the name of the directory and file in the relevant code chunks below.

For example, this could be “C:/Users/ridaq/Dropbox (MIT)”. Remember that R uses forward slashes (/) in its file paths, so if you copy a default Windows path you will have to replace its backslashes (\) with forward slashes.

# Set the working directory to the folder that holds your JSON file.
# Add your own path between the quotes, e.g. 'C:/Users/you/Dropbox'.
# (Remember: R paths use forward slashes.)
setwd('')

# Read the JSON location history into R.
# Remember: the $locations element only exists in the FULL history
# export, not in a monthly history export.
# Add your filename, e.g. 'location.json', between the quotes.
# (Fixed: use the idiomatic assignment operator <- instead of =.)
LocData <- fromJSON(txt = "")

# Save the location records from the parsed JSON file into a data frame.
google.locations <- LocData$locations

# Use the C locale for time formatting so the weekday abbreviations
# below are always English ("Mon", "Tue", ...) on any system.
Sys.setlocale("LC_TIME", "C")

# Create new columns.
# timestampMs is milliseconds since the Unix epoch, so divide by 1000
# before converting to POSIXct.
google.locations$timekeeping <- as.POSIXct(as.numeric(google.locations$timestampMs)/1000, origin="1970-01-01")
google.locations$weekdays <- factor(format(google.locations$timekeeping, "%a"),
                                    levels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"))
# Converting E7-encoded coordinates (degrees * 1e7) to decimal degrees.
google.locations$latGPS <- as.numeric(google.locations$latitudeE7/1E7)
google.locations$lonGPS <- as.numeric(google.locations$longitudeE7/1E7)


# Test the measurement interval.
# BUG FIX: subtracting two POSIXct values produces a difftime whose
# units are chosen automatically (secs, mins, hours, or days depending
# on the span). The arithmetic below assumes days, so request days
# explicitly with difftime(..., units = "days").
date.diff <- difftime(max(google.locations$timekeeping),
                      min(google.locations$timekeeping), units = "days")
measurements.per.hour <- (nrow(google.locations)/as.numeric(date.diff))/24  # number of measurements per hour
measurements.per.minute <- measurements.per.hour/60  # number of measurements per minute
measurement.interval <- 60/measurements.per.hour  # number of minutes per measurement

measurement.interval

# Create a subset data frame that keeps only the columns we need.
locdata <- subset(google.locations,
                  select = c("timekeeping", "velocity", "weekdays", "latGPS", "lonGPS"))
# Add a Year column (first 4 characters of the timestamp).
# BUG FIX: the original called transform(only.loc, ...), but `only.loc`
# is never defined anywhere in this script — the intended input is the
# `locdata` frame created on the line above.
locdata <- transform(locdata, Year = substr(timekeeping, 1, 4))


#to use ggmap enable static map api in google and then generate api key
# NOTE(review): register_google() registers the key for this session;
# the account_type argument may be ignored/deprecated in newer ggmap
# releases — confirm against your installed version.
register_google(key='enter your key here', account_type = "standard")

# World country polygons (drawn from the maps package) used as the
# background layer for the global travel plot later in the script.
mapWorld <- borders("world", colour = "gray85", fill = "gray80")  

# Download basemaps of the same Cambridge, MA point at different zoom
# levels and tile styles so we can compare them below.
# NOTE(review): Stamen tiles have moved to Stadia Maps; recent ggmap
# versions may need get_stadiamap() or a Stadia API key for the
# source='stamen' calls — confirm with your installed ggmap version.
camb1<- get_map(c(lon=-71.092315,lat=42.368428), zoom = 12, source='stamen',maptype="toner-lite")
camb2<- get_map(c(lon=-71.092315,lat=42.368428), zoom = 13, source='stamen',maptype="toner-lite")

# Google road tiles: one black-and-white and one full-color version.
camb3<- get_map(c(lon=-71.092315,lat=42.368428), zoom = 14, source='google',maptype="road", color='bw')
cambcol<-get_map(c(lon=-71.092315,lat=42.368428), zoom = 14, source='google',maptype="road")
cambton<- get_map(c(lon=-71.092315,lat=42.368428), zoom = 13, source='stamen',maptype="toner")

#show basemap differences

ggmap(camb1)
ggmap(camb2)
ggmap(camb3)
ggmap(cambcol)
ggmap(cambton)

We can subset our larger dataset to a single year to make analysis easier. You can also plug in your full dataset here instead.

# Keep only the rows recorded in 2019 (rows with other or missing
# years are dropped, just as with which()-based indexing).
loc2019 <- subset(locdata, Year == '2019')

 

 #geompoint zoomed in
 # Plot the 2019 points over the zoomed Google basemap. darken() washes
 # the basemap toward white so points stand out; the very low alpha
 # (.05) makes frequently-visited places appear darker through overplotting.
  ggmap(camb3,darken = c(0.6, "white")) +
   geom_point(data = loc2019, aes(x = lonGPS, y = latGPS),color = "#3B9AB2", alpha = .05, size = .5)+ 
   theme(legend.position = "right") + 
   labs(
     x = "Longitude", 
     y = "Latitude", 
     title = "Google Location History",
     caption = "\nA simple point plot shows recorded positions.")
   
 #heatmap
 # 2D kernel-density heatmap. Note this intentionally uses the FULL
 # locdata frame, not just the 2019 subset, binned into 100 density levels.
 # NOTE(review): ..level.. is the legacy ggplot2 spelling; current
 # versions prefer after_stat(level) — confirm with your installed ggplot2.
 ggmap(camb3, darken = c(0.6, "white")) +
   stat_density2d(data = locdata, aes(x = lonGPS, y = latGPS, fill=..level..),alpha=.5, size=0.2,bins=100, geom="polygon"
   )+
   scale_fill_gradient(low="green", high='maroon')+
   labs(
     x = "Longitude", 
     y = "Latitude", 
     title = "Heatmap of recorded positions")
 
 
 
 #all travel data
 # World map of all 2019 points over the mapWorld borders layer;
 # theme_map() (from ggthemes) strips axes for a clean atlas-style plot.
 ggplot()+ mapWorld +  theme_map() +
   geom_point(data = loc2019, aes(x = lonGPS, y = latGPS),color = "#3B9AB2", alpha = .5, size = 1)+ 
   theme(legend.position = "right") + 
   labs( 
     title = "Google Location History",
     caption = "\nA simple point plot shows recorded positions.")

Using leaflet, you can also make an interactive map of all points.

# Interactive leaflet map of all 2019 points.
# Because loc2019 is passed as the data argument to leaflet(), the
# formulas below can refer to its columns directly (~lonGPS) instead of
# re-reaching into the global object with ~loc2019$lonGPS.
leafmap.point <- leaflet(loc2019) %>%
  addProviderTiles(providers$CartoDB.Positron,
                   options = providerTileOptions(opacity = 0.7)) %>%

  # Fit the initial view to the data's bounding box.
  # (To find a bounding box manually use http://bboxfinder.com/ ;
  # the argument order is lng1, lat1, lng2, lat2.)
  fitBounds(~min(lonGPS), ~min(latGPS), ~max(lonGPS), ~max(latGPS)) %>%
  addCircleMarkers(
    # Low fill opacity so dense clusters of visits read as darker areas.
    stroke = FALSE, fillOpacity = .09,
    radius = 3,
    lng = ~lonGPS, lat = ~latGPS
  )



leafmap.point


 
# Making a leaflet map zoomed to Cambridge, MA.
leafmap.cam <- leaflet(loc2019) %>%
  addProviderTiles(providers$CartoDB.Positron,
                   options = providerTileOptions(opacity = 0.7)) %>%

  # Bounding box found with http://bboxfinder.com/ (lng/lat order:
  # lng1, lat1, lng2, lat2).
  # FIX: wrapping each scalar constant in min()/max() was a no-op, and
  # the ~formula wrapper is unnecessary for literals — pass them directly.
  fitBounds(-71.144427, 42.346422, -71.048083, 42.398743) %>%
  addCircleMarkers(
    stroke = FALSE, fillOpacity = .09,
    radius = 1,
    # Refer to columns of the leaflet() data argument directly.
    lng = ~lonGPS, lat = ~latGPS
  )



leafmap.cam

We can use the geosphere library to calculate the distance travelled between consecutive points.

# Calculating the great-circle distance between consecutive fixes using
# the geosphere library. Distances are in meters; lag() pairs each row
# with the previous one, so the first row's Distance is NA.
loc2019 <- mutate(loc2019,
                  Distance = distHaversine(cbind(lonGPS, latGPS),
                                           cbind(lag(lonGPS), lag(latGPS))))
# Drop implausible jumps (>= 500 km between consecutive fixes); the
# which() indexing also removes the leading NA row.
loc2019 <- loc2019[which(loc2019$Distance < 500000), ]


# Total distance travelled per weekday.
# BUG FIX: the stat_count weighting aesthetic is `weight`, not
# `weights` — the misspelled aesthetic was ignored by ggplot2, so the
# bars showed row COUNTS instead of summed distance. Also reference
# columns bare inside aes() (weekdays, Distance) rather than
# loc2019$..., so they are evaluated in the data context.
ggplot(data = loc2019, aes(x = weekdays, weight = Distance/1000)) + # dividing to get km
  labs(title = "Distance travelled by Day", subtitle = 'in 2019', x = "Day", y = "sum of distance in km",
       caption = "\n Google Maps History Data.") +
  geom_bar(fill = "#3B9AB2") +
  theme_minimal()