Practice with Spatial Data

MAPS!

Maps and Spatial Fields are fun!
DataViz
Spatial
GGPlot
Assignment
Author

Geraline Trossi-Torres

Published

April 18, 2024

OVERVIEW

In this assignment, we’ll consider some of the tools and techniques for visualizing spatial data. Spatial data comes in two broad categories, geographic and spatial fields. Let’s practice a few visualizations to get a feel for how these things work!

GEOGRAPHIC MAPS!

In class I bet Ronald that he would end up creating some kind of map-based visualization before he graduated with his PhD. This is because he works on malaria - a terrible disease with a strong spatial component to its risk levels. Let’s get some malaria data and map it!

The data I obtained were from the Malaria Atlas. I downloaded a CSV with 10 years of data for all the countries they had on file.

Code
library(tidyverse)
library(readxl)
library(rnaturalearth)
library(rnaturalearthdata)
library(dplyr)

# Read the Malaria Atlas export (expected in the working directory).
Malaria <- read.csv(file = "National_Unit_data.csv")

# Keep only the "Infection Prevalence" rows, copy the measurement into a
# descriptively named column (Prevalence), and treat Year as a factor.
Incidence <- mutate(
  filter(Malaria, Metric == "Infection Prevalence"),
  Prevalence = Value,
  Year = as.factor(Year)
)

# Optional (left disabled): trim the table to the columns used for mapping.
# Incidence <- select(Incidence, c(ISO3, Prevalence, Year))

Now I’m going to use the rnaturalearth package to create country polygons. Then I’ll add the Malaria data to that data frame.

Code
# Country polygons from Natural Earth, returned as an sf object.
world_map <- ne_countries(returnclass = "sf", scale = "medium")

# Attach the prevalence records to the polygons by ISO3 code, then drop every
# polygon row that ended up without a prevalence value.
# NOTE: this object shares its name with ggplot2::map_data(). Later calls such
# as map_data("usa") still work because R skips non-function objects when it
# resolves a function call.
map_data <- filter(
  left_join(world_map, Incidence, by = c("iso_a3" = "ISO3")),
  !is.na(Prevalence)
)

Now I will make a plot!

Code
library(gganimate)
library(transformr)
library(magick)
library(gifski)
# 
# Choropleth of malaria prevalence for African countries.
#
# The prevalence table is described as covering 10 years and the join keeps
# every year, so each country is presumably present once per year. Drawn in a
# single panel those polygons sit on top of each other and only the last-drawn
# year is visible. Faceting by Year gives each year its own panel so no data
# is hidden.
ggplot() +
  geom_sf(
    data = map_data %>%
      filter(continent == "Africa"),
    aes(fill = Prevalence)
  ) +
  facet_wrap(~Year) +
  scale_fill_gradient(
    low = "white",
    high = "red",
    na.value = "gray",
    name = "Malaria Prevalence"
  ) +
  theme_minimal() +
  # Latitude/longitude axes add nothing to a choropleth, so hide them.
  theme(
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank()
  ) +
  labs(title = "Malaria Prevalence by Country")

My VERSION

Code
library(ggplot2)
library(sf)
library(maps)

# Fetch the U.S. outline as a data frame of polygon vertices.
# The ggplot2:: prefix makes it explicit that the function is meant, not the
# `map_data` object created earlier in this file.
usa <- ggplot2::map_data("usa")

# Draw the outline: white fill, black border, fixed 1.3 aspect ratio, and a
# blank theme (no axes, grid, or background).
ggplot(usa, aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "white", colour = "black") +
  coord_fixed(ratio = 1.3) +
  theme_void()

Code
# Fetch state polygons as a data frame of vertices.
# The ggplot2:: prefix makes it explicit that the function is meant, not the
# `map_data` object created earlier in this file.
states <- ggplot2::map_data("state")

# Two layers sharing the same aesthetics: the national outline underneath
# (white fill, black border) and unfilled gray state borders on top.
ggplot(mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(data = usa, fill = "white", colour = "black") +
  geom_polygon(data = states, fill = NA, colour = "gray") +
  coord_fixed(ratio = 1.3) +
  theme_void()

Code
# Load required packages
library(readxl)
library(ggplot2)

# Read the Chlamydia spreadsheet (expected in the working directory).
chlamydia_data <- read_excel(path = "ChlamydiaInfectionRate.xlsx")

# First attempt at attaching the case data to the state polygons, keeping every
# polygon row. The two key columns use different letter case (see the printed
# values that follow), so the merge is repeated after the keys are normalised.
states_chlamydia <- merge(
  x = states,
  y = chlamydia_data,
  by.x = "region",
  by.y = "State",
  all.x = TRUE
)

# Inspect the distinct join-key values on the polygon side (states$region).
unique(states[["region"]])
 [1] "alabama"              "arizona"              "arkansas"            
 [4] "california"           "colorado"             "connecticut"         
 [7] "delaware"             "district of columbia" "florida"             
[10] "georgia"              "idaho"                "illinois"            
[13] "indiana"              "iowa"                 "kansas"              
[16] "kentucky"             "louisiana"            "maine"               
[19] "maryland"             "massachusetts"        "michigan"            
[22] "minnesota"            "mississippi"          "missouri"            
[25] "montana"              "nebraska"             "nevada"              
[28] "new hampshire"        "new jersey"           "new mexico"          
[31] "new york"             "north carolina"       "north dakota"        
[34] "ohio"                 "oklahoma"             "oregon"              
[37] "pennsylvania"         "rhode island"         "south carolina"      
[40] "south dakota"         "tennessee"            "texas"               
[43] "utah"                 "vermont"              "virginia"            
[46] "washington"           "west virginia"        "wisconsin"           
[49] "wyoming"             
Code
# Inspect the distinct join-key values on the case-data side
# (chlamydia_data$State).
unique(chlamydia_data[["State"]])
 [1] "Louisiana"      "Mississippi"    "Alaska"         "South Carolina"
 [5] "Georgia"        "Alabama"        "North Carolina" "Arkansas"      
 [9] "Illinois"       "South Dakota"   "Arizona"        "Tennessee"     
[13] "New Mexico"     "New York"       "Missouri"       "Texas"         
[17] "Nevada"         "Delaware"       "Maryland"       "Oklahoma"      
[21] "Indiana"        "US TOTAL†"      "California"     "Nebraska"      
[25] "Florida"        "Rhode Island"   "North Dakota"   "Kansas"        
[29] "Virginia"       "Ohio"           "Iowa"           "Colorado"      
[33] "Wisconsin"      "Michigan"       "Pennsylvania"   "Kentucky"      
[37] "Massachusetts"  "Minnesota"      "Hawaii"         "Oregon"        
[41] "Washington"     "Montana"        "New Jersey"     "Connecticut"   
[45] "Utah"           "Wyoming"        "Idaho"          "West Virginia" 
[49] "Maine"          "New Hampshire"  "Vermont"       
Code
# Normalise both join keys: strip surrounding whitespace and lower-case them so
# that, for example, "Alabama" in the case data matches "alabama" in the
# polygon data.
states$region <- tolower(trimws(states$region))
chlamydia_data$State <- tolower(trimws(chlamydia_data$State))

# Attach the case data to every polygon vertex. all.x = TRUE keeps regions that
# have no matching State row; their Cases value is NA.
states_chlamydia <- merge(
  states,
  chlamydia_data,
  by.x = "region",
  by.y = "State",
  all.x = TRUE
)

# merge() sorts the result on the key and does not guarantee that the original
# row order survives. geom_polygon() connects vertices in row order, so restore
# the drawing sequence recorded in the `order` column that map_data() provides.
states_chlamydia <- states_chlamydia[order(states_chlamydia$order), ]

# Choropleth: national outline underneath, states filled by case count on top.
ggplot() +
  geom_polygon(
    data = usa,
    aes(x = long, y = lat, group = group),
    fill = "white",
    color = "black"
  ) +
  geom_polygon(
    data = states_chlamydia,
    aes(x = long, y = lat, group = group, fill = Cases),
    color = "darkgray"
  ) +
  scale_fill_gradient(
    low = "lightpink",
    high = "darkred",
    name = "Chlamydia Cases"
  ) +
  coord_fixed(1.3) + # Aspect ratio adjustment
  theme_void() # Remove axes, grid, and background

Code
library(rnaturalearth)
library(rnaturalearthdata)
library(dplyr)
library(rnaturalearthhires)

# Country polygons from Natural Earth, returned as an sf object.
countries <- ne_countries(returnclass = "sf", scale = "medium")

# Keep only the row(s) whose admin name is exactly "Puerto Rico".
# which() drops any NA comparisons, matching what subset() does.
puerto_rico <- countries[which(countries$admin == "Puerto Rico"), ]

# Draw the island's geometry on a blank canvas.
ggplot(data = puerto_rico) +
  geom_sf() +
  theme_void()