1. Overview

This project aims to explore a data set containing US and Canadian Fire Occurrences from 1986-2013. The analysis will mainly focus on attributes such as:

to examine the distribution of fires based on their cause and areas. Following the examination of the distributions of the fires, I hope to discuss the trends that are discernible from the data.

2. Load Necessary Libraries

library(ncdf4)
library(sf)
library(ggplot2)
library(dplyr)
library(stars)
library(maps)

3. Load Fire data set and grab variables

importing the fire data set and opening the file

# fire data set path
fire_data_path <- "/Users/annekebrouwer/Documents/geog490/project/na10km_USCAN_1986-2013_ann_all.nc"

# Open file
nc <- nc_open(fire_data_path)

retrieving and defining variables from dataset

# Grabbing variables
all_area <- ncvar_get(nc, "all_area")
all_area_tot <- ncvar_get(nc, "all_area_tot")
all_npts <- ncvar_get(nc, "all_npts")
all_npts_tot <- ncvar_get(nc, "all_npts_tot")
hu_area <- ncvar_get(nc, "hu_area")
hu_area_tot <- ncvar_get(nc, "hu_area_tot")
hu_npts <- ncvar_get(nc, "hu_npts")
hu_npts_tot <- ncvar_get(nc, "hu_npts_tot")
lat <- ncvar_get(nc, "lat")
lon <- ncvar_get(nc, "lon")
lt_area <- ncvar_get(nc, "lt_area")
lt_area_tot <- ncvar_get(nc, "lt_area_tot")
lt_npts <- ncvar_get(nc, "lt_npts")
lt_npts_tot <- ncvar_get(nc, "lt_npts_tot")
time <- ncvar_get(nc, "time")
time_bnds <- ncvar_get(nc, "time_bnds")
unk_area <- ncvar_get(nc, "unk_area")
unk_area_tot <- ncvar_get(nc, "unk_area_tot")
unk_npts <- ncvar_get(nc, "unk_npts")
unk_npts_tot <- ncvar_get(nc, "unk_npts_tot")
x <- ncvar_get(nc, "x")
y <- ncvar_get(nc, "y")

4. Exploring the total number of fires and their cause

Let’s make a histogram showing the number of fires caused by lightning, fires caused by humans, and fires with an unknown cause.

First, we must create a data frame that contains the total number of fires per cause.

# Create a DataFrame containing the number of human, lighting, and unknown caused fires
fire_counts <- data.frame(
  cause = c("Human", "Lightning", "Unknown"),
  count = c(sum(hu_npts_tot, na.rm = TRUE), sum(lt_npts_tot, na.rm = TRUE), sum(unk_npts_tot, na.rm = TRUE))
)

Next, we define a color palette and plot the data on a histogram using ggplot.

# Define a custom color palette
custom_colors <- c("Human" = "tomato3", "Lightning" = "orange", "Unknown" = "indianred4")

# Create the plot with custom colors
ggplot(fire_counts, aes(x = cause, y = count, fill = cause)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = custom_colors) +
  labs(x = "Cause of Fire", y = "Number of Fires", title = "Histogram of Fires by Cause US and Canada 1986-2013") +
  theme_minimal()

According to this histogram, most fires were caused by humans. However, it is important to note that lightning caused over 400,000 fires between 1986-2013. Although it seems small in comparison to the human-caused fires bar that nears 1,500,000 fire occurrences, lightning fires make up a great deal of the total number of fire occurrences.

5. Observing the fire occurrence variation by year

To grasp a better understanding of the pattern of fire occurrence, it is important to look at the dates in which these fires took place. Let’s plot the number of fires per year for each fire cause between 1986-2013.

To start, we must convert the date format from days since 01-01-1900 to year.

# Convert days since Jan 1, 1900 to a date format
start_date <- as.Date("1900-01-01")
date <- start_date + time - 1  # Subtract 1 because days since start_date starts from 1

# Extract the year from the date
year <- as.numeric(format(date, "%Y"))

Then, I created separate data frames for each fire cause that each contain the number of fires per year for every year between 1986 and 2013.

# Sum the lightning-caused fires across all grid cells for each year
lt_npts_tot <- apply(lt_npts, 3, sum, na.rm = TRUE)

# Create a data frame with year and total number of lightning-caused fires
lt_data <- data.frame(year = time, lt_fires = lt_npts_tot)

# Sum the human-caused fires across all grid cells for each year
hu_npts_tot <- apply(hu_npts, 3, sum, na.rm = TRUE)

# Create a data frame with year and total number of human-caused fires
hu_data <- data.frame(year = time, hu_fires = hu_npts_tot)

# Sum the unknown-caused fires across all grid cells for each year
unk_npts_tot <- apply(unk_npts, 3, sum, na.rm = TRUE)

# Create a data frame with year and total number of human-caused fires
unk_data <- data.frame(year = time, unk_fires = unk_npts_tot)

I then merged the data frames so that I could plot them on the same plot.

# Merge hu_data, lt_data, and unk_data by year
merged_data <- merge(merge(hu_data, lt_data, by = "year", all = TRUE), unk_data, by = "year", all = TRUE)

# Extract the year from the date
merged_data$year <- as.numeric(format(date, "%Y"))

# Plot merged data with a legend
ggplot(merged_data, aes(x = year)) +
  geom_line(aes(y = hu_fires, color = "Human-caused Fires")) +
  geom_line(aes(y = lt_fires, color = "Lightning-caused Fires")) +
  geom_line(aes(y = unk_fires, color = "Unknown-caused Fires")) + 
  labs(x = "Year", y = "Number of Fires", title = "Fire Occurrence by Cause Over Time") +
  scale_color_manual(values = c("Human-caused Fires" = "tomato3", "Lightning-caused Fires" = "orange", "Unknown-caused Fires" = "indianred4"),
                     name = "Cause") +
  theme_minimal()

Finally, using the following code I was able to calculate the year in which each cause of fire had the most fire occurrences.

It appears that the years with the most fire occurrences in this time period were 2006 and 2007.

6. Mapping Fire Occurrences by cause (1986-2013)

Now, we can use the fire area data to visualize the spatial distribution of these fires within the time frame.

First, we need to create data sets containing fire area (separated by cause) and their locations.

# create area of fire data frames lt_area
# Filter out NA values from lat and lon, keeping the same rows

valid_indices <- !is.na(lat) & !is.na(lon)
lt_area_filtered <- lt_area[valid_indices]
lat_filtered <- lat[valid_indices]
lon_filtered <- lon[valid_indices]

# Create lt_area_df dataframe with time column included
lt_area_df <- data.frame(
  lt_area = lt_area_filtered,
  lat = lat_filtered,
  lon = lon_filtered,
  time = time
)

# Filter out rows with NA values in the lt_area and time variables
lt_area_df <- lt_area_df[!is.na(lt_area_df$lt_area) & !is.na(lt_area_df$time), ]

# Convert days since Jan 1, 1900 to a date format
lt_area_df$date <- as.Date("1900-01-01") + lt_area_df$time - 1
lt_area_df <- lt_area_df[lt_area_df$lt_area != 0, ]
lt_area_df$year <- as.integer(format(lt_area_df$date, "%Y"))

# create area of fire data frames hu_area
# Filter out NA values from lat and lon, keeping the same rows
valid_indices_h <- !is.na(lat) & !is.na(lon)
hu_area_filtered <- hu_area[valid_indices_h]
lat_filtered_h <- lat[valid_indices_h]
lon_filtered_h <- lon[valid_indices_h]

# Create lt_area_df dataframe
hu_area_df <- data.frame(
  hu_area = hu_area_filtered,
  lat = lat_filtered_h,
  lon = lon_filtered_h,
  time = time
)

hu_area_df <- hu_area_df[!is.na(hu_area_df$hu_area) & !is.na(hu_area_df$time), ]

# Convert days since Jan 1, 1900 to a date format
hu_area_df$date <- as.Date("1900-01-01") + hu_area_df$time - 1
hu_area_df <- hu_area_df[hu_area_df$hu_area != 0, ]
hu_area_df$year <- as.integer(format(hu_area_df$date, "%Y"))

# Filter unk_area to get non-zero and non-NA values
# Filter out NA values from lat and lon, keeping the same rows
valid_indices_u <- !is.na(lat) & !is.na(lon)
unk_area_filtered <- unk_area[valid_indices_u]
lat_filtered_u <- lat[valid_indices_u]
lon_filtered_u <- lon[valid_indices_u]

# Create lt_area_df dataframe
unk_area_df <- data.frame(
  unk_area = unk_area_filtered,
  lat = lat_filtered_u,
  lon = lon_filtered_u,
  time = time
)
unk_area_df <- unk_area_df[!is.na(unk_area_df$unk_area) & !is.na(unk_area_df$time), ]


# Convert days since Jan 1, 1900 to a date format
unk_area_df$date <- as.Date("1900-01-01") + unk_area_df$time - 1
unk_area_df <- unk_area_df[unk_area_df$unk_area != 0, ]
unk_area_df$year <- as.integer(format(unk_area_df$date, "%Y"))

Let’s load the shapefiles for plotting

# Load the US map including Alaska
us_sf <- st_as_sf(maps::map("world", region = "USA", plot = FALSE, fill = TRUE))
us_sf_states <- st_as_sf(
  map("state", 
      region = c("california", "nevada", "idaho", "montana", "washington", "oregon", "wyoming", "utah", "colorado", "north dakota", "south dakota", "nebraska", "kansas", "oklahoma", "texas", "minnesota", "iowa", "missouri", "arkansas", "louisiana", "wisconsin", "michigan", "illinois", "indiana", "kentucky", "tennessee", "mississippi", "alabama", "ohio", "west virginia", "virginia", "north carolina", "south carolina", "georgia", "florida", "pennsylvania", "new york", "vermont", "new hampshire", "maine", "massachusetts", "rhode island", "connecticut", "new jersey", "delaware", "maryland", "new mexico", "arizona"), 
      plot = FALSE, 
      fill = TRUE))


# Load the Canada map
canada_sf <- st_as_sf(maps::map("world", region = "Canada", plot = FALSE, fill = TRUE))

# Combine the US and Canada maps
us_canada_sf <- rbind(us_sf, canada_sf)

Now, let’s visualize where lightning fires took place between 1986-2013.

# Create a scatter plot of fire locations in US and CANADA lt_fires
ggplot() +
  geom_point(data = lt_area_df, aes(x = lon, y = lat), color = "orange", alpha = 0.1, size = 0.01) +
  geom_sf(data = us_canada_sf, fill = "transparent", color = "black") +
  geom_sf(data = us_sf_states, fill = "transparent", color = "black") +
  labs(x = "Longitude", y = "Latitude", title = "Lightning Fire Locations in North America (lt_area > 0)")+
  theme_minimal() +
  coord_sf(xlim = c(-180, -50), ylim = c(22, 85))