Talk:List of U.S. states and territories by intentional homicide rate/Table creation code

For those interested here are the instructions for downloading the data and the code for producing the rate table (and a table for totals). The language used is the R (programming language).

Table for homicide by type edit

This code uses data from the CDC.

R code for US homicide by type
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# https://wonder.cdc.gov/ucd-icd10-expanded.html
# Section 1 will vary depending on the data needed. Leave sections 2, 3, 4, 5 as default. In section 6, above the large box, see the five dots and select "Injury Intent and Mechanism"; select either "All Causes" or "Firearm". In section 7 check all boxes except "Show totals".

CDC_import = function(file_name) {
  df = read.table(file_name, header=T, sep="\t", fill=T) %>%
    filter(Notes == "") %>% select(-Notes,-Crude.Rate) %>%
    mutate(Deaths = as.numeric(ifelse(Deaths == "Suppressed", NA, Deaths))) %>%
    mutate(Rate = Deaths/Population*10^5)
  return(df)
}

# set flag links
fix_links = function(df) {
  df = df %>%
  mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
  mutate(State = ifelse(stringr::str_detect(State,"United States"),
                        "{{noflag|'''United States'''}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"Georgia"),
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"District"),
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
  return(df)
}

# homicide/suicide totals
# In section 1, group by State, Injury Intent. Type in a title "CDC - State Intent External".
state.homicide = CDC_import("intentional/CDC - State Intent External.txt") %>%
  select(State,Injury.Intent,Deaths,Population) %>%
  filter(Injury.Intent %in% c("Homicide")) %>%
  select(State,Population,Deaths) %>%
  bind_rows(setNames(data.frame("United States", sum(.$Population), sum(.$Deaths)), names(.))) %>%
  mutate(Total = Deaths/Population*10^5)

# mechanism, all US
# In section 1, group by Mechanism. Type in a title "CDC - Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
homicide.mechanism = CDC_import("intentional/CDC - Homicide Mechanism.txt") %>%
  setNames(nm = c("Mechanism","Code","Deaths","Population","Rate")) %>%
  select(Mechanism,Rate) %>%
  filter(Rate > 0) %>%
  mutate(Mechanism = c("Stab","Drown.Other","Fall.Other","Fire","Hot.Other","Gun",
                       "Transport.Other","Poison","Struck","Choke")) %>%
  filter(!stringr::str_detect(Mechanism, "Other"))

# mechanism by state
# In section 1, group by State, Mechanism. Type in a title "CDC - State Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
state.homicide.mechanism = CDC_import("intentional/CDC - State Homicide Mechanism.txt") %>%
  select(State,3,Rate) %>%
  tidyr::pivot_wider(names_from = 2, values_from = Rate, values_fill=0) %>%
  select(State,2,5,7,16,17,18) %>%
  setNames(nm = c("State","Stab","Fire","Gun","Poison","Struck","Choke")) %>%
  
  bind_rows(setNames(data.frame("United States", t(homicide.mechanism$Rate)), names(.))) %>%
  inner_join(state.homicide) %>%
  select(State,Total,Gun,Stab,Choke,Struck,Poison,Fire) %>%
  arrange(desc(Total)) %>%
  arrange(-stringr::str_detect(State, "United States")) %>%
  mutate(across(where(is.numeric), round, 1)) %>%
  fix_links()
write.csv(state.homicide.mechanism,"intentional/state_homicide_mechanism.csv",row.names=F)

Tables for homicide by year / decade edit

This code uses data from the FBI.

R code for US state homicide data by year/decade
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)

# FBI data
# https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/downloads
# Additional Datasets > Summary Reporting System (SRS) > Download

# all crimes, states, years
crime_yearly = read.csv("estimated_crimes_1979_2022.csv") %>%
  mutate(across(where(is.numeric), ~ifelse(is.na(.), 0, .))) %>%
  filter(state_abbr != "") %>%
  mutate(rape_legacy = rape_legacy + rape_revised) %>%
  rename(State = state_name) %>%
  select(-c(state_abbr,rape_revised))

# US overall
us_yearly = crime_yearly %>%
  group_by(year) %>%
  summarise(across(where(is.numeric), sum)) %>%
  ungroup %>%
  mutate(State = "United States")

# append US totals
crime_yearly = crime_yearly %>%
  bind_rows(us_yearly)

# homicide by location and year
homicide_yearly = crime_yearly %>%
  select(year,State,population,homicide) %>%
  mutate(rate = homicide/population*10^5)

# function to set flag links
fix_links = function(df) {
  df = df %>%
  mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
  mutate(State = ifelse(stringr::str_detect(State,"United States"), 
                        "{{noflag|'''United States'''}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"Georgia"), 
                        "{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
  mutate(State = ifelse(stringr::str_detect(State,"District"), 
                        "{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
  return(df)
}

# most recent five years ##
homicide_five_years = homicide_yearly %>%
  filter(year > max(year)-5) %>%
  select(-population) %>%
  tidyr::pivot_wider(names_from = year, values_from = c(homicide,rate)) %>%
  fix_links()

homicide_five_years_total = homicide_five_years %>%
  select(State,homicide_2018:homicide_2022) %>%
  setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_total,"homicide_five_years_total.csv",row.names=F)

homicide_five_years_rate = homicide_five_years %>%
  select(State,rate_2018:rate_2022) %>%
  setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_rate,"homicide_five_years_rate.csv",row.names=F)

# average of each decade ##
homicide_decade = homicide_rate_yearly %>%
  filter(year > 1979) %>%
  mutate(decade = paste0(floor(year/10)*10,"s")) %>%
  select(decade,State,homicide,rate) %>%
  
  group_by(decade,State) %>%
  summarise(across(where(is.numeric),mean)) %>%
  ungroup %>%
  
  tidyr::pivot_wider(names_from = decade, values_from = c(homicide,rate)) %>%
  fix_links()

total_table = homicide_decade %>%
  select(State,homicide_1980s:homicide_2020s) %>%
  arrange(desc(homicide_2020s)) %>%
  arrange(-stringr::str_detect(State, "United States"))
write.csv(total_table,"state_homicide_total_decade.csv",row.names=F)

rate_table = homicide_decade %>%
  select(State,rate_1980s:rate_2020s) %>%
  arrange(desc(rate_2020s)) %>%
  arrange(-stringr::str_detect(State, "United States"))
write.csv(rate_table,"state_homicide_rate_decade.csv",row.names=F)