Talk:List of U.S. states and territories by intentional homicide rate/Table creation code
For those interested here are the instructions for downloading the data and the code for producing the rate table (and a table for totals). The language used is the R (programming language).
Table for homicide by type edit
This code uses data from the CDC.
R code for US homicide by type
|
---|
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)
# https://wonder.cdc.gov/ucd-icd10-expanded.html
# Section 1 will vary depending on the data needed. Leave sections 2, 3, 4, 5 as default. In section 6, above the large box, see the five dots and select "Injury Intent and Mechanism"; select either "All Causes" or "Firearm". In section 7 check all boxes except "Show totals".
CDC_import = function(file_name) {
df = read.table(file_name, header=T, sep="\t", fill=T) %>%
filter(Notes == "") %>% select(-Notes,-Crude.Rate) %>%
mutate(Deaths = as.numeric(ifelse(Deaths == "Suppressed", NA, Deaths))) %>%
mutate(Rate = Deaths/Population*10^5)
return(df)
}
# set flag links
fix_links = function(df) {
df = df %>%
mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
mutate(State = ifelse(stringr::str_detect(State,"United States"),
"{{noflag|'''United States'''}}", State)) %>%
mutate(State = ifelse(stringr::str_detect(State,"Georgia"),
"{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
mutate(State = ifelse(stringr::str_detect(State,"District"),
"{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
return(df)
}
# homicide/suicide totals
# In section 1, group by State, Injury Intent. Type in a title "CDC - State Intent External".
state.homicide = CDC_import("intentional/CDC - State Intent External.txt") %>%
select(State,Injury.Intent,Deaths,Population) %>%
filter(Injury.Intent %in% c("Homicide")) %>%
select(State,Population,Deaths) %>%
bind_rows(setNames(data.frame("United States", sum(.$Population), sum(.$Deaths)), names(.))) %>%
mutate(Total = Deaths/Population*10^5)
# mechanism, all US
# In section 1, group by Mechanism. Type in a title "CDC - Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
homicide.mechanism = CDC_import("intentional/CDC - Homicide Mechanism.txt") %>%
setNames(nm = c("Mechanism","Code","Deaths","Population","Rate")) %>%
select(Mechanism,Rate) %>%
filter(Rate > 0) %>%
mutate(Mechanism = c("Stab","Drown.Other","Fall.Other","Fire","Hot.Other","Gun",
"Transport.Other","Poison","Struck","Choke")) %>%
filter(!stringr::str_detect(Mechanism, "Other"))
# mechanism by state
# In section 1, group by State, Mechanism. Type in a title "CDC - State Homicide Mechanism.txt". In section 6, view Intent and Mechanism, select Homicide.
state.homicide.mechanism = CDC_import("intentional/CDC - State Homicide Mechanism.txt") %>%
select(State,3,Rate) %>%
tidyr::pivot_wider(names_from = 2, values_from = Rate, values_fill=0) %>%
select(State,2,5,7,16,17,18) %>%
setNames(nm = c("State","Stab","Fire","Gun","Poison","Struck","Choke")) %>%
bind_rows(setNames(data.frame("United States", t(homicide.mechanism$Rate)), names(.))) %>%
inner_join(state.homicide) %>%
select(State,Total,Gun,Stab,Choke,Struck,Poison,Fire) %>%
arrange(desc(Total)) %>%
arrange(-stringr::str_detect(State, "United States")) %>%
mutate(across(where(is.numeric), round, 1)) %>%
fix_links()
write.csv(state.homicide.mechanism,"intentional/state_homicide_mechanism.csv",row.names=F)
|
Tables for homicide by year / decade edit
This code uses data from the FBI.
R code for US state homicide data by year/decade
|
---|
# Attempt to set working directory
# setwd(getSrcDirectory()[1]) # if running entire file
setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) # if running section
options(scipen=999) # don't use scientific notation
library(dplyr)
# FBI data
# https://cde.ucr.cjis.gov/LATEST/webapp/#/pages/downloads
# Additional Datasets > Summary Reporting System (SRS) > Download
# all crimes, states, years
crime_yearly = read.csv("estimated_crimes_1979_2022.csv") %>%
mutate(across(where(is.numeric), ~ifelse(is.na(.), 0, .))) %>%
filter(state_abbr != "") %>%
mutate(rape_legacy = rape_legacy + rape_revised) %>%
rename(State = state_name) %>%
select(-c(state_abbr,rape_revised))
# US overall
us_yearly = crime_yearly %>%
group_by(year) %>%
summarise(across(where(is.numeric), sum)) %>%
ungroup %>%
mutate(State = "United States")
# append US totals
crime_yearly = crime_yearly %>%
bind_rows(us_yearly)
# homicide by location and year
homicide_yearly = crime_yearly %>%
select(year,State,population,homicide) %>%
mutate(rate = homicide/population*10^5)
# function to set flag links
fix_links = function(df) {
df = df %>%
mutate(State = paste0("{{flagg|uspeft|pref=Crime in|",State,"}}")) %>%
mutate(State = ifelse(stringr::str_detect(State,"United States"),
"{{noflag|'''United States'''}}", State)) %>%
mutate(State = ifelse(stringr::str_detect(State,"Georgia"),
"{{flagg|uspeft|pref=Crime in|Georgia (U.S. state)|name=Georgia}}", State)) %>%
mutate(State = ifelse(stringr::str_detect(State,"District"),
"{{flagg|uspeft|pref=Crime in|District of Columbia|name=District of Columbia}}", State))
return(df)
}
# most recent five years ##
homicide_five_years = homicide_yearly %>%
filter(year > max(year)-5) %>%
select(-population) %>%
tidyr::pivot_wider(names_from = year, values_from = c(homicide,rate)) %>%
fix_links()
homicide_five_years_total = homicide_five_years %>%
select(State,homicide_2018:homicide_2022) %>%
setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_total,"homicide_five_years_total.csv",row.names=F)
homicide_five_years_rate = homicide_five_years %>%
select(State,rate_2018:rate_2022) %>%
setNames(nm = c("Location",2018:2022))
write.csv(homicide_five_years_rate,"homicide_five_years_rate.csv",row.names=F)
# average of each decade ##
homicide_decade = homicide_rate_yearly %>%
filter(year > 1979) %>%
mutate(decade = paste0(floor(year/10)*10,"s")) %>%
select(decade,State,homicide,rate) %>%
group_by(decade,State) %>%
summarise(across(where(is.numeric),mean)) %>%
ungroup %>%
tidyr::pivot_wider(names_from = decade, values_from = c(homicide,rate)) %>%
fix_links()
total_table = homicide_decade %>%
select(State,homicide_1980s:homicide_2020s) %>%
arrange(desc(homicide_2020s)) %>%
arrange(-stringr::str_detect(State, "United States"))
write.csv(total_table,"state_homicide_total_decade.csv",row.names=F)
rate_table = homicide_decade %>%
select(State,rate_1980s:rate_2020s) %>%
arrange(desc(rate_2020s)) %>%
arrange(-stringr::str_detect(State, "United States"))
write.csv(rate_table,"state_homicide_rate_decade.csv",row.names=F)
|