Instructions
Create an R programming project that analyzes the data in the estimated_crimes_1979_2019.csv file. This dataset contains estimated crime data at the state and national level and was derived from the Summary Reporting System (SRS). A sample of the data is shown below.
year | state_abbr | state_name | population | violent_crime | homicide | rape_legacy | rape_revised | robbery | aggravated_assault | property_crime | burglary | larceny | motor_vehicle_theft | caveats
1979 | 2.2E+08 | 1208030 | 21460 | 76390 | 480700 | 629480 | 11041500 | 3327700 | 6601000 | 1112800 | ||||
1979 | AK | Alaska | 406000 | 1994 | 54 | 292 | 445 | 1203 | 23193 | 5616 | 15076 | 2501 | ||
1979 | AL | Alabama | 3769000 | 15578 | 496 | 1037 | 4127 | 9918 | 144372 | 48517 | 83791 | 12064 | ||
1979 | AR | Arkansas | 2180000 | 7984 | 198 | 595 | 1626 | 5565 | 70949 | 21457 | 45267 | 4225 | ||
1979 | AZ | Arizona | 2450000 | 14528 | 219 | 1120 | 4305 | 8884 | 177977 | 48916 | 116976 | 12085 | ||
1979 | CA | California | 22696000 | 184087 | 2952 | 12239 | 75767 | 93129 | 1511021 | 496310 | 847148 | 167563 | ||
1979 | CO | Colorado | 2772000 | 14472 | 161 | 1472 | 4353 | 8486 | 180984 | 49741 | 117898 | 13345 | ||
1979 | CT | Connectic | 3115000 | 12902 | 131 | 752 | 6021 | 5998 | 167131 | 48229 | 96997 | 21905 | ||
1979 | DC | District of | 656000 | 10553 | 180 | 489 | 6920 | 2964 | 45877 | 13452 | 28819 | 3606 | ||
1979 | DE | Delaware | 582000 | 3127 | 33 | 162 | 753 | 2179 | 34853 | 8890 | 23081 | 2882 | ||
1979 | FL | Florida | 8860000 | 73881 | 1084 | 4576 | 22097 | 46124 | 607281 | 190884 | 378099 | 38298 | ||
1979 | GA | Georgia | 5118000 | 28594 | 877 | 2216 | 10939 | 14562 | 248641 | 81579 | 145758 | 21304 | ||
1979 | HI | Hawaii | 915000 | 2651 | 66 | 296 | 1688 | 601 | 63664 | 16538 | 40580 | 6546 | ||
1979 | IA | Iowa | 2903000 | 5259 | 65 | 320 | 1457 | 3417 | 119620 | 26768 | 85023 | 7829 | ||
1979 | ID | Idaho | 905000 | 2613 | 49 | 186 | 392 | 1986 | 35766 | 9729 | 23577 | 2460 | ||
1979 | IL | Illinois | 11230000 | 83540 | 1203 | 3702 | 36056 | 42579 | 593750 | 161776 | 356062 | 75912 | ||
1979 | IN | Indiana | 5400000 | 18254 | 448 | 1681 | 7167 | 8958 | 230223 | 63176 | 143666 | 23381 | ||
1979 | KS | Kansas | 2369000 | 8376 | 130 | 626 | 2423 | 5197 | 107605 | 31504 | 69622 | 6479 | ||
1979 | KY | Kentucky | 3527000 | 8748 | 335 | 719 | 3247 | 4447 | 103548 | 32082 | 62431 | 9035 | ||
1979 | LA | Louisiana | 4026000 | 27229 | 682 | 1554 | 8832 | 16161 | 188514 | 56237 | 115856 | 16421 | ||
1979 | MA | Massachu | 5769000 | 30650 | 212 | 1428 | 11724 | 17286 | 310756 | 92570 | 152135 | 66051 | ||
1979 | MD | Maryland | 4149000 | 33007 | 406 | 1636 | 13740 | 17225 | 228159 | 62630 | 145297 | 20232 | ||
1979 | ME | Maine | 1097000 | 2221 | 31 | 131 | 349 | 1710 | 45030 | 12592 | 29828 | 2610 | ||
1979 | MI | Michigan | 9208000 | 56558 | 834 | 4100 | 20218 | 31406 | 509457 | 138806 | 315211 | 55440 | ||
1979 | MN | Minnesota | 4060000 | 8973 | 93 | 871 | 3754 | 4255 | 169376 | 45183 | 110827 | 13366 | ||
1979 | MO | Missouri | 4868000 | 25662 | 543 | 1638 | 10267 | 13214 | 214809 | 70423 | 124398 | 19988 | ||
1979 | MT | Montana | 786000 | 1762 | 33 | 162 | 260 | 1307 | 33298 | 6314 | 24537 | 2447 | ||
1979 | NC | North Car | 5606000 | 25009 | 600 | 1137 | 4327 | 18945 | 220113 | 72687 | 134903 | 12523 | ||
1979 | ND | North Dak | 657000 | 403 | 10 | 54 | 65 | 274 | 17703 | 3029 | 13607 | 1067 | ||
1979 | NE | Nebraska | 1574000 | 3556 | 65 | 338 | 1157 | 1996 | 59295 | 12804 | 42195 | 4296 | ||
1979 | NH | New Ham | 887000 | 1241 | 21 | 152 | 254 | 814 | 39373 | 10635 | 25856 | 2882 | ||
1979 | NJ | New Jerse | 7332000 | 36747 | 484 | 2037 | 18332 | 15894 | 390018 | 117421 | 221552 | 51045 | ||
1979 | NM | New Mexi | 1241000 | 7272 | 154 | 582 | 1502 | 5034 | 64563 | 18385 | 41745 | 4433 | ||
1979 | NV | Nevada | 702000 | 5866 | 123 | 418 | 2861 | 2464 | 56132 | 19799 | 31388 | 4945 | ||
1979 | NY | New York | 17649000 | 161906 | 2092 | 5394 | 93471 | 60949 | 933234 | 308302 | 500589 | 124343 | ||
1979 | OH | Ohio | 10731000 | 49092 | 865 | 3409 | 20909 | 23909 | 501389 | 138128 | 316162 | 47099 | ||
1979 | OK | Oklahoma | 2892000 | 11719 | 281 | 953 | 2966 | 7519 | 124293 | 42646 | 69292 | 12355 | ||
1979 | OR | Oregon | 2527000 | 13781 | 107 | 1121 | 3299 | 9254 | 147264 | 40682 | 96823 | 9759 | ||
1979 | PA | Pennsylva | 11731000 | 39133 | 724 | 2533 | 17855 | 18021 | 370914 | 109668 | 215567 | 45679 |
Assignment Solution
library(forecast)
library(ggplot2)
# Preparing the time series object for analysis ----
# The SRS estimates are annual, so the series has one observation per year
# (frequency = 1) and starts at the first year present in the file.
# NOTE(review): the column names used here (year, state_abbr, homicide,
# population) follow the CSV header -- confirm against the file.
estimates <- read.csv("estimated_crimes_1979_2019.csv")

# Rows without a state abbreviation hold the national totals.
is_national <- is.na(estimates$state_abbr) | estimates$state_abbr == ""
national <- estimates[is_national, ]
stopifnot(nrow(national) > 0)
national <- national[order(national$year), ]

# Homicides per 100,000 inhabitants.
homicide_rate <- national$homicide / national$population * 1e5

# Named crime_ts (not ts) so the stats::ts() constructor is not shadowed.
crime_ts <- ts(homicide_rate, start = min(national$year), frequency = 1)

# Seasonal plots (seasonplot / ggseasonplot) are intentionally left out:
# yearly data has no within-year pattern and both functions fail on
# non-seasonal series.
autoplot(crime_ts) +
  labs(
    title = "National homicide rate by year",
    x = "Year",
    y = "Homicides per 100,000 inhabitants"
  )
ggAcf(crime_ts)

# ARIMA ----
fit.arima <- auto.arima(crime_ts)

# Residuals against fitted values; as.numeric() drops the ts attributes so
# the points are drawn as a plain scatter plot.
plot(
  as.numeric(fitted(fit.arima)),
  as.numeric(residuals(fit.arima)),
  xlab = "Fitted values",
  ylab = "Residuals",
  main = "Residuals vs fitted values"
)
checkresiduals(fit.arima)

# The horizon is counted in years because the series is annual.
plot(forecast(fit.arima, h = 5))
For the Exploratory Data Analysis
library(tidyverse) ## For data wrangling and visualization
library(lubridate) ## To work with dates
library(ggpubr) ## Extra visualizations and themes
library(patchwork) ## Patch visualizations together
library(hrbrthemes)## extra themes and formatting
library(scales) ## For formatting numeric variables
library(tidytext) ## Reordering within facets in ggplot2
library(pier) ## Make interactive piecharts in R
library(ggalt) ## Extra visualizations
# Load the SRS estimates named in the assignment.
# NOTE(review): the column names used below (year, state_abbr,
# violent_crime, property_crime) follow the CSV header -- confirm against
# the file.
crimes <- read_csv("estimated_crimes_1979_2019.csv")

# Total incidents per year with year-over-year percentage change ----
crimes %>%
  # Rows with no state abbreviation are the national totals; keeping only
  # those avoids counting every state twice.
  filter(is.na(state_abbr)) %>%
  group_by(Year = year) %>%
  summarise(Incidents = sum(violent_crime + property_crime, na.rm = TRUE)) %>%
  ungroup() %>%
  # lag() depends on row order, so sort by year before differencing.
  arrange(Year) %>%
  mutate(
    pct_change = (Incidents - lag(Incidents)) / lag(Incidents),
    # The first year has no predecessor; show it as 0% change.
    pct_change = replace_na(pct_change, 0)
  ) %>%
  ggplot(aes(Year, Incidents)) +
  geom_bar(stat = "identity", fill = "firebrick", color = "black") +
  geom_line(color = "steelblue", size = 1.5, linetype = "dashed") +
  geom_text(
    aes(label = percent(pct_change, accuracy = 0.1)),
    vjust = -1, color = "black", fontface = "bold", size = 2
  ) +
  # Counts are written vertically inside the bars so that the labels of
  # neighbouring years do not overlap.
  geom_text(
    aes(label = comma(Incidents)),
    angle = 90, hjust = 1.1, fontface = "bold", color = "white", size = 2.5
  ) +
  # No hard-coded upper limit: leave head-room above the tallest bar for the
  # percentage labels instead.
  scale_y_comma(expand = expansion(mult = c(0, 0.08))) +
  scale_x_continuous(breaks = seq(1980, 2020, by = 5)) +
  theme_classic() +
  labs(title = "Total Incidents over the years")
Let's visualize a simple bar plot to see the total incidents for each state.
# Total incidents per state over the whole period, largest at the top ----
# NOTE(review): assumes `crimes` holds the estimated_crimes_1979_2019.csv
# data with columns state_name, violent_crime and property_crime -- confirm
# against the file.
crimes %>%
  # Drop the national-total rows, which carry no state name.
  filter(!is.na(state_name)) %>%
  group_by(state_name) %>%
  summarise(Incidents = sum(violent_crime + property_crime, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(reorder(state_name, Incidents), Incidents)) +
  geom_bar(stat = "identity", aes(fill = state_name), color = "black") +
  coord_flip() +
  scale_y_comma() +
  geom_text(aes(label = comma(Incidents)), hjust = 1, size = 2.5) +
  theme_classic() +
  # The fill colour only separates the bars visually; the axis already names
  # each state, so the legend is hidden.
  theme(legend.position = "none") +
  labs(x = " ", y = " ", title = "Total Incidents by state from 1979-2019")