+1 (315) 557-6473 

Program in R to Analyse the Estimated Crimes between 1979 and 2019 Assignment Solution


Instructions

Create an R programming project to analyze the data in the estimated_crimes_1979_2019.csv file. This dataset contains estimated crime data at the state and national level and was derived from the Summary Reporting System (SRS). A sample of the data is shown below.

year state_abbr state_name population violent_crime homicide rape_legacy rape_revised robbery aggravated_assault property_crime burglary larceny motor_vehicle_theft caveats
1979     2.2E+08 1208030 21460 76390   480700 629480 11041500 3327700 6601000 1112800  
1979 AK Alaska 406000 1994 54 292   445 1203 23193 5616 15076 2501  
1979 AL Alabama 3769000 15578 496 1037   4127 9918 144372 48517 83791 12064  
1979 AR Arkansas 2180000 7984 198 595   1626 5565 70949 21457 45267 4225  
1979 AZ Arizona 2450000 14528 219 1120   4305 8884 177977 48916 116976 12085  
1979 CA California 22696000 184087 2952 12239   75767 93129 1511021 496310 847148 167563  
1979 CO Colorado 2772000 14472 161 1472   4353 8486 180984 49741 117898 13345  
1979 CT Connectic 3115000 12902 131 752   6021 5998 167131 48229 96997 21905  
1979 DC District of 656000 10553 180 489   6920 2964 45877 13452 28819 3606  
1979 DE Delaware 582000 3127 33 162   753 2179 34853 8890 23081 2882  
1979 FL Florida 8860000 73881 1084 4576   22097 46124 607281 190884 378099 38298  
1979 GA Georgia 5118000 28594 877 2216   10939 14562 248641 81579 145758 21304  
1979 HI Hawaii 915000 2651 66 296   1688 601 63664 16538 40580 6546  
1979 IA Iowa 2903000 5259 65 320   1457 3417 119620 26768 85023 7829  
1979 ID Idaho 905000 2613 49 186   392 1986 35766 9729 23577 2460  
1979 IL Illinois 11230000 83540 1203 3702   36056 42579 593750 161776 356062 75912  
1979 IN Indiana 5400000 18254 448 1681   7167 8958 230223 63176 143666 23381  
1979 KS Kansas 2369000 8376 130 626   2423 5197 107605 31504 69622 6479  
1979 KY Kentucky 3527000 8748 335 719   3247 4447 103548 32082 62431 9035  
1979 LA Louisiana 4026000 27229 682 1554   8832 16161 188514 56237 115856 16421  
1979 MA Massachu 5769000 30650 212 1428   11724 17286 310756 92570 152135 66051  
1979 MD Maryland 4149000 33007 406 1636   13740 17225 228159 62630 145297 20232  
1979 ME Maine 1097000 2221 31 131   349 1710 45030 12592 29828 2610  
1979 MI Michigan 9208000 56558 834 4100   20218 31406 509457 138806 315211 55440  
1979 MN Minnesota 4060000 8973 93 871   3754 4255 169376 45183 110827 13366  
1979 MO Missouri 4868000 25662 543 1638   10267 13214 214809 70423 124398 19988  
1979 MT Montana 786000 1762 33 162   260 1307 33298 6314 24537 2447  
1979 NC North Car 5606000 25009 600 1137   4327 18945 220113 72687 134903 12523  
1979 ND North Dak 657000 403 10 54   65 274 17703 3029 13607 1067  
1979 NE Nebraska 1574000 3556 65 338   1157 1996 59295 12804 42195 4296  
1979 NH New Ham 887000 1241 21 152   254 814 39373 10635 25856 2882  
1979 NJ New Jerse 7332000 36747 484 2037   18332 15894 390018 117421 221552 51045  
1979 NM New Mexi 1241000 7272 154 582   1502 5034 64563 18385 41745 4433  
1979 NV Nevada 702000 5866 123 418   2861 2464 56132 19799 31388 4945  
1979 NY New York 17649000 161906 2092 5394   93471 60949 933234 308302 500589 124343  
1979 OH Ohio 10731000 49092 865 3409   20909 23909 501389 138128 316162 47099  
1979 OK Oklahoma 2892000 11719 281 953   2966 7519 124293 42646 69292 12355  
1979 OR Oregon 2527000 13781 107 1121   3299 9254 147264 40682 96823 9759  
1979 PA Pennsylva 11731000 39133 724 2533   17855 18021 370914 109668 215567 45679  

Assignment Solution

library(forecast)

library(ggplot2)

# Time-series analysis ----

# Build the monthly time-series object used by every plot and model below.
# NOTE(review): `data` is not defined anywhere in this script; it is assumed to
# be a numeric vector of monthly homicide rates beginning January 1985 --
# confirm and load it before running.
# With frequency = 12, `end = 2019` is the first period of 2019, and ts()
# silently recycles or truncates `data` to fit the start/end window, so check
# length(data) against the requested span.
# NOTE(review): the name `ts` masks stats::ts for readers (R still resolves the
# function call correctly); it is kept so code outside this section that refers
# to `ts` keeps working.
ts <- ts(data, start = 1985, end = 2019, frequency = 12)

# Seasonal plot: one line per year across the 12 months.
seasonplot(
  ts,
  s = 12,
  col = rainbow(16),
  year.labels = TRUE,
  main = "Homicide rate by year and month"
)

# The same seasonal view in polar coordinates.
ggseasonplot(ts, polar = TRUE)

# Plot the full series. Call the generic and let S3 dispatch pick the ts
# method instead of naming the method directly.
autoplot(ts)

# Autocorrelation function of the series.
ggAcf(ts)

# ARIMA ----

# Let auto.arima() choose the model orders.
fit.arima <- auto.arima(ts)

# Residuals against fitted values, read from the fitted model object.
with(fit.arima, plot(fitted, residuals))

# Residual diagnostics for the fitted model.
checkresiduals(fit.arima)

# Forecast 24 periods (two years of monthly data) ahead and plot the result.
plot(forecast(fit.arima, h = 24))

For the Exploratory Data Analysis

library(tidyverse) ## For data wrangling and visualization

library(lubridate) ## To work with dates

library(ggpubr) ## Extra visualizations and themes

library(patchwork) ## Patch visualizations together

library(hrbrthemes)## extra themes and formatting

library(scales) ## For formatting numeric variables

library(tidytext) ## Reordering within facets in ggplot2

library(pier) ## Make interactive piecharts in R

library(ggalt) ## Extra visualizations

# Load the incident data. The file name must be a quoted string, and the date
# column is parsed in place (day-month-year, per dmy()) so that floor_date()
# below receives Date values rather than text.
crimes <- read_csv("Crime.csv") %>%
  mutate(Date_Column = dmy(Date_Column))

# Total incidents per calendar year, drawn as bars with a dashed trend line.
# Each bar carries its total (white, inside the bar) and the year-over-year
# percentage change (black, above the bar).
crimes %>%
  group_by(Year = floor_date(Date_Column, unit = "year")) %>%
  summarise(Incidents = sum(total_incidents, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(
    pct_change = (Incidents - lag(Incidents)) / lag(Incidents),
    # The first year has no predecessor, so its change is shown as 0%.
    pct_change = replace_na(pct_change, 0)
  ) %>%
  ggplot(aes(Year, Incidents)) +
  geom_bar(stat = "identity", fill = "firebrick", color = "black") +
  geom_line(color = "steelblue", size = 1.5, linetype = "dashed") +
  # `fontface` (not `face`) is the text-style parameter of geom_text().
  geom_text(
    aes(label = percent(pct_change)),
    vjust = -1,
    color = "black",
    fontface = "bold"
  ) +
  geom_text(
    aes(label = comma(Incidents)),
    vjust = 1,
    fontface = "bold",
    color = "white"
  ) +
  # NOTE(review): the upper limit of 800000 is hard-coded; bars above it would
  # be dropped from the plot -- confirm against the data.
  scale_y_comma(expand = c(0, 0), limits = c(0, 800000)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y") +
  theme_classic() +
  labs(title = "Total Incidents over the years")

Next, let's visualize the total incidents with a simple bar plot.

# Horizontal bar chart of total incidents per state, ordered by total.
# The plot must map the same column that the data is grouped by: after
# summarise() only `state_name` and `Incidents` remain.
crimes %>%
  group_by(state_name) %>%
  # na.rm = TRUE so a single missing count does not turn a state's total into
  # NA (matches the yearly summary).
  summarise(Incidents = sum(total_incidents, na.rm = TRUE)) %>%
  ungroup() %>%
  ggplot(aes(reorder(state_name, Incidents), Incidents)) +
  geom_bar(stat = "identity", aes(fill = state_name), color = "black") +
  coord_flip() +
  scale_y_comma() +
  geom_text(aes(label = comma(Incidents)), hjust = 1) +
  theme_classic() +
  # The fill colour only distinguishes bars; the axis already names each state.
  theme(legend.position = "none") +
  labs(x = " ", y = " ", title = "Total Incidents by state")