library(readr)
library(dplyr)
library(ggplot2)
cases <- read_csv("datasets/COVID19_countries_data.csv")
ggplot(cases, aes(Oxford_stringency_level, confirm))+
geom_point()

#ggsave("wrap_up_stats/cases_vs_oxford_scatter.png")
model <- lm(confirm ~ Oxford_stringency_level, cases)
#summary view
summary(model)
##
## Call:
## lm(formula = confirm ~ Oxford_stringency_level, data = cases)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4554 -2532 -1452 168 38591
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1571.03 1503.74 1.045 0.300
## Oxford_stringency_level 32.16 21.32 1.509 0.136
##
## Residual standard error: 6012 on 66 degrees of freedom
## Multiple R-squared: 0.03334, Adjusted R-squared: 0.01869
## F-statistic: 2.276 on 1 and 66 DF, p-value: 0.1362
#inspecting whether the residuals are normal distributed
#density distributions of the residuals
cases <- cases %>%
mutate(residuals = resid(model))
cases %>%
ggplot(aes(residuals)) +
geom_density()

#ggsave("wrap_up_stats/residual_density.png")
#qq plot of the residuals
ggplot(cases, aes(sample = residuals)) +
geom_qq() +
geom_qq_line(distribution = qnorm)

ggsave("wrap_up_stats/qqplot_residuals.png")
#looking at the residuals vs. the fitted values
cases <- cases %>%
mutate(fitted = fitted(model))
ggplot(cases, aes(fitted, residuals)) +
geom_point() +
geom_hline(yintercept = 0)

ggsave("wrap_up_stats/residual_vs_fitted.png")
#Data points with fitted regression line
ggplot(cases, aes(Oxford_stringency_level, confirm))+
geom_point()+
geom_abline(intercept = 1571, slope = 32.2)

ggsave("wrap_up_stats/cases_oxford_stringency_scatter_w_regression.png")
#direct geom_smooth way
ggplot(cases, aes(Oxford_stringency_level, confirm))+
geom_point() +
geom_smooth(method = "lm")

ggsave("wrap_up_stats/cases_oxford_stringency_scatter_w_regression_v2.png")
model2 <- lm(confirm ~ Oxford_stringency_level + tourist_departure_2018, cases)
summary(model2)
##
## Call:
## lm(formula = confirm ~ Oxford_stringency_level + tourist_departure_2018,
## data = cases)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9138.6 -1624.8 -556.4 1617.7 14881.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.410e+03 1.384e+03 -1.019 0.3139
## Oxford_stringency_level 3.263e+01 1.910e+01 1.708 0.0948 .
## tourist_departure_2018 2.085e-04 2.190e-05 9.521 3.74e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3999 on 43 degrees of freedom
## (22 observations deleted due to missingness)
## Multiple R-squared: 0.6893, Adjusted R-squared: 0.6749
## F-statistic: 47.71 on 2 and 43 DF, p-value: 1.214e-11
library(broom)
tidy(model)
## # A tibble: 2 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 1571. 1504. 1.04 0.300
## 2 Oxford_stringency_level 32.2 21.3 1.51 0.136
tidy(model2)
## # A tibble: 3 x 5
## term estimate std.error statistic p.value
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) -1410. 1384. -1.02 3.14e- 1
## 2 Oxford_stringency_level 32.6 19.1 1.71 9.48e- 2
## 3 tourist_departure_2018 0.000209 0.0000219 9.52 3.74e-12
#direct geom_smooth way
ggplot(cases, aes(tourist_departure_2018, confirm))+
geom_point() +
geom_smooth(method = "lm")

ggsave("wrap_up_stats/cases_vs_tourist_departure.png")