This is the code used to generate the graph.
# init.
library(rvest)
library(ggplot2)
library(dplyr)
library(lubridate)
library(stringr)
library(tidyr)
library(readr)
# scrape data
read_html("https://www.manchester.ac.uk/coronavirus/cases/") %>%
html_nodes("table") %>%
html_table() %>%
bind_rows() -> tbls
# import the early non-tabulated data
tibble(Date = seq(ymd('2020-09-21'), ymd('2020-09-26'), by='days'),
Students = c(1, 1, 2, 5, 6, 6),
Staff = rep(NA, 6)) %>%
mutate(Date = ymd(Date)) -> tblsOLD
# separate and label 2020 data
tbls %>%
slice_head(n = 96) %>%
mutate(Date = str_c(Date, " 2020")) %>%
mutate(Date = dmy(Date)) -> tbls2020
# separate and label 2021 data
tbls %>%
.[97:461,] %>%
mutate(Date = str_c(Date, " 2021")) %>%
mutate(Date = dmy(Date)) -> tbls2021
# separate and label 2022 data
tbls %>%
.[462:dim(tbls)[1],] %>%
mutate(Date = str_c(Date, " 2022")) %>%
mutate(Date = dmy(Date)) -> tbls2022
# join them all
bind_rows(list(tblsOLD, tbls2020, tbls2021, tbls2022)) %>%
arrange(Date) -> df
# tidy-up
rm(list= ls()[! (ls() %in% c('df'))])
# join "students" and "student"
df %>%
rowwise() %>%
mutate(Students = sum(c(Students, Student), na.rm = TRUE)) %>%
select(-Student) -> df
# pivot longer
df %>%
pivot_longer(c(Students, Staff), names_to = "pop") %>%
mutate(pop = factor(pop, levels = c("Students", "Staff"))) %>%
arrange(pop, Date) -> df
# compute rolling mean data
df_roll <- df %>%
group_by(Date) %>%
summarise(sum = sum(value)) %>%
mutate(value = round(as.vector(stats::filter(sum, rep(1/7, 7), sides = 2))))
# import significant dates for annotation
sigDates <- tibble(Date = parse_date(c("21/09/2020", "28/09/2020",
"05/10/2020", "07/10/2020",
"03/12/2020", "09/12/2020",
"18/12/2020", "04/01/2021"),
format = "%d/%m/%Y"),
labels = c("1st year induction starts", "wider induction starts",
"teaching starts", "DfE Tier 3 starts",
"travel window starts", "travel window ends",
"closure starts", "closure ends"))
# create plot
ggplot() +
geom_area(data = df, mapping = aes(x = Date, y = value, fill = pop), alpha = 0.6) +
geom_line(data = df_roll, mapping = aes(x = Date, y = value, col = "black")) +
labs(fill = "",
subtitle = paste0("Data retrieved automatically from \"manchester.ac.uk/coronavirus/cases\" on ", Sys.Date()),
x = "Date",
caption = paste0("Mean no. of daily reported cases over the last 7 days = ",
round(mean(tail(df_roll, n = 7)$sum), 1),
" (~",
round(sum(tail(df_roll, n = 7)$sum*(100000 / (12800 + 40250))), 1),
" new cases/100,000/week)", "\n",
"Case rate estimates based on data from \"manchester.ac.uk/discover/facts-figures\"")) +
ggtitle("University of Manchester COVID-19 Daily Reported Cases") +
scale_x_date(breaks = df$Date[wday(df$Date,label = TRUE) == "Mon" & day(df$Date) <= 7],
date_labels = "%d %b",
minor_breaks = df$Date[which(wday(df$Date)==2)],
) +
scale_y_continuous(name = "Cases [n/day]",
sec.axis = sec_axis(~ . * (100000/(12800+40250)), name = "Estimated daily case rate [n/100,000/day]")
) +
theme(axis.text.x.top = element_text(angle = 20,
hjust = 0)
) +
scale_color_identity(name = "",
guide = "legend",
labels = "7-day rolling mean")