#
# Recover ETHPOP population data
# using ETHPOP in/out flow data
#
# N Green
#
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readxl)
library(readr)
library(tibble)
library(ggplot2)
library(demoSynthPop)
# model UK-born/non UK-born
##TODO:
# load ONS starting population
dat_ons <- read_ONS_census2011()
## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...
## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
# load ETHPOP cleaned data
dat_pop <- read_csv("~/R/cleanETHPOP/output_data/clean_pop_Leeds2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## age = col_double(),
## ETH.group = col_character(),
## sex = col_character(),
## pop = col_double(),
## year = col_double()
## )
dat_inflow <- read_csv("~/R/cleanETHPOP/output_data/clean_inmigrants_Leeds2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## age = col_double(),
## ETH.group = col_character(),
## sex = col_character(),
## inmigrants = col_double(),
## year = col_double()
## )
dat_outflow <- read_csv("~/R/cleanETHPOP/output_data/clean_outmigrants_Leeds2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## age = col_double(),
## ETH.group = col_character(),
## sex = col_character(),
## outmigrants = col_double(),
## year = col_double()
## )
dat_births <- read_csv("~/R/cleanETHPOP/output_data/clean_births_Leeds2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## ETH.group = col_character(),
## year = col_double(),
## sex = col_character(),
## births = col_double()
## )
dat_deaths <- read_csv("~/R/cleanETHPOP/output_data/clean_deaths_Leeds2.csv")
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## X1 = col_double(),
## ETH.group = col_character(),
## sex = col_character(),
## age = col_double(),
## deaths = col_double(),
## agegrp = col_character(),
## year = col_double()
## )
res <-
run_model(dat_pop,
dat_births,
dat_deaths,
dat_inflow,
dat_outflow)
## year 2012
## year 2013
## year 2014
## year 2015
## year 2016
## year 2017
## year 2018
## year 2019
## year 2020
## year 2021
## year 2022
## year 2023
## year 2024
## year 2025
## year 2026
## year 2027
## year 2028
## year 2029
## year 2030
## year 2031
## year 2032
## year 2033
## year 2034
## year 2035
## year 2036
## year 2037
## year 2038
## year 2039
## year 2040
## year 2041
## year 2042
## year 2043
## year 2044
## year 2045
## year 2046
## year 2047
## year 2048
## year 2049
## year 2050
## year 2051
## year 2052
## year 2053
## year 2054
## year 2055
## year 2056
## year 2057
## year 2058
## year 2059
## year 2060
## year 2061
sim_pop <- bind_rows(res)
########
# plot #
########
sim_plot <-
sim_pop %>%
filter(sex == "M",
ETH.group == "BAN",
year %in% c(2011, 2020, 2030, 2040, 2050, 2060)) %>%
mutate(year = as.factor(year))
# mutate(eth_sex_year = interaction(ETH.group, sex, year))
dat_plot <-
dat_pop %>%
filter(sex == "M",
ETH.group == "BAN",
year %in% c(2011, 2020, 2030, 2040, 2050, 2060)) %>%
mutate(year = as.factor(year))
p1 <-
ggplot(sim_plot, aes(x=age, y=pop, colour = year)) +
geom_line() +
ylim(0,11000)
p2 <-
ggplot(dat_plot, aes(x=age, y=pop, colour = year)) +
geom_line() +
ylim(0,11000)
gridExtra::grid.arrange(p1, p2)
## Warning: Removed 51 row(s) containing missing values (geom_path).
## Warning: Removed 13 row(s) containing missing values (geom_path).

## differences
diff_plot <-
merge(dat_plot, sim_plot,
by = c("age", "ETH.group", "sex", "year"), suffixes = c(".eth", ".sim")) %>%
mutate(diff_pop = pop.eth - pop.sim,
scaled_diff = diff_pop/pop.eth)
p3 <-
ggplot(diff_plot, aes(x=age, y=diff_pop, colour = year)) +
ggtitle("ETHPOP - estimated populations") +
geom_line()
p3

p3 + ylim(-2000, 1000)
## Warning: Removed 3 row(s) containing missing values (geom_path).

p4 <-
ggplot(diff_plot, aes(x=age, y=scaled_diff, colour = year)) +
ggtitle("(ETHPOP - estimated populations)/ETHPOP") +
geom_line()
p4

p4 + ylim(-2, 3)
## Warning: Removed 4 row(s) containing missing values (geom_path).
