main.R

#
# Recover ETHPOP population data
# using ETHPOP in/out flow data
#
# N Green
#


library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(readxl)
library(readr)
library(tibble)
library(ggplot2)
library(demoSynthPop)


# model UK-born/non UK-born
##TODO:

# load ONS starting population
# NOTE(review): dat_ons is not referenced again anywhere in this script --
# presumably reserved for the UK-born/non UK-born TODO above; confirm.
# read_ONS_census2011() is not defined in this file (presumably it is
# provided by demoSynthPop -- verify).
dat_ons <- read_ONS_census2011()

## New names:
## * `` -> ...1
## * `` -> ...2
## * `` -> ...3
## * `` -> ...4
## * `` -> ...5
## * ...

## Warning: The `x` argument of `as_tibble.matrix()` must have column names if `.name_repair` is omitted as of tibble 2.0.0.
## Using compatibility `.name_repair`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# Load the cleaned ETHPOP tables, starting with the population counts.
# Columns as parsed: age, ETH.group, sex, pop, year; the file's unnamed
# first column is imported as X1.
dat_pop <- readr::read_csv(
  file = "~/R/cleanETHPOP/output_data/clean_pop_Leeds2.csv"
)

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   age = col_double(),
##   ETH.group = col_character(),
##   sex = col_character(),
##   pop = col_double(),
##   year = col_double()
## )

# In-migrant counts. Columns as parsed: age, ETH.group, sex, inmigrants,
# year; the file's unnamed first column is imported as X1.
dat_inflow <- readr::read_csv(
  file = "~/R/cleanETHPOP/output_data/clean_inmigrants_Leeds2.csv"
)

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   age = col_double(),
##   ETH.group = col_character(),
##   sex = col_character(),
##   inmigrants = col_double(),
##   year = col_double()
## )

# Out-migrant counts. Columns as parsed: age, ETH.group, sex, outmigrants,
# year; the file's unnamed first column is imported as X1.
dat_outflow <- readr::read_csv(
  file = "~/R/cleanETHPOP/output_data/clean_outmigrants_Leeds2.csv"
)

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   age = col_double(),
##   ETH.group = col_character(),
##   sex = col_character(),
##   outmigrants = col_double(),
##   year = col_double()
## )

# Birth counts. Columns as parsed: ETH.group, year, sex, births (no age
# column); the file's unnamed first column is imported as X1.
dat_births <- readr::read_csv(
  file = "~/R/cleanETHPOP/output_data/clean_births_Leeds2.csv"
)

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   ETH.group = col_character(),
##   year = col_double(),
##   sex = col_character(),
##   births = col_double()
## )

# Death counts. Columns as parsed: ETH.group, sex, age, deaths, agegrp,
# year; the file's unnamed first column is imported as X1.
dat_deaths <- readr::read_csv(
  file = "~/R/cleanETHPOP/output_data/clean_deaths_Leeds2.csv"
)

## Warning: Missing column names filled in: 'X1' [1]

## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   ETH.group = col_character(),
##   sex = col_character(),
##   age = col_double(),
##   deaths = col_double(),
##   agegrp = col_character(),
##   year = col_double()
## )

# Run the model on the ETHPOP population, birth, death and migration tables.
# run_model() is defined outside this file; arguments are passed by position
# in the order population, births, deaths, inflow, outflow.
res <- run_model(
  dat_pop, dat_births, dat_deaths,
  dat_inflow, dat_outflow
)

## year 2012

## year 2013

## year 2014

## year 2015

## year 2016

## year 2017

## year 2018

## year 2019

## year 2020

## year 2021

## year 2022

## year 2023

## year 2024

## year 2025

## year 2026

## year 2027

## year 2028

## year 2029

## year 2030

## year 2031

## year 2032

## year 2033

## year 2034

## year 2035

## year 2036

## year 2037

## year 2038

## year 2039

## year 2040

## year 2041

## year 2042

## year 2043

## year 2044

## year 2045

## year 2046

## year 2047

## year 2048

## year 2049

## year 2050

## year 2051

## year 2052

## year 2053

## year 2054

## year 2055

## year 2056

## year 2057

## year 2058

## year 2059

## year 2060

## year 2061

# Combine the model output row-wise into a single table for plotting.
sim_pop <- res %>% dplyr::bind_rows()



########
# plot #
########

# Selection shared by the simulated and the ETHPOP plot data. It is defined
# once so the two subsets cannot drift apart when the choice is changed.
plot_sex <- "M"
plot_eth <- "BAN"
plot_years <- c(2011, 2020, 2030, 2040, 2050, 2060)

# Keep only the rows of `pop_data` for the requested sex, ethnic group and
# years, and convert year to a factor so ggplot2 gives each year its own
# discrete colour.
#
# pop_data   data frame with columns sex, ETH.group and year
# keep_sex   value of `sex` to keep
# keep_eth   value of `ETH.group` to keep
# keep_years years to keep
#
# Returns the filtered data with `year` as a factor.
subset_for_plot <- function(pop_data,
                            keep_sex = plot_sex,
                            keep_eth = plot_eth,
                            keep_years = plot_years) {
  pop_data %>%
    filter(sex == keep_sex,
           ETH.group == keep_eth,
           year %in% keep_years) %>%
    mutate(year = as.factor(year))
    # mutate(eth_sex_year = interaction(ETH.group, sex, year))
}

sim_plot <- subset_for_plot(sim_pop)  # model output
dat_plot <- subset_for_plot(dat_pop)  # ETHPOP data


# Age profiles of the estimated (top) and ETHPOP (bottom) populations, drawn
# on the same y-range so the two panels can be compared directly.
#
# The range is applied with coord_cartesian(), which zooms the view without
# touching the data. ylim() sets limits on the scale instead: values outside
# the range become NA and are dropped, leaving gaps in the lines.
pop_range <- c(0, 11000)

p1 <-
  ggplot(sim_plot, aes(x = age, y = pop, colour = year)) +
  ggtitle("Estimated populations") +
  geom_line() +
  coord_cartesian(ylim = pop_range)

p2 <-
  ggplot(dat_plot, aes(x = age, y = pop, colour = year)) +
  ggtitle("ETHPOP populations") +
  geom_line() +
  coord_cartesian(ylim = pop_range)

gridExtra::grid.arrange(p1, p2)

## Warning: Removed 51 row(s) containing missing values (geom_path).

## Warning: Removed 13 row(s) containing missing values (geom_path).

## differences

# Pair every ETHPOP row with the model row that has the same age, ETH.group,
# sex and year. Columns present in both inputs get the suffixes .eth
# (ETHPOP) and .sim (model output).
diff_plot <- merge(
  x = dat_plot,
  y = sim_plot,
  by = c("age", "ETH.group", "sex", "year"),
  suffixes = c(".eth", ".sim")
)

# Absolute gap, then the gap relative to the ETHPOP value.
diff_plot$diff_pop <- diff_plot$pop.eth - diff_plot$pop.sim
# NOTE(review): rows where pop.eth is 0 give Inf or NaN here.
diff_plot$scaled_diff <- diff_plot$diff_pop / diff_plot$pop.eth

# Absolute difference between the ETHPOP and the estimated populations by
# age, one line per year.
p3 <-
  ggplot(diff_plot, aes(x = age, y = diff_pop, colour = year)) +
  ggtitle("ETHPOP - estimated populations") +
  geom_line()

# Full range first. print() is explicit so the plot is also drawn when the
# script is run through source(), which does not auto-print top-level values.
print(p3)

# Zoomed view. coord_cartesian() only narrows the visible window; ylim()
# would turn values outside the range into NA and break the lines there.
print(p3 + coord_cartesian(ylim = c(-2000, 1000)))

## Warning: Removed 3 row(s) containing missing values (geom_path).

# Difference between the ETHPOP and the estimated populations relative to the
# ETHPOP value, by age, one line per year.
p4 <-
  ggplot(diff_plot, aes(x = age, y = scaled_diff, colour = year)) +
  ggtitle("(ETHPOP - estimated populations)/ETHPOP") +
  geom_line()

# Full range first. print() is explicit so the plot is also drawn when the
# script is run through source(), which does not auto-print top-level values.
print(p4)

# Zoomed view. coord_cartesian() only narrows the visible window; ylim()
# would turn values outside the range into NA and break the lines there.
print(p4 + coord_cartesian(ylim = c(-2, 3)))

## Warning: Removed 4 row(s) containing missing values (geom_path).

main.R

Nathan

2020-05-18