#####################################################
# Step 1 of the code by Schmertmann & Rau on life expectancy estimates
# on the Kreis-level.
# Purpose: Assemble Kreis-level age-group data for
#          2013-2017 from regionalstatistik.de downloads
#          Death Counts
# License: GPL-2, https://www.gnu.org/licenses/old-licenses/gpl-2.0.html
#####################################################

# NOTE: this script deliberately does not clear the workspace
# (no rm(list = ls())). Every object it uses is created or loaded below, so
# sourcing it never destroys the caller's objects.

library(tidyverse)

# omit metadata on last 55 lines of csv file
valid_input_text <- head(readLines('../initialdata/deaths-2013-2017.csv'), -55)

# skip the metadata on the top 7 lines;
# '.' and '-' mark missing cells in the download and are read as NA
deaths <- read.csv2(
  text = valid_input_text,
  skip = 7,
  header = FALSE,
  na.strings = c('.', '-'),
  col.names = c('year', 'code', 'place', 'ages',
                'total', 'totalM', 'totalF',
                'german', 'germanM', 'germanF'),
  colClasses = c('integer', 'character', 'character',
                 'character', rep('integer', 6))
)

# 1. remove lines for national totals (DG) and all-age totals (Insgesamt)
# 2. tweak integer codes for Berlin and Hamburg so that they look like the
#    AGS codes
# 3. replace blanks/NAs with zero
deaths <- deaths %>%
  filter(code != '', code != 'DG', ages != 'Insgesamt') %>%
  mutate(code = as.integer(code),
         AGS = ifelse(code %in% c(2, 11), 1000 * code, code),
         place = trimws(place)) %>%
  select(year, AGS, place, ages, totalM, totalF) %>%
  replace_na(list(totalM = 0, totalF = 0)) %>%
  rename(male = totalM, female = totalF)

# add a numerical indicator for the lowest age in each age group
age_starts <- c(0, 1, seq(5, 85, 5))
n_ages <- length(age_starts)

# The indicator is assigned purely by row position, so the rows must come in
# complete blocks of n_ages age groups. Fail loudly instead of recycling onto
# a misaligned table.
if (nrow(deaths) == 0 || nrow(deaths) %% n_ages != 0) {
  stop('deaths has ', nrow(deaths), ' rows, which is not a positive multiple ',
       'of the ', n_ages, ' expected age groups', call. = FALSE)
}

# The age labels of the first block must be distinct and must repeat in the
# same order in every following block; otherwise positional labelling would
# attach the wrong starting age to some rows.
age_labels <- deaths$ages[seq_len(n_ages)]
if (anyDuplicated(age_labels) > 0 ||
    !identical(deaths$ages, rep(age_labels, length.out = nrow(deaths)))) {
  stop('age-group labels in deaths do not repeat in regular blocks of ',
       n_ages, '; cannot assign agegroup by position', call. = FALSE)
}

deaths$agegroup <- rep(age_starts, length.out = nrow(deaths))

# only retain lines corresponding to the AGS codes for Kreise
# in the full 'koordinierte Anfrage' data

# geo.df, which contains an integer AGS column
loaded_objects <- load('../initialdata/geo.RData')

# make sure geo.df really came from the file and not from a stale workspace
if (!('geo.df' %in% loaded_objects)) {
  stop("'../initialdata/geo.RData' does not contain an object named geo.df",
       call. = FALSE)
}
head(geo.df)

deaths <- deaths %>%
  filter(AGS %in% geo.df$AGS) %>%
  select(year, AGS, place, agegroup, male, female)

if (!dir.exists('../data')) {
  dir.create('../data')
}

save(deaths, file = '../data/deaths.Rdata')