class: center, middle, inverse, title-slide # Laboratorio Bio-demografico ## Lezione 15 - Misurare le migrazioni ### Nicola Barban
Alma Mater Studiorum Università di Bologna
Dipartimento di Scienze Statistiche ### 18 Marzo 2021
--- # Migration ## 1. Stock I dati di *stock* rappresentano il numero di immigrati che vivono in un paese o regione ad una certa **data**. ## 2. Flussi I dati di *flusso* rappresentano il numero di immigrati che entrano o emigrati che lasciano un paese e regione durante un **periodo** di tempo determinato. --- # Migration Stock ```r #install.packages("wpp2019") library(wpp2019) data(migration) data(pop) ``` >Net migration (in thousand) for the specific five-year time period (i.e., from 1 July in year t to 1 July in year t+5 such as the period 1950-1955 refers to the period 1950.5-1955.5 and the mid of the period is 1953.0). --- ```r library(tidyverse) A=migration[,1:17] %>% filter(name=="Italy") %>% pivot_longer(cols=-c(country_code, name), names_to="period", values_to="net_mig") %>% ggplot(aes(x=period, y=net_mig, group=name))+ geom_line()+ theme_bw()+ labs(title="ITALIA", x="Anno", y="Net Migration" ) ``` --- ```r A ``` <img src="lezione15_files/figure-html/unnamed-chunk-3-1.png" width="60%" /> --- # Tengo solo macro-regioni ```r library(knitr) kable(migration$name[migration$country_code>900 & migration$country_code<1000 ], type="html") ``` |x | |:---------------------------------------| |More developed regions | |Less developed regions | |Least developed countries | |Other less developed countries | |Less developed regions, excluding China | |Africa | |Asia | |Europe | |Latin America and the Caribbean | |Northern America | |Oceania | |Sub-Saharan Africa | |Eastern Africa | |Middle Africa | |Southern Africa | |Western Africa | |Northern Africa | |Western Asia | |Central and Southern Asia | |Eastern Asia | |South-Eastern Asia | |Caribbean | |Central America | |South America | |Australia/New Zealand | |Melanesia | |Micronesia | |Polynesia | |Eastern Europe | |Northern Europe | |Southern Europe | |Western Europe | --- ```r names(migration)[1:17]<-names(pop)[1:17] migration_long<- migration[,1:17] %>% pivot_longer(cols=-c(country_code, name), names_to="year", values_to="mig") pop_long<- pop %>% pivot_longer(cols=-c(country_code, name), names_to="year", values_to="pop") %>% filter(pop>10000) ### seleziono paesi con pop> 10 mil mig_pop<-pop_long %>% left_join(migration_long, by=c("country_code", "name", "year")) %>% mutate(net.mig=1000*mig/pop) ``` --- ```r B<- mig_pop %>% ggplot(aes(x=year, y=net.mig, group=name, label=name))+ geom_line(col="grey", alpha=0.2)+ theme_bw()+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ labs(title="Net migration", x="Year", y="Net Migration", caption="Source: World Population Prospect" ) ``` --- ```r B+ geom_line(data=mig_pop %>% filter(name=="Italy"), aes(col="Italy"), alpha=0.8, size=1.1)+ geom_line(data=mig_pop %>% filter(name=="Argentina"), aes(col="Argentina"), alpha=0.8, size=1.1) ``` <img src="lezione15_files/figure-html/unnamed-chunk-7-1.png" width="60%" /> --- # Grafico interattivo ```r library(plotly) ggplotly(B) ```
--- Carico dati Italiani ```r library(readr) IMMprovince <- read_delim("IMMprovince.csv", ";", escape_double = FALSE, trim_ws = TRUE, skip = 114, col_names=c("cod.prov","prov", "cod.citt", "citt", "maschi", "femmine", "totale")) ``` --- # calcolo totale per provincia ```r IMMprovince.tot<- IMMprovince %>% group_by(cod.prov, prov) %>% summarize(tot_stranieri=sum(totale)) %>% ungroup() %>% arrange(-tot_stranieri) %>% mutate(rank_abs=row_number(), cod.prov=as.numeric(cod.prov)) ``` --- # Top 10 province per presenza stranieri ```r kable(IMMprovince.tot %>% filter(rank_abs<=10) %>% select(prov,tot_stranieri )) ``` |prov | tot_stranieri| |:-------|-------------:| |Roma | 509057| |Milano | 459131| |Torino | 210973| |Brescia | 151820| |Napoli | 127803| |Firenze | 123466| |Bologna | 119380| |Bergamo | 119045| |Verona | 106692| |Padova | 93372| --- ```r ISTAT.prov <- read_csv("province.csv", col_types = cols(`Codice provincia` = col_integer()), skip = 1) Pop.prov <-ISTAT.prov %>% filter(Età=="Totale") %>% mutate(Tot.Pop=`Totale Maschi`+`Totale Femmine`) %>% select(cod.prov="Codice provincia" , "Tot.Pop" ) IMMprovince.pop<- IMMprovince.tot %>% full_join(Pop.prov, by="cod.prov") %>% mutate(prop.str= 100*tot_stranieri/Tot.Pop) ``` --- # Top 10 province per presenza stranieri ```r IMMprovince.pop<- IMMprovince.pop %>% arrange(-prop.str) %>% mutate(rank_rel=row_number()) kable(IMMprovince.pop %>% filter(rank_rel<=10) %>% select(prov,prop.str )) ``` |prov | prop.str| |:------------------|--------:| |Prato | 18.48541| |Piacenza | 14.45259| |Parma | 14.07931| |Milano | 14.06080| |Modena | 13.06895| |Mantova | 12.60816| |Firenze | 12.40220| |Reggio nell'Emilia | 12.12442| |Brescia | 12.09300| |Lodi | 11.96903| --- # Calcolo totale per provenienza ```r IMMprovince.citt<- IMMprovince %>% group_by(citt) %>% summarize(tot_stranieri=sum(totale)) %>% ungroup() %>% arrange(-tot_stranieri) %>% mutate(rank_citt=row_number()) kable(IMMprovince.citt %>% filter(rank_citt<=10) %>% select(citt,tot_stranieri )) ``` |citt | tot_stranieri| |:---------------------------|-------------:| |Romania | 1145718| |Albania | 421591| |Marocco | 414249| |Cinese, Repubblica Popolare | 288923| |Ucraina | 228560| |Filippine | 157665| |India | 153209| |Bangladesh | 138895| |Egitto | 128095| |Pakistan | 121609| --- # Trasformo in dataset wide ### (top 5 provenienze) ```r IMMprovince_wide<- IMMprovince %>% filter(citt %in% c("Romania", "Albania", "Marocco", "Cinese, Repubblica Popolare", "Ucraina")) %>% select(cod.prov, prov, totale, citt) %>% pivot_wider( names_from=citt, values_from=totale) %>% mutate(cod.prov=as.numeric(cod.prov)) ``` --- ```r library(sf) library(tmap) geo_prov<-st_read("shapefileProvince/ProvCM01012018_g_WGS84.shp") ``` ``` ## Reading layer `ProvCM01012018_g_WGS84' from data source `/Users/nicola/Dropbox/Teaching/2021/labSOCIODEMO/webpage/LaboratorioBioDemografico2021/lezione15/shapefileProvince/ProvCM01012018_g_WGS84.shp' using driver `ESRI Shapefile' ## Simple feature collection with 107 features and 12 fields ## geometry type: MULTIPOLYGON ## dimension: XY ## bbox: xmin: 313279.3 ymin: 3933846 xmax: 1312016 ymax: 5220292 ## CRS: 32632 ``` ```r geo_prov<- geo_prov %>% mutate(cod.prov=COD_PROV) %>% left_join(IMMprovince_wide , by="cod.prov") ``` --- ```r tm_shape(geo_prov) + tm_polygons(col="Romania", palette="Greens", style="cont") ``` <img src="lezione15_files/figure-html/unnamed-chunk-17-1.png" width="60%" /> --- ```r tm_shape(geo_prov) + tm_polygons(col="Albania", palette="Reds", style="cont") ``` <img src="lezione15_files/figure-html/unnamed-chunk-18-1.png" width="60%" /> --- ```r tm_shape(geo_prov) + tm_polygons(col="Marocco", palette="Blues", style="cont") ``` <img src="lezione15_files/figure-html/unnamed-chunk-19-1.png" width="60%" /> --- ```r tm_shape(geo_prov) + tm_polygons(col="Cinese, Repubblica Popolare", palette="Purples", style="cont") ``` <img src="lezione15_files/figure-html/unnamed-chunk-20-1.png" width="60%" /> --- ```r tm_shape(geo_prov) + tm_polygons(col="Ucraina", palette="Oranges", style="cont") ``` <img src="lezione15_files/figure-html/unnamed-chunk-21-1.png" width="60%" /> --- # Dati di flusso https://science.sciencemag.org/content/343/6178/1520 >Migrant “stock” data—the number of people living in a country other than the one in which they were born—are frequently used to understand contemporary trends in international migration, but the data are severely limited. Abel and Sander (p. 1520) present a set of global bilateral migration flows estimated from sequential stock data in 5-year intervals.** The percentage of the world population moving over 5-year periods has not shown dramatic changes between 1995 and 2010. **People from individual African countries tended to move within the continent, whereas people from Europe tended to move to very diverse locations.** --- # Chord diagrams https://guyabel.com/post/animated-directional-chord-diagrams/ ```r library(migest) d0 <- read_csv(system.file("imr", "reg_flow.csv", package = "migest")) d0 ``` ``` ## # A tibble: 891 x 4 ## year0 orig_reg dest_reg flow ## <dbl> <chr> <chr> <dbl> ## 1 1960 Africa Africa 1377791 ## 2 1960 Africa Eastern Asia 5952 ## 3 1960 Africa Eastern Europe & Central Asia 7303 ## 4 1960 Africa Europe 919252 ## 5 1960 Africa Latin America & Caribbean 15796 ## 6 1960 Africa Northern America 82463 ## 7 1960 Africa Oceania 32825 ## 8 1960 Africa Southern Asia 35603 ## 9 1960 Africa Western Asia 106580 ## 10 1960 Eastern Asia Africa 37301 ## # … with 881 more rows ``` --- ```r d1 <- read_csv(system.file("vidwp", "reg_plot.csv", package = "migest")) d1 ``` ``` ## # A tibble: 9 x 5 ## region order1 col1 reg1 reg2 ## <chr> <dbl> <chr> <chr> <chr> ## 1 Northern America 1 #40A4D8 Northern America ## 2 Africa 2 #33BEB7 Africa NA ## 3 Europe 3 #B2C224 Europe NA ## 4 Eastern Europe & Central Asia 4 #FECC2F Eastern Europe & Central Asia ## 5 Western Asia 5 #FBA127 Western Asia ## 6 Southern Asia 6 #F66320 Southern Asia ## 7 Eastern Asia 7 #DB3937 Eastern Asia ## 8 Oceania 8 #A463D7 Oceania NA ## 9 Latin America & Caribbean 9 #0C5BCE Latin America & Caribbean ``` --- # grafico Base ```r library(circlize) chordDiagram(x = d0 %>% filter(year0==1960) %>% select(orig_reg,dest_reg,flow), ) ``` <img src="lezione15_files/figure-html/unnamed-chunk-24-1.png" width="40%" /> --- <img src="./plot-gif/globalchord0.png" alt="" class="center" width="550" height="550" > --- # Aggiungo colori e altri dettagli ```r dev.off() ``` ``` ## null device ## 1 ``` ```r chordDiagram(x = d0 %>% filter(year0==1960) %>% select(orig_reg,dest_reg,flow), directional = 1, order = d1$region, grid.col = d1$col1, annotationTrack = "grid", transparency = 0.25, annotationTrackHeight = c(0.05, 0.1), direction.type = c("diffHeight", "arrows"), link.arr.type = "big.arrow", diffHeight = -0.04, link.sort = TRUE, link.largest.ontop = TRUE) ``` --- # Aggiungo etichette ```r circos.track(track.index = 1, bg.border = NA, panel.fun = function(x, y) { xlim = get.cell.meta.data("xlim") sector.index = get.cell.meta.data("sector.index") reg1 = d1 %>% filter(region == sector.index) %>% pull(reg1) reg2 = d1 %>% filter(region == sector.index) %>% pull(reg2) circos.text(x = mean(xlim), y = ifelse(is.na(reg2), 3, 4), labels = reg1, facing = "bending", cex = 1.1) circos.text(x = mean(xlim), y = 2.75, labels = reg2, facing = "bending", cex = 1.1) circos.axis(h = "top", labels.cex = 0.8, labels.niceFacing = FALSE, labels.pos.adjust = FALSE) }) ``` --- # Creare animazioni > This creates larger data frame d2, with 100 observations for each corridor, one for each frame in the animation. In the original data d0 there are only 11 observations for each corridor, one for each five-year period. ```r library(tweenr) d2 <- d0 %>% mutate(corridor = paste(orig_reg, dest_reg, sep = " -> ")) %>% select(corridor, year0, flow) %>% mutate(ease = "linear") %>% tween_elements(time = "year0", group = "corridor", ease = "ease", nframes = 100) %>% tbl_df() ``` --- # ordino il dataset ```r d2 <- d2 %>% separate(col = .group, into = c("orig_reg", "dest_reg"), sep = " -> ") %>% select(orig_reg, dest_reg, flow, everything()) %>% mutate(flow = flow/1e06) d2 ``` ``` ## # A tibble: 8,181 x 5 ## orig_reg dest_reg flow year0 .frame ## <chr> <chr> <dbl> <dbl> <int> ## 1 Africa Africa 1.38 1960 0 ## 2 Africa Eastern Asia 0.00595 1960 0 ## 3 Africa Eastern Europe & Central Asia 0.00730 1960 0 ## 4 Africa Europe 0.919 1960 0 ## 5 Africa Latin America & Caribbean 0.0158 1960 0 ## 6 Africa Northern America 0.0825 1960 0 ## 7 Africa Oceania 0.0328 1960 0 ## 8 Africa Southern Asia 0.0356 1960 0 ## 9 Africa Western Asia 0.107 1960 0 ## 10 Eastern Asia Africa 0.0373 1960 0 ## # … with 8,171 more rows ``` --- # creo un grafico per ogni frame ```r dev.off() dir.create("./plot-gif/") library(circlize) for(f in unique(d2$.frame)){ # open a PNG plotting device png(file = paste0("./plot-gif/globalchord", f, ".png"), height = 7, width = 7, units = "in", res = 500) # initialise the circos plot circos.clear() par(mar = rep(0, 4), cex=1) circos.par(start.degree = 90, track.margin=c(-0.1, 0.1), gap.degree = 4, points.overflow.warning = FALSE) # plot the chord diagram chordDiagram(d2 %>% filter( .frame == f) %>% select(orig_reg,dest_reg,flow ), directional = 1, order = d1$region, grid.col = d1$col1, annotationTrack = "grid", transparency = 0.25, annotationTrackHeight = c(0.05, 0.1), direction.type = c("diffHeight", "arrows"), link.arr.type = "big.arrow", diffHeight = -0.04, link.sort = TRUE, link.largest.ontop = TRUE) # add labels and axis circos.track(track.index = 1, bg.border = NA, panel.fun = function(x, y) { xlim = get.cell.meta.data("xlim") sector.index = get.cell.meta.data("sector.index") reg1 = d1 %>% filter(region == sector.index) %>% pull(reg1) reg2 = d1 %>% filter(region == sector.index) %>% pull(reg2) circos.text(x = mean(xlim), y = ifelse(is.na(reg2), 3, 4), labels = reg1, facing = "bending", cex = 1.1) circos.text(x = mean(xlim), y = 2.75, labels = reg2, facing = "bending", cex = 1.1) circos.axis(h = "top", labels.cex = 0.8, labels.niceFacing = FALSE, labels.pos.adjust = FALSE) }) text(-.7,1,glue('Migrant Population: {d2 %>% filter(.frame==f) %>% select(year0) %>% summarize(mean(year0)) %>% round(0)} ')) text(.8,1,glue('{d2 %>% filter(.frame==f) %>% select(flow) %>% summarize(sum(flow)) %>% round(1)} million')) # close plotting device dev.off() } ``` --- # Creo una GIF ```r library(magick) img <- image_read(path = "./plot-gif/globalchord0.png") for(f in unique(d2$.frame)[-1]){ img0 <- image_read(path = paste0("./plot-gif/globalchord",f,".png")) img <- c(img, img0) message(f) } img1 <- image_scale(image = img, geometry = "720x720") ani0 <- image_animate(image = img1, fps = 10) image_write(image = ani0, path = "./plot-gif/globalchord.gif") ``` --- <img src="./plot-gif/globalchord.gif" alt="" class="center" width="550" height="550" >