This notebook has been designed for the preparation of the figure,tables and maps dedicated to the paper to be submitted to the journal International Communication Gazette. It is delivered with the final version of the paper.
# Define directory
setwd("/Users/claudegrasland1/Documents/cg/publi/2018/icg2018/notebook")
# Install packages
library(dplyr)
Attachement du package : ‘dplyr’
The following objects are masked from ‘package:stats’:
filter, lag
The following objects are masked from ‘package:base’:
intersect, setdiff, setequal, union
library(ggplot2)
le package ‘ggplot2’ a été compilé avec la version R 3.4.4
library(sf)
Linking to GEOS 3.6.1, GDAL 2.1.3, proj.4 4.9.3
library(cartography)
Le chargement a nécessité le package : sp
library(xtable)
# Load rss informations
rss<-read.table("data/rss32_list_media.csv",
sep="\t",
header=T,
stringsAsFactors = F,
encoding="UTF-8")
rss_list<-rss[,c(1,2,3,4,7,24)]
# Define the list of media of interest
#myrss<-c("en_CAN_starca_int","en_CAN_vansun_int",
# "en_USA_usatdy_int","en_USA_nytime_int",
# "es_MEX_cronic_int","es_MEX_Inform_int",
# "es_ESP_catalu_int", "es_ESP_elpais_int",
# "en_GBR_dailyt_int", "en_GBR_guardi_int",
# "fr_BEL_lesoir_int", "fr_BEL_derheu_int",
# "fr_FRA_figaro_int", "fr_FRA_antill_int",
# "en_AUS_theage_int", "en_AUS_mohera_int")
table(rss$m)
< table of extent 0 >
rss_list<-rss[rss$Name_feed!="fr_DZA_elwata_int",c(1,2,3,4,7,24)]
names(rss_list)<-c("Code","Name","Language","Country","URL","Items")
#rss_list
tabres<-rss_list
tabres$Code<-substr(tabres$Code,1,12)
sum(tabres$Items)
[1] 321269
tli<-xtable(x=tabres)
print(tli,include.rownames=F)
% latex table generated in R 3.4.3 by xtable 1.8-2 package
% Wed Aug 29 10:21:13 2018
\begin{table}[ht]
\centering
\begin{tabular}{lllllr}
\hline
Code & Name & Language & Country & URL & Items \\
\hline
en\_AUS\_austr & the australian & en & AUS & http://www.theaustralian.com.au & 5945 \\
en\_AUS\_dtele & Daily Telegraph (AUS) & en & AUS & http://www.dailytelegraph.com.au/ & 5881 \\
en\_AUS\_moher & The Sydney Morning Herald & en & AUS & http://www.smh.com.au/ & 16916 \\
en\_AUS\_theag & The Age & en & AUS & http://www.theage.com.au/ & 16675 \\
en\_CAN\_starc & The Star & en & CAN & http://www.thestar.com & 7292 \\
en\_CAN\_vansu & The Vancouver Sun & en & CAN & http://www.vancouversun.com & 5096 \\
en\_CHN\_china & China Daily & en & CHN & http://www.chinadaily.com.cn & 11730 \\
en\_GBR\_daily & Daily telegraph & en & GBR & http://www.telegraph.co.uk/ & 19054 \\
en\_GBR\_finat & Financial Times & en & GBR & http://www.ft.com & 9073 \\
en\_GBR\_guard & The Guardian & en & GBR & http://www.theguardian.com/ & 55573 \\
en\_IND\_tindi & The times of India & en & IND & http://timesofindia.indiatimes.com & 12784 \\
en\_MLT\_tmalt & times of malta & en & MLT & http://www.timesofmalta.com & 7115 \\
en\_USA\_latim & The Los Angeles Times & en & USA & http://www.latimes.com/ & 5362 \\
en\_USA\_nytim & The New York Times & en & USA & http://www.nytimes.com & 14963 \\
en\_USA\_usatd & USA Today & en & USA & http://www.usatoday.com/ & 8535 \\
en\_ZWE\_chron & Chronicle & en & ZWE & http://www.chronicle.co.zw & 2095 \\
es\_BOL\_patri & La patria & es & BOL & http://lapatriaenlinea.com & 3410 \\
es\_CHL\_terce & La Tercera & es & CHL & http://www.latercera.com/ & 7357 \\
es\_ESP\_catal & El periodico de Catalunya & es & ESP & http://www.elperiodico.com/es/ & 7594 \\
es\_ESP\_elpai & El Pais & es & ESP & http://elpais.com/ & 13699 \\
es\_ESP\_farod & Faro de vigo & es & ESP & http://www.farodevigo.es/ & 2971 \\
es\_MEX\_croni & La cronica de hoy & es & MEX & http://www.cronica.com.mx/ & 6195 \\
es\_MEX\_Infor & El Informador & es & MEX & http://www.informador.com.mx & 10802 \\
es\_VEN\_unive & El Universal (VEN) & es & VEN & http://www.eluniversal.com/ & 17663 \\
fr\_BEL\_derhe & Derniere Heure & fr & BEL & http://www.dhnet.be/ & 4367 \\
fr\_BEL\_lesoi & Le soir & fr & BEL & http://www.lesoir.be & 5850 \\
fr\_FRA\_antil & France Antilles & fr & FRA & http://www.guadeloupe.franceantilles.fr & 11017 \\
fr\_FRA\_figar & Le Figaro & fr & FRA & http://www.lefigaro.fr/ & 5344 \\
fr\_FRA\_lepar & Le Parisien & fr & FRA & http://www.leparisien.fr/ & 5061 \\
fr\_FRA\_liber & Liberation & fr & FRA & http://www.liberation.fr/ & 5299 \\
fr\_FRA\_lmond & Le Monde & fr & FRA & http://lemonde.fr & 10551 \\
\hline
\end{tabular}
\end{table}
write.table(tli,
"tab/table_31_rss_flows.csv",
sep=";",
row.names=F,
fileEncoding = "UTF-8")
For different reason, we can decide to select more or less flows and a specific time period. It ’s important to do it before to compute marginal sums.
library(dplyr)
# ============== Load CUBE ====================================
## Load cube MTS (multidimensional array) ##
filecube <-"data/geomedia_cube_2015.csv"
dim <- scan (file=filecube, sep=",", nlines=1)
Read 3 items
d <- scan (file=filecube, sep=",", skip=length(dim)+1, nlines=1)
Read 325632 items
dimnames <- list()
for (i in 1:length(dim)) {
dimnames[[i]] <- scan (file=filecube, what="character",
sep=",", skip=i, nlines=1, na.strings="NULL")
}
Read 32 items
Read 53 items
Read 192 items
cub <- array(d,dim,dimnames)
media <- dimnames(cub)[[1]]
time<-dimnames(cub)[[2]]
place <- dimnames(cub)[[3]]
## Transform of cube into table ######################
x<- data.frame(
media = rep(media, time=length(time)*length(place)),
time = rep(time, time=length(place), each=length(media)),
place = rep(place, each=length(media)*length(time)),
stringsAsFactors = FALSE
)
x$observed <- round(as.vector(cub),0)
x <- x[order(x$media,x$time,x$place),]
names(x)<-c("m","t","p","Fmtp")
table(x$m)
en_AUS_austra_int en_AUS_dteleg_int en_AUS_mohera_int en_AUS_theage_int
10176 10176 10176 10176
en_CAN_starca_int en_CAN_vansun_int en_CHN_chinad_int en_GBR_dailyt_int
10176 10176 10176 10176
en_GBR_finati_int en_GBR_guardi_int en_IND_tindia_int en_MLT_tmalta_int
10176 10176 10176 10176
en_USA_latime_int en_USA_nytime_int en_USA_usatdy_int en_ZWE_chroni_int
10176 10176 10176 10176
es_BOL_patria_int es_CHL_tercer_int es_ESP_catalu_int es_ESP_elpais_int
10176 10176 10176 10176
es_ESP_farode_int es_MEX_cronic_int es_MEX_Inform_int es_VEN_univer_int
10176 10176 10176 10176
fr_BEL_derheu_int fr_BEL_lesoir_int fr_DZA_elwata_int fr_FRA_antill_int
10176 10176 10176 10176
fr_FRA_figaro_int fr_FRA_lepari_int fr_FRA_libera_int fr_FRA_lmonde_int
10176 10176 10176 10176
sel<-x[x$m!="fr_DZA_elwata_int",]
#select time period
#sel<-sel[sel$t!="2014-12-29",]
#sel<-sel[sel$t!="2015-01-05",]
#sel<-sel[sel$t!="2015-01-12",]
#sel<-sel[sel$t!="2015-01-19",]
#sel<-sel[sel$t!="2015-11-30",]
#sel<-sel[sel$t!="2015-12-07",]
#sel<-sel[sel$t!="2015-12-14",]
#sel<-sel[sel$t!="2015-12-21",]
#sel<-sel[sel$t!="2015-12-28",]
# eliminate self reference
x<-sel[substr(sel$m,4,6)!=sel$p,]
# eliminate weeks with insufficient number of news
Fmt<-x%>%group_by(m,t)%>%summarise(Fmt = sum(Fmtp))
x<-merge(x,Fmt,by=c("m","t"),all.x=T)
x<-x[x$Fmt>20,]
x<-x[,-5]
# Creation of lag variable
y<-x
y$t<-as.character(as.Date(y$t)+7)
names(y)<-c("m","t","p","Fmtp_lag")
x<-merge(x,y,by=c("m","t","p"),all.x=F,all.y=F)
# Check overdispersion
m<-mean(x$Fmtp)
s<-sd(x$Fmtp)
ratio<-s/m
paste("mean = ",round(m,2),"sta.dev.=",round(s,3),"ratio = ",round(ratio,3))
[1] "mean = 0.81 sta.dev.= 4.374 ratio = 5.42"
#create table of news per week for each rss
Fmt<-x%>%group_by(m,t)%>%summarise(Fmt = sum(Fmtp))
Fmt$mt<-paste(Fmt$m,Fmt$t,sep="_")
Fmt<-as.data.frame(Fmt)
Fmt$m<-as.factor(Fmt$m)
check<-Fmt%>%group_by(m)%>%summarise(min=min(Fmt),q1=quantile(Fmt,0.25),median=quantile(Fmt,0.5),q3=quantile(Fmt,0.75),max=max(Fmt) )
package ‘bindrcpp’ was built under R version 3.4.4
check$CIQ<-(check$q3-check$q1)/check$median
write.table(check,"tab/rss_news_per_week_31.csv")
The marginal sums will be usefull for all types of models and for many graphics
Fm<-x%>%group_by(m)%>%summarise(Fm = sum(Fmtp))
x<-merge(x,Fm,by=c("m"),all.x=T)
Ft<-x%>%group_by(t)%>%summarise(Ft = sum(Fmtp))
x<-merge(x,Ft,by=c("t"),all.x=T)
Fp<-x%>%group_by(p)%>%summarise(Fp = sum(Fmtp))
x<-merge(x,Fp,by=c("p"),all.x=T)
Fmt<-x%>%group_by(m,t)%>%summarise(Fmt = sum(Fmtp))
x<-merge(x,Fmt,by=c("m","t"),all.x=T)
Fpt<-x%>%group_by(p,t)%>%summarise(Fpt = sum(Fmtp))
x<-merge(x,Fpt,by=c("p","t"),all.x=T)
Fmp<-x%>%group_by(m,p)%>%summarise(Fmp = sum(Fmtp))
x<-merge(x,Fmp,by=c("m","p"),all.x=T)
head(x)
# Create weighted variable (equal weight per media and week)
x$Fmtp_w<-round(x$Fmtp*mean(x$Fmt)/x$Fmt,0)
quartz_off_screen
2
quartz_off_screen
2
We propose to compare different measures of salience derive from different procedures of agregation. In all case we measure the salience as a % of world share.
# Non weighted
Fp<-x%>%group_by(p)%>%summarise(Fp = sum(Fmtp))
Fp<-Fp[order(Fp$Fp,decreasing = T),]
Fp$rank<-dim(Fp)[1]-rank(Fp$Fp)+1
Fp$prob<-100*Fp$Fp/sum(Fp$Fp)
maxFp<-max(Fp$prob)
head(Fp,20)
#p1<- ggplot(data = Fp, aes(y = prob, x = rank))
#p1+geom_line()+scale_y_log10(name="share of news (log. scale)")
# weighted
Fp_w<-x%>%group_by(p)%>%summarise(Fp_w = sum(Fmtp_w),na.rm=T)
Fp_w<-Fp_w[order(Fp_w$Fp_w,decreasing = T),]
Fp_w$rank_w<-dim(Fp_w)[1]-rank(Fp_w$Fp_w)+1
Fp_w$prob_w<-100*Fp_w$Fp_w/sum(Fp_w$Fp_w)
maxFp_w<-max(Fp_w$prob_w)
head(Fp_w,20)
#p1<- ggplot(data = Fp, aes(y = prob, x = rank))
#p1+geom_line()+scale_y_log10(name="share of news (log. scale)")
# Synthesis
code<-read.table("data/codes2015.csv",
sep=";",
header=T,
stringsAsFactors = F,
encoding="UTF-8")
tabres<-code[,c(2,3)]
siz1<-Fp[,c("p","Fp","prob","rank")]
tabres<-merge(tabres,siz1,by.x="ISO3",by.y="p")
siz2<-Fp_w[,c("p","Fp_w","prob_w","rank_w")]
tabres<-merge(tabres,siz2,by.x="ISO3",by.y="p")
tabres<-tabres[order(tabres$rank),]
write.table(tabres,"tab/salience_of_nation.csv")
tli<-xtable(tabres[tabres$prob>1,],digits=c(0,0,0,0,2,0,0,2,0))
print(tli,include.rownames=F)
% latex table generated in R 3.4.3 by xtable 1.8-2 package
% Wed Aug 29 10:21:48 2018
\begin{table}[ht]
\centering
\begin{tabular}{llrrrrrr}
\hline
ISO3 & name & Fp & prob & rank & Fp\_w & prob\_w & rank\_w \\
\hline
USA & United States of America & 38878 & 15.97 & 1 & 37042 & 15.19 & 1 \\
FRA & France & 15106 & 6.20 & 2 & 14908 & 6.11 & 2 \\
GRC & Greece & 9698 & 3.98 & 3 & 9467 & 3.88 & 5 \\
SYR & Syria & 9098 & 3.74 & 4 & 9521 & 3.91 & 4 \\
CHN & China & 9075 & 3.73 & 5 & 8570 & 3.52 & 7 \\
GBR & United Kingdom & 8714 & 3.58 & 6 & 10371 & 4.25 & 3 \\
RUS & Russian Federation & 8656 & 3.55 & 7 & 9077 & 3.72 & 6 \\
DEU & Germany & 6760 & 2.78 & 8 & 6799 & 2.79 & 8 \\
AUS & Australia & 4871 & 2.00 & 9 & 2298 & 0.94 & 32 \\
TUR & Turkey & 4744 & 1.95 & 10 & 5127 & 2.10 & 9 \\
ISR & Israel & 4651 & 1.91 & 11 & 4973 & 2.04 & 10 \\
IRQ & Iraq & 4221 & 1.73 & 12 & 4693 & 1.92 & 11 \\
IRN & Iran & 4211 & 1.73 & 13 & 4044 & 1.66 & 12 \\
ESP & Spain & 3926 & 1.61 & 14 & 3568 & 1.46 & 17 \\
JPN & Japan & 3649 & 1.50 & 15 & 3813 & 1.56 & 13 \\
MEX & Mexico & 3563 & 1.46 & 16 & 3526 & 1.45 & 18 \\
VAT & Holy See & 3538 & 1.45 & 17 & 3716 & 1.52 & 14 \\
ITA & Italy & 3495 & 1.44 & 18 & 3490 & 1.43 & 19 \\
EGY & Egypt & 3403 & 1.40 & 19 & 3605 & 1.48 & 15 \\
UKR & Ukraine & 3383 & 1.39 & 20 & 3579 & 1.47 & 16 \\
IND & India & 3199 & 1.31 & 21 & 3100 & 1.27 & 22 \\
AFG & Afghanistan & 3139 & 1.29 & 22 & 3231 & 1.33 & 20 \\
BRA & Brazil & 3028 & 1.24 & 23 & 2863 & 1.17 & 25 \\
ARG & Argentina & 2983 & 1.22 & 24 & 3147 & 1.29 & 21 \\
NPL & Nepal & 2813 & 1.16 & 25 & 2937 & 1.20 & 23 \\
IDN & Indonesia & 2731 & 1.12 & 26 & 2739 & 1.12 & 26 \\
PSE & Palestine & 2577 & 1.06 & 27 & 2636 & 1.08 & 28 \\
YEM & Yemen & 2548 & 1.05 & 28 & 2705 & 1.11 & 27 \\
BEL & Belgium & 2445 & 1.00 & 29 & 2437 & 1.00 & 29 \\
\hline
\end{tabular}
\end{table}
# ============== Load Explanatory variables ====================================
## Load distance file (source : CEPII)
filedist <-"data/dist_cepii2015.csv"
dist<- read.table(filedist,
sep=";",
dec=",",
header = T,
encoding = "UTF-8")
## Load size file (source : Worldbank)
filesize <-"data/size2015_estimate.csv"
size<- read.table(filesize,
sep=";",
dec=",",
header = T,
encoding = "UTF-8")
## Merge size and distance files
int <-merge(dist, size, by.x="iso_d", by.y="CODE",all.x=F,all.y=T)
summary(int)
iso_d iso_o contig comlang_off comlang_ethno
AFG : 228 ABW : 192 Min. :0.00000 Min. :0.0000 Min. :0.0000
AGO : 228 AFG : 192 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
ALB : 228 AGO : 192 Median :0.00000 Median :0.0000 Median :0.0000
ARE : 228 AIA : 192 Mean :0.01483 Mean :0.1549 Mean :0.1449
ARG : 228 ALB : 192 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000
ARM : 228 AND : 192 Max. :1.00000 Max. :1.0000 Max. :1.0000
(Other):42408 (Other):42624
colony comcol curcol col45
Min. :0.0000 Min. :0.0000 Min. :0.0000000 Min. :0.000000
1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000000 1st Qu.:0.000000
Median :0.0000 Median :0.0000 Median :0.0000000 Median :0.000000
Mean :0.0103 Mean :0.1092 Mean :0.0008224 Mean :0.006556
3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000000 3rd Qu.:0.000000
Max. :1.0000 Max. :1.0000 Max. :1.0000000 Max. :1.000000
smctry dist distcap distw distwces
Min. :0.000000 Min. : 1 Min. : 1 Min. : 1 Min. : 0
1st Qu.:0.000000 1st Qu.: 4505 1st Qu.: 4484 1st Qu.: 4485 1st Qu.: 4465
Median :0.000000 Median : 7708 Median : 7699 Median : 7733 Median : 7720
Mean :0.007881 Mean : 8107 Mean : 8095 Mean : 8110 Mean : 8088
3rd Qu.:0.000000 3rd Qu.:11423 3rd Qu.:11406 3rd Qu.:11403 3rd Qu.:11384
Max. :1.000000 Max. :19951 Max. :19951 Max. :19889 Max. :19889
NOM SUP ARA POP
Afghanistan : 228 Min. : 1 Min. : 1 Min. : 1
Albania : 228 1st Qu.: 27994 1st Qu.: 217 1st Qu.: 2121
Algeria : 228 Median : 137254 Median : 1304 Median : 8334
Angola : 228 Mean : 700950 Mean : 7370 Mean : 38302
Antigua and Barbuda: 228 3rd Qu.: 537893 3rd Qu.: 4750 3rd Qu.: 26976
Argentina : 228 Max. :17098249 Max. :156984 Max. :1379113
(Other) :42408
URB GDP GNI MIL
Min. : 1 Min. : 211 Min. : 224 Min. : 7.0
1st Qu.: 1214 1st Qu.: 18817 1st Qu.: 7816 1st Qu.: 206.5
Median : 4224 Median : 65128 Median : 28120 Median : 997.0
Mean : 20595 Mean : 563737 Mean : 391107 Mean : 13275.0
3rd Qu.: 11924 3rd Qu.: 323106 3rd Qu.: 190423 3rd Qu.: 6155.5
Max. :770484 Max. :19061883 Max. :16809664 Max. :559789.0
CO2
Min. : 1
1st Qu.: 2285
Median : 12622
Mean : 175546
3rd Qu.: 67408
Max. :10296625
## select variables used in the model
int<-int[,c("iso_o","iso_d","distw","comlang_ethno", "comlang_off","SUP","POP","GDP")]
names(int)<-c("m_p","p","DIS","LAN1","LAN2","SUP","POP","GDP")
## merge with flows
x$m_p<-substr(x$m,4,6)
tab<-merge(x,int,by=c("m_p","p"),all.x=T,all.y=F)
sel<-tab
# create variables of interest
sel$V1_sup<-log(sel$SUP)
sel$V2_dem<-log(sel$POP/sel$SUP)
sel$V3_eco<-log(sel$GDP/sel$POP)
sel$V4_PM5<-as.numeric(sel$p %in% c("USA","RUS","CHN","FRA","GBR"))
sel$V5_G14<-as.numeric(sel$p %in% c("ZAF","DEU","SAU","ARG","AUS",
"BRA","CAN","KOR","IND","IDN",
"ITA","JPN","MEX","TUR"))
sel$V6_VAT<-as.numeric(sel$p=="VAT")
sel$V7_dis<-log(1/sel$DIS)
sel$V8_lan<-sel$LAN1+sel$LAN2
sel$V8_lan[sel$V8_lan==2]<-1
sel$V9_time<-as.numeric(sel$Fmtp_lag>0)
# Final table
sel<-sel[,c("m","p","t","Fmtp","Fmtp_w", "Fmt",
"V1_sup","V2_dem","V3_eco",
"V4_PM5","V5_G14","V6_VAT",
"V7_dis","V8_lan","V9_time")]
summary(sel)
m p t Fmtp
Length:301780 Length:301780 Length:301780 Min. : 0.0000
Class :character Class :character Class :character 1st Qu.: 0.0000
Mode :character Mode :character Mode :character Median : 0.0000
Mean : 0.8069
3rd Qu.: 0.0000
Max. :327.0000
Fmtp_w Fmt V1_sup V2_dem
Min. : 0.0000 Min. : 21.0 Min. : 0.00 Min. :-8.900
1st Qu.: 0.0000 1st Qu.: 73.0 1st Qu.:10.23 1st Qu.:-3.513
Median : 0.0000 Median : 117.0 Median :11.79 Median :-2.534
Mean : 0.8079 Mean : 154.1 Mean :11.45 Mean :-2.645
3rd Qu.: 0.0000 3rd Qu.: 196.0 3rd Qu.:13.18 3rd Qu.:-1.736
Max. :126.0000 Max. :1208.0 Max. :16.65 Max. : 3.104
V3_eco V4_PM5 V5_G14 V6_VAT
Min. :-0.5426 Min. :0.00000 Min. :0.00000 Min. :0.000000
1st Qu.: 1.1933 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
Median : 2.3774 Median :0.00000 Median :0.00000 Median :0.000000
Mean : 2.2674 Mean :0.02413 Mean :0.07177 Mean :0.005236
3rd Qu.: 3.2075 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
Max. : 6.4457 Max. :1.00000 Max. :1.00000 Max. :1.000000
V7_dis V8_lan V9_time
Min. :-9.886 Min. :0.0000 Min. :0.0000
1st Qu.:-9.367 1st Qu.:0.0000 1st Qu.:0.0000
Median :-9.023 Median :0.0000 Median :0.0000
Mean :-8.827 Mean :0.2398 Mean :0.1853
3rd Qu.:-8.456 3rd Qu.:0.0000 3rd Qu.:0.0000
Max. :-5.081 Max. :1.0000 Max. :1.0000
table(sel$p)
AFG AGO ALB ARE ARG ARM ATG AUS AUT AZE BDI BEL BEN BFA BGD BGR BHR
1580 1580 1580 1580 1580 1580 1580 1377 1580 1580 1580 1480 1580 1580 1580 1580 1580
BHS BIH BLR BLZ BOL BRA BRB BRN BTN BWA CAF CAN CHE CHL CHN CIV CMR
1580 1580 1580 1580 1534 1580 1580 1580 1580 1580 1580 1478 1580 1528 1528 1580 1580
COD COG COL COM CPV CRI CUB CYP CZE DEU DJI DMA DNK DOM DZA ECU EGY
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580
ERI ESH ESP EST ETH FIN FJI FRA FSM GAB GBR GEO GHA GIN GMB GNB GNQ
1580 1580 1426 1580 1580 1580 1580 1327 1580 1580 1424 1580 1580 1580 1580 1580 1580
GRC GRD GRL GTM GUY HKG HND HRV HTI HUN IDN IND IRL IRN IRQ ISL ISR
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1528 1580 1580 1580 1580 1580
ITA JAM JOR JPN KAZ KEN KGZ KHM KIR KOR KWT LAO LBN LBR LBY LCA LKA
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580
LSO LTU LUX LVA MAC MAR MDA MDG MDV MEX MKD MLI MLT MMR MNE MNG MOZ
1580 1580 1580 1580 1580 1580 1580 1580 1580 1476 1580 1580 1528 1580 1580 1580 1580
MRT MUS MWI MYS NAM NER NGA NIC NLD NOR NPL NZL OMN PAK PAN PER PHL
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580
PNG POL PRK PRT PRY PSE QAT ROU RUS RWA SAU SDN SEN SGP SLB SLE SLV
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580
SOM SRB SSD STP SUR SVK SVN SWE SWZ SYC SYR TCD TGO THA TJK TKM TLS
1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580 1580
TON TTO TUN TUR TWN TZA UGA UKR URY USA UZB VAT VCT VEN VNM VUT WSM
1580 1580 1580 1580 1580 1580 1580 1580 1580 1424 1580 1580 1580 1528 1580 1580 1580
XKX YEM ZAF ZMB ZWE
1580 1580 1580 1580 1534
We compare three models non weighted or weighted
library(pscl)
Classes and Methods for R developed in the
Political Science Computational Laboratory
Department of Political Science
Stanford University
Simon Jackman
hurdle and zeroinfl functions by Achim Zeileis
library(boot)
library(xtable)
library(MASS)
Attachement du package : ‘MASS’
The following object is masked from ‘package:dplyr’:
select
# NON WEIGHTED MODELS
## POISSON model
poi<-glm(Fmtp~log(Fmt)+
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel,
family=poisson)
summary(poi)
Call:
glm(formula = Fmtp ~ log(Fmt) + V1_sup + V2_dem + V3_eco + V4_PM5 +
V5_G14 + V6_VAT + V7_dis + V8_lan + V9_time, family = poisson,
data = sel)
Deviance Residuals:
Min 1Q Median 3Q Max
-7.7927 -0.6650 -0.4732 -0.3151 27.6540
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.725246 0.032757 -235.835 < 2e-16 ***
log(Fmt) 0.763621 0.002572 296.906 < 2e-16 ***
V1_sup 0.321263 0.001945 165.184 < 2e-16 ***
V2_dem 0.270795 0.002230 121.440 < 2e-16 ***
V3_eco 0.290269 0.002670 108.735 < 2e-16 ***
V4_PM5 0.924262 0.008150 113.409 < 2e-16 ***
V5_G14 -0.023443 0.007076 -3.313 0.000923 ***
V6_VAT 2.833359 0.025301 111.987 < 2e-16 ***
V7_dis 0.180234 0.002425 74.318 < 2e-16 ***
V8_lan 0.481741 0.004277 112.627 < 2e-16 ***
V9_time 1.781037 0.006408 277.955 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1183177 on 301779 degrees of freedom
Residual deviance: 464340 on 301769 degrees of freedom
AIC: 622430
Number of Fisher Scoring iterations: 6
## NEGATIVE BINOMIAL model
nbi<-glm.nb(Fmtp~log(Fmt)+
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel)
summary(nbi)
Call:
glm.nb(formula = Fmtp ~ log(Fmt) + V1_sup + V2_dem + V3_eco +
V4_PM5 + V5_G14 + V6_VAT + V7_dis + V8_lan + V9_time, data = sel,
init.theta = 0.4277592587, link = log)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.9081 -0.5647 -0.4074 -0.2509 9.2219
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -7.459417 0.077788 -95.89 <2e-16 ***
log(Fmt) 0.709288 0.006772 104.73 <2e-16 ***
V1_sup 0.463132 0.003957 117.04 <2e-16 ***
V2_dem 0.500326 0.004870 102.73 <2e-16 ***
V3_eco 0.203814 0.004826 42.23 <2e-16 ***
V4_PM5 0.621652 0.024064 25.83 <2e-16 ***
V5_G14 0.007012 0.017120 0.41 0.682
V6_VAT 4.542695 0.058730 77.35 <2e-16 ***
V7_dis 0.285375 0.006026 47.36 <2e-16 ***
V8_lan 0.285167 0.011097 25.70 <2e-16 ***
V9_time 1.890483 0.010877 173.80 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for Negative Binomial(0.4278) family taken to be 1)
Null deviance: 344244 on 301779 degrees of freedom
Residual deviance: 144004 on 301769 degrees of freedom
AIC: 427048
Number of Fisher Scoring iterations: 1
Theta: 0.42776
Std. Err.: 0.00327
2 x log-likelihood: -427023.87100
## ZERO INFLATED POISSON model
zip <- zeroinfl(Fmtp~log(Fmt)+
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel)
summary(zip)
Call:
zeroinfl(formula = Fmtp ~ log(Fmt) + V1_sup + V2_dem + V3_eco + V4_PM5 + V5_G14 +
V6_VAT + V7_dis + V8_lan + V9_time, data = sel)
Pearson residuals:
Min 1Q Median 3Q Max
-3.7568 -0.3252 -0.2084 -0.1151 68.9256
Count model coefficients (poisson with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) -4.132906 0.036618 -112.866 <2e-16 ***
log(Fmt) 0.632342 0.002800 225.821 <2e-16 ***
V1_sup 0.130557 0.002216 58.918 <2e-16 ***
V2_dem 0.070903 0.002565 27.638 <2e-16 ***
V3_eco 0.169461 0.003020 56.108 <2e-16 ***
V4_PM5 0.960299 0.008656 110.935 <2e-16 ***
V5_G14 -0.064960 0.007577 -8.573 <2e-16 ***
V6_VAT 1.044763 0.028611 36.516 <2e-16 ***
V7_dis 0.080098 0.002567 31.197 <2e-16 ***
V8_lan 0.425222 0.004537 93.720 <2e-16 ***
V9_time 0.761975 0.007185 106.045 <2e-16 ***
Zero-inflation model coefficients (binomial with logit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 5.972278 0.111578 53.53 <2e-16 ***
log(Fmt) -0.442406 0.009720 -45.51 <2e-16 ***
V1_sup -0.480427 0.005696 -84.35 <2e-16 ***
V2_dem -0.560241 0.007052 -79.44 <2e-16 ***
V3_eco -0.177646 0.006542 -27.16 <2e-16 ***
V4_PM5 -1.316257 0.049942 -26.36 <2e-16 ***
V5_G14 -0.452456 0.025669 -17.63 <2e-16 ***
V6_VAT -5.195433 0.095545 -54.38 <2e-16 ***
V7_dis -0.303161 0.008514 -35.61 <2e-16 ***
V8_lan -0.177194 0.015104 -11.73 <2e-16 ***
V9_time -1.686910 0.014667 -115.01 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Number of iterations in BFGS optimization: 31
Log-likelihood: -2.659e+05 on 22 Df
# WEIGHTED MODELS
## POISSON model
poi_w<-glm(Fmtp_w~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel,
family=poisson)
summary(poi_w)
Call:
glm(formula = Fmtp_w ~ V1_sup + V2_dem + V3_eco + V4_PM5 + V5_G14 +
V6_VAT + V7_dis + V8_lan + V9_time, family = poisson, data = sel)
Deviance Residuals:
Min 1Q Median 3Q Max
-5.8620 -0.7705 -0.5775 -0.4045 25.3249
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.276276 0.028387 -115.41 <2e-16 ***
V1_sup 0.341057 0.001909 178.67 <2e-16 ***
V2_dem 0.307636 0.002236 137.58 <2e-16 ***
V3_eco 0.250985 0.002617 95.92 <2e-16 ***
V4_PM5 0.813879 0.007990 101.86 <2e-16 ***
V5_G14 -0.076316 0.007120 -10.72 <2e-16 ***
V6_VAT 3.130357 0.024944 125.50 <2e-16 ***
V7_dis 0.236192 0.002343 100.83 <2e-16 ***
V8_lan 0.437102 0.004305 101.53 <2e-16 ***
V9_time 1.689902 0.005743 294.23 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for poisson family taken to be 1)
Null deviance: 1120406 on 301779 degrees of freedom
Residual deviance: 544503 on 301770 degrees of freedom
AIC: 700909
Number of Fisher Scoring iterations: 6
## NEGATIVE BINOMIAL model
nbi_w<-glm.nb(Fmtp_w~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel)
summary(nbi_w)
Call:
glm.nb(formula = Fmtp_w ~ V1_sup + V2_dem + V3_eco + V4_PM5 +
V5_G14 + V6_VAT + V7_dis + V8_lan + V9_time, data = sel,
init.theta = 0.263776437, link = log)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.5560 -0.5929 -0.4449 -0.2744 10.0889
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.735014 0.075505 -49.467 < 2e-16 ***
V1_sup 0.505304 0.004249 118.924 < 2e-16 ***
V2_dem 0.552558 0.005290 104.448 < 2e-16 ***
V3_eco 0.184096 0.005127 35.909 < 2e-16 ***
V4_PM5 0.632526 0.028687 22.049 < 2e-16 ***
V5_G14 0.077420 0.019439 3.983 6.81e-05 ***
V6_VAT 5.163204 0.066477 77.669 < 2e-16 ***
V7_dis 0.332701 0.006718 49.527 < 2e-16 ***
V8_lan 0.330807 0.012232 27.045 < 2e-16 ***
V9_time 1.817612 0.012139 149.737 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
(Dispersion parameter for Negative Binomial(0.2638) family taken to be 1)
Null deviance: 265123 on 301779 degrees of freedom
Residual deviance: 138266 on 301770 degrees of freedom
AIC: 450362
Number of Fisher Scoring iterations: 1
Theta: 0.26378
Std. Err.: 0.00189
2 x log-likelihood: -450339.69400
## ZERO INFLATED POISSON model
zip_w <- zeroinfl(Fmtp_w~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data=sel)
summary(zip_w)
Call:
zeroinfl(formula = Fmtp_w ~ V1_sup + V2_dem + V3_eco + V4_PM5 + V5_G14 + V6_VAT +
V7_dis + V8_lan + V9_time, data = sel)
Pearson residuals:
Min 1Q Median 3Q Max
-2.9266 -0.3088 -0.2089 -0.1249 75.9929
Count model coefficients (poisson with log link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.525496 0.030697 17.12 <2e-16 ***
V1_sup 0.100622 0.002147 46.87 <2e-16 ***
V2_dem 0.062757 0.002545 24.66 <2e-16 ***
V3_eco 0.083822 0.002876 29.14 <2e-16 ***
V4_PM5 0.857628 0.008248 103.98 <2e-16 ***
V5_G14 -0.119639 0.007472 -16.01 <2e-16 ***
V6_VAT 0.920395 0.027637 33.30 <2e-16 ***
V7_dis 0.117309 0.002430 48.27 <2e-16 ***
V8_lan 0.342164 0.004483 76.33 <2e-16 ***
V9_time 0.420141 0.005718 73.48 <2e-16 ***
Zero-inflation model coefficients (binomial with logit link):
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.776745 0.091174 52.39 <2e-16 ***
V1_sup -0.468355 0.005270 -88.86 <2e-16 ***
V2_dem -0.514703 0.006401 -80.41 <2e-16 ***
V3_eco -0.231822 0.006134 -37.79 <2e-16 ***
V4_PM5 -1.304368 0.045821 -28.47 <2e-16 ***
V5_G14 -0.335765 0.022510 -14.92 <2e-16 ***
V6_VAT -4.792922 0.078605 -60.98 <2e-16 ***
V7_dis -0.262086 0.007868 -33.31 <2e-16 ***
V8_lan -0.272961 0.014278 -19.12 <2e-16 ***
V9_time -1.987071 0.013068 -152.06 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Number of iterations in BFGS optimization: 30
Log-likelihood: -2.626e+05 on 20 Df
AIC<-AIC(poi,poi_w,nbi,nbi_w,zip,zip_w)
xtable(AIC)
% latex table generated in R 3.4.3 by xtable 1.8-2 package
% Wed Aug 29 10:23:48 2018
\begin{table}[ht]
\centering
\begin{tabular}{rrr}
\hline
& df & AIC \\
\hline
poi & 11.00 & 622430.25 \\
poi\_w & 10.00 & 700908.62 \\
nbi & 12.00 & 427047.87 \\
nbi\_w & 11.00 & 450361.69 \\
zip & 22.00 & 531749.08 \\
zip\_w & 20.00 & 525232.85 \\
\hline
\end{tabular}
\end{table}
write.table(AIC,"tab/AIC_models.csv")
First, we compute the parameters for each week :
library(pscl)
library(boot)
library(xtable)
library(MASS)
# Modele general
nbi<-glm.nb(Fmtp_w~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data = sel)
week<-"All"
res<-as.data.frame(sum$coefficients)
Error in sum$coefficients : objet de type 'builtin' non indiçable
Then we prepare figures for the analysis of results
library(dplyr)
library(ggplot2)
tab<-read.table("tab/nbi_by_week_coefficients.csv",sep=";",dec=",",stringsAsFactors = F)
tab<-tab[tab$time!="All" & tab$param!="(Intercept)",]
mypar<-names(table(tab$param))
mypar
[1] "V1_sup" "V2_dem" "V3_eco" "V4_PM5" "V5_G14" "V6_VAT" "V7_dis" "V8_lan" "V9_time"
tabpar<-tab[,c(3,4,5,6)]
names(tabpar)<-c("zval","pval","week","mod")
tabpar$time<-as.Date((tabpar$week))
tabpar$sign<-cut(tabpar$pval,breaks = c(-1,0.001, 0.01,0.05,1))
tabpar$name<-as.factor(tabpar$mod)
levels(tabpar$name)
[1] "V1_sup" "V2_dem" "V3_eco" "V4_PM5" "V5_G14" "V6_VAT" "V7_dis" "V8_lan" "V9_time"
levels(tabpar$name)<-c("(H1) Geographical Size","(H2) Population density","(H3) GDP per capita","(H4) Permanent at UNSC ","(H5) Other members of G20", "(H6) Vatican effect", "(H7) Geographical proximity","(H8) Common language", "(H9) Time autocorrelation")
tabpar$name<-as.factor(as.character(tabpar$name))
levels(tabpar$name)
[1] "(H1) Geographical Size" "(H2) Population density" "(H3) GDP per capita"
[4] "(H4) Permanent at UNSC " "(H5) Other members of G20" "(H6) Vatican effect"
[7] "(H7) Geographical proximity" "(H8) Common language" "(H9) Time autocorrelation"
levels(tabpar$sign)<-(c("***","**","*","-"))
p1<- ggplot(data = tabpar, aes(y = zval, x = time,colour=name))
p1<-p1+geom_line(data = tabpar, aes(y = zval, x = time),color="red")
p1<-p1+geom_point(data = tabpar, aes(y = zval, x = time, shape = sign),color="black")
p1<-p1+labs(x="Week in 2015",y="z-value",colour="model",shape="signif")
p1<-p1+scale_x_date(breaks=as.Date(c("2015-02-01","2015-03-01","2015-04-01","2015-05-01","2015-06-01","2015-07-01","2015-08-01","2015-09-01","2015-10-01","2015-11-01","2015-12-01")),labels=c("Fe","Ma","Ap","Ma","Jn","Jl","Au","Se","Oc","No","De"))
p1+facet_wrap(~name,scales = "free")
pdf(file = "fig/nbi_by_week.pdf",width=9,height=6)
p1+facet_wrap(~name,scales = "free")
dev.off()
quartz_off_screen
2
png(file = "fig/nbi_by_week.png",width=900,height=600)
p1+facet_wrap(~name,scales = "free")
dev.off()
quartz_off_screen
2
library(dplyr)
library(ggplot2)
tab<-read.table("tab/nbi_by_week_deviance.csv",sep=";",dec=",",stringsAsFactors = F)
ref<-tab[1,1]*100
tab<-tab[tab$time!="All",]
tab$time<-as.Date(tab$time)
tab$nbidev<-tab$nbidev*100
p1<- ggplot(data = tab, aes(y = nbidev, x = time))
#p1<-p1 +geom_ribbon(aes(ymax=nbidev,ymin=30,x=time))+geom_line(y=ref,color="gray")
p1<-p1+geom_line(data = tab, aes(y = nbidev, x = time),color="black")
p1<-p1+geom_point(data = tab, aes(y = nbidev, x = time),color="black")
p1<-p1+labs(x="Week in 2015",y="deviance explained (%)")
p1<-p1+scale_x_date(breaks=as.Date(c("2015-01-01","2015-02-01","2015-03-01","2015-04-01","2015-05-01","2015-06-01","2015-07-01","2015-08-01","2015-09-01","2015-10-01","2015-11-01","2015-12-01")),labels=c("Ja","Fe","Ma","Ap","Ma","Jn","Jl","Au","Se","Oc","No","De"))
p1<-p1+scale_y_continuous(breaks=c(30,35,40,45,50,55,60))
p1<-p1+geom_line(y=ref,color="black",size=1.5,linetype=1)
p1<-p1 + annotate("text", x = as.Date("2015-01-14"), y = 56, label = "Paris attack I")
p1<-p1 + annotate("text", x = as.Date("2015-01-25"), y = 39, label = "Yemen crisis")
p1<-p1 + annotate("text", x = as.Date("2015-03-01"), y = 58.5, label = "Netanyahu's speech in US")
p1<-p1 + annotate("text", x = as.Date("2015-03-20"), y = 32, label = "Pam cyclone + Bardo attack")
p1<-p1 + annotate("text", x = as.Date("2015-04-24"), y = 37.5, label = "Nepal earthquake")
p1<-p1 + annotate("text", x = as.Date("2015-07-14"), y = 58.5, label = "Iran nuclear deal")
p1<-p1 + annotate("text", x = as.Date("2015-06-26"), y = 43, label = "Sousse attack")
p1<-p1 + annotate("text", x = as.Date("2015-09-16"), y = 42, label = "Chile earthquake")
p1<-p1 + annotate("text", x = as.Date("2015-11-01"), y = 57.5, label = "Paris attack II")
p1<-p1 + annotate("text", x = as.Date("2015-12-20"), y = 56.5, label = "UK storms")
p1<- p1+annotate("text", x = as.Date("2015-11-20"), y = 47, label = "GLOBAL MODEL")
p1
?geom_line
Now we compute the model by media
library(pscl)
library(boot)
library(xtable)
library(MASS)
# Modele general
nbi<-glm.nb(Fmtp_w~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data = sel)
sum<-summary(nbi)
med<-"All"
res<-as.data.frame(sum$coefficients)
res$media<-med
res$param<-rownames(res)
res$effect<-exp(res$Estimate)
nbidev<-(nbi$null.deviance-nbi$deviance)/nbi$null.deviance
dev<-as.data.frame(nbidev)
dev$media<-med
#filename<-paste("tab/nbi_",med,".csv",sep="")
#write.table(res, filename,sep=";",dec=",",row.names = T)
tabres<-res
tabdev<-dev
medias<-names(table(sel$m))
for (med in medias) {
medsel<-sel[sel$m==med,]
## ZERO INFLATED NEGATIVE BINOMIAL model with USA
nbi<-glm.nb(Fmtp_w ~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data = medsel)
sum<-summary(nbi)
res<-as.data.frame(sum$coefficients)
res$media<-med
res$param<-rownames(res)
res$effect<-exp(res$Estimate)
#filename<-paste("tab/nbi_",med,".csv",sep="")
#write.table(res, filename,sep=";",dec=",",row.names = T)
tabres<-rbind(tabres,res)
nbidev<-(nbi$null.deviance-nbi$deviance)/nbi$null.deviance
dev<-as.data.frame(nbidev)
dev$media<-med
tabdev<-rbind(tabdev,dev)
print(paste(med,nbidev))
}
[1] "en_AUS_austra_int 0.486263788717818"
[1] "en_AUS_dteleg_int 0.395196532077931"
[1] "en_AUS_mohera_int 0.602932394235866"
[1] "en_AUS_theage_int 0.603017394872161"
[1] "en_CAN_starca_int 0.452799966027384"
[1] "en_CAN_vansun_int 0.416887331839391"
[1] "en_CHN_chinad_int 0.585792045935893"
[1] "en_GBR_dailyt_int 0.58982225808143"
[1] "en_GBR_finati_int 0.564296739889655"
[1] "en_GBR_guardi_int 0.609664229909144"
[1] "en_IND_tindia_int 0.625993585340101"
[1] "en_MLT_tmalta_int 0.630576152635283"
[1] "en_USA_latime_int 0.415500832815684"
[1] "en_USA_nytime_int 0.522520824990666"
[1] "en_USA_usatdy_int 0.406338460399108"
[1] "en_ZWE_chroni_int 0.358039359590375"
[1] "es_BOL_patria_int 0.540738589852891"
[1] "es_CHL_tercer_int 0.571316987745279"
[1] "es_ESP_catalu_int 0.47819588773576"
[1] "es_ESP_elpais_int 0.592818988482916"
[1] "es_ESP_farode_int 0.44854092370159"
[1] "es_MEX_cronic_int 0.563852841976347"
[1] "es_MEX_Inform_int 0.553653942267704"
[1] "es_VEN_univer_int 0.638135503720411"
[1] "fr_BEL_derheu_int 0.4069366293502"
[1] "fr_BEL_lesoir_int 0.421041375331192"
[1] "fr_FRA_antill_int 0.426127291683969"
[1] "fr_FRA_figaro_int 0.447148073603345"
[1] "fr_FRA_lepari_int 0.410703766613582"
[1] "fr_FRA_libera_int 0.353624721843542"
[1] "fr_FRA_lmonde_int 0.501717825908408"
filename<-paste("tab/nbi_by_media_coefficients.csv")
write.table(tabres, filename,sep=";",dec=",",row.names = T)
filename<-paste("tab/nbi_by_media_deviance.csv")
write.table(tabdev, filename,sep=";",dec=",",row.names = T)
library(dplyr)
library(ggplot2)
library(reshape2)
list.files("tab")
tab<-read.table("tab/nbi_by_media_coefficients.csv",sep=";",dec=",")
tab$par<-paste(tab$mod,tab$param,sep="")
tab2<-tab[,c("par","media","Estimate")]
tab3<-dcast(tab2,media ~ par)
rownames(tab3)<-substr(tab3$media,4,9)
tab4<-tab3[-1,-c(1:2)]
##########
# PCA + HCPC
##########
library(FactoMineR)
tab4<-tab3[-1,-c(1:2)]
row.names(tab4)
row.names(tab4)<-c("Australian (AUS)", "Daily Telegr. (AUS)", "Syd. Mo. Her.(AUS)","The Age (AUS)",
"The Star (CAN)","Vanc. Sun (CAN)", "China daily (CHN",
"Daily Telegr. (GBR)", "Fin. Time (GBR)","Guardian (GBR)","Times of India (IND)",
"Times of Malta (MLT)","L.A. Time (USA)",
"New York Times (USA)","USA Today (USA)", "Chronicle (ZBW)",
"La Patria (PER)", "La Tercera (CHL)",
"Per. di Catal. (ESP)","El Pais (ESP)", "Faro (ESP)",
"Chronicle (MEX)", "Independent (MEX)","Universal (VEN)",
"Der. Heure (BEL)","Le Soir (BEL)",
"Fr-Antilles (FRA)", "Le Figaro (FRA)","Le Parisien (FRA)","Libération (FRA)", "Le Monde (FRA)")
names(tab4)<-c("(H1) Geographical Size","(H2) Population density","(H3) GDP per capita","(H4) Permanent at UNSC ","(H5) Other members of G20", "(H6) Vatican effect", "(H7) Geographical proximity","(H8) Common language", "(H9) Time autocorrelation")
### PCA ###
res.pca=PCA(tab4, scale.unit=T,ncp=Inf,graph=F)
par(mfrow=c(1,2),mar=c(2,2,2,2))
plot.PCA(res.pca,choix="var",xlim=c(-1,1),title="Correlation",cex=0.6)
plot.PCA(res.pca,choix="ind", title="Coordinates", xlim=c(-5,5),ylim=c(-4,4),cex=0.6)
pdf("fig/PCA_media.pdf",width=12,height=6,pointsize = 10)
res.pca=PCA(tab4, scale.unit=T,ncp=Inf,graph=F)
par(mfrow=c(1,2),mar=c(4,4,4,4))
plot.PCA(res.pca,choix="var",xlim=c(-1,1),ylim=c(-1,1),title="Correlation",cex=0.7)
plot.PCA(res.pca,choix="ind", title="Coordinates", xlim=c(-4,4),ylim=c(-4,4),cex=0.7)
dev.off()
### HCPC
res.hcpc<-HCPC(res.pca,nb.clust = 4)
par(mfrow=c(1,1),mar=c(2,2,2,2))
plot.HCPC(res.hcpc,choice="tree",new.plot=F)
pdf("fig/HCPC_media.pdf",width=6,height=6)
par(mfrow=c(1,1),mar=c(4,4,4,4))
plot.HCPC(res.hcpc,choice="tree",new.plot=F)
dev.off()
### Interpretation of clusters
res.hcpc$desc.var$quanti
#dTable ordered by clusters
str(res.hcpc)
tabres<-tab3[-1,-c(1,2)]
row.names(tabres) <-row.names(tab4)
names(tabres)<-c("(H1) Geographical Size","(H2) Population density","(H3) GDP per capita","(H4) Permanent at UNSC ","(H5) Other members of G20", "(H6) Vatican effect", "(H7) Geographical proximity","(H8) Common language", "(H9) Time autocorrelation")
tabres$class<-as.numeric(res.hcpc$data.clust$clust)
#liste des groupes
tabres<-round(tabres[order(tabres$class),order(names(tabres))],2)
print(xtable(tabres, align = "r|rrrrrrrrrr"))
write.table(tabres,"tab/nbi_media_cluster.csv")
We consider a specific week and we try to analyze the countries where newspaper has depicted exceptional events.
We decide to use the model by media for the definition of residuals instead of the model by week. These choice is the result of our analysis that conclude that the rules of media are not changing through time but rather from one media to another one.
library(pscl)
Classes and Methods for R developed in the
Political Science Computational Laboratory
Department of Political Science
Stanford University
Simon Jackman
hurdle and zeroinfl functions by Achim Zeileis
library(boot)
library(xtable)
library(MASS)
Attachement du package : ‘MASS’
The following object is masked from ‘package:dplyr’:
select
tabres <- data.frame(m=character(),
s=character(),
t=character(),
OBS = double(),
EST = double(),
stringsAsFactors=FALSE)
medias<-names(table(sel$m))
for (med in medias) {
medsel<-sel[sel$m==med,]
## ZERO INFLATED NEGATIVE BINOMIAL model with USA
nbi<-glm.nb(Fmtp_w ~
V1_sup+V2_dem+V3_eco+
V4_PM5+V5_G14+V6_VAT+
V7_dis+V8_lan+V9_time,
data = medsel)
tabres2<-medsel[,c(1,2,3)]
tabres2$OBS<-medsel$Fmtp_w
tabres2$EST<-nbi$fitted.values
tabres<-rbind(tabres,tabres2)
}
write.table(tabres,"tab/resid_model_by_media.csv",row.names = T, sep=";",dec=",")
tab<-read.table("tab/resid_model_by_media.csv",sep=";",dec=",",header=T)
tabres<-tab%>%group_by(p)%>%summarise(OBS= sum(OBS), EST=sum(EST))
tabres$RES_ABS<-tabres$OBS-tabres$EST
tabres$RES_REL<-tabres$OBS/tabres$EST
tabres$RES_CHI2<-((tabres$RES_ABS)**2)/tabres$EST
tabres2<-tabres[order(tabres$RES_REL,decreasing = T),]
head(tabres2,20)
# Synthesis
code<-read.table("data/codes2015.csv",
sep=";",
header=T,
stringsAsFactors = F,
encoding="UTF-8")
code<-code[,c("ISO3","name")]
tabres<-merge(code,tabres,by.x="ISO3",by.y="p",all.x=F,all.y=T)
tabres<-tabres[order(tabres$RES_REL,decreasing = T),]
write.table(tabres,"tab/residual_salience_of_nation.csv",sep=";",dec=",",row.names = F)
tli<-xtable(tabres[(1:20),],digits=c(0,0,0,0,0,0,2,1))
print(tli,include.rownames=F)
% latex table generated in R 3.4.3 by xtable 1.8-2 package
% Wed Aug 29 10:28:29 2018
\begin{table}[ht]
\centering
\begin{tabular}{llrrrrr}
\hline
ISO3 & name & OBS & EST & RES\_ABS & RES\_REL & RES\_CHI2 \\
\hline
VUT & Vanuatu & 303 & 49 & 254 & 6.22 & 1327.7 \\
SYR & Syria & 9521 & 1967 & 7554 & 4.84 & 29007.9 \\
GRC & Greece & 9467 & 2071 & 7396 & 4.57 & 26415.5 \\
SYC & Seychelles & 301 & 71 & 230 & 4.21 & 737.8 \\
NPL & Nepal & 2937 & 871 & 2066 & 3.37 & 4898.0 \\
PSE & Palestine & 2636 & 802 & 1834 & 3.29 & 4189.8 \\
DMA & Dominica & 96 & 30 & 66 & 3.22 & 146.9 \\
MDV & Maldives & 343 & 113 & 230 & 3.04 & 468.2 \\
ISR & Israel & 4973 & 2147 & 2826 & 2.32 & 3717.6 \\
LBY & Libya & 2091 & 905 & 1186 & 2.31 & 1554.8 \\
YEM & Yemen & 2705 & 1220 & 1485 & 2.22 & 1806.7 \\
AFG & Afghanistan & 3231 & 1462 & 1769 & 2.21 & 2139.7 \\
CAF & Central African Republic & 529 & 251 & 278 & 2.11 & 309.5 \\
BDI & Burundi & 1104 & 548 & 556 & 2.01 & 563.2 \\
AUS & Australia & 2298 & 1227 & 1071 & 1.87 & 933.8 \\
JOR & Jordan & 803 & 494 & 309 & 1.63 & 193.1 \\
IRQ & Iraq & 4693 & 2905 & 1788 & 1.62 & 1101.2 \\
TUN & Tunisia & 2124 & 1354 & 770 & 1.57 & 437.3 \\
SOM & Somalia & 618 & 407 & 211 & 1.52 & 110.0 \\
NZL & New Zealand & 874 & 583 & 291 & 1.50 & 145.5 \\
\hline
\end{tabular}
\end{table}
tab<-read.table("tab/resid_model_by_media.csv",sep=";",dec=",",header=T)
# select media
#table(tab$m)
tab2<-tab[tab$m=="fr_BEL_lesoir_int",]
# select countries
#table(tab$p)
liste_p<- c("SYR","TUR","GRC","HUN","AUT","DEU","TUN","MMR","NPL","FRA","USA","GBR","CHN","RUS","VAT","CUB","CHL","BRA","ISR","PSE","IDN","BGD")
tab3<-tab2[tab2$p %in% liste_p,]
tab3$RES_ABS<-tab3$OBS-tab3$EST
tab3$RES_REL<-tab3$OBS/tab3$EST
tab3$sign<-log(tab3$RES_REL)/log(2)
tab3$sign[is.na(tab3$sign)]<-0
tab3$sign[tab3$sign<0]<-0
tab3$sign[tab3$OBS<2]<-0
library(ggplot2)
le package ‘ggplot2’ a été compilé avec la version R 3.4.4RStudio Community is a great place to get help:
https://community.rstudio.com/c/tidyverse.
ggplot(tab3, aes(t, p )) +
geom_tile(aes(fill = sign), color = "white") +
scale_fill_gradient(low = "white", high = "black") +
ylab("country ") +
xlab("week") +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 12),
plot.title = element_text(size=16),
axis.title=element_text(size=14,face="bold"),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(fill = "residual salience")
tab<-read.table("tab/resid_model_by_media.csv",sep=";",dec=",",header=T)
# select countries
#table(tab$p)
tab3<-tab[tab$p=="YEM",]
table(tab3$t)
2015-01-05 2015-01-12 2015-01-19 2015-01-26 2015-02-02
29 31 23 23 31
2015-02-09 2015-02-16 2015-02-23 2015-03-02 2015-03-09
31 31 31 31 31
2015-03-16 2015-03-23 2015-03-30 2015-04-06 2015-04-13
31 31 31 31 31
2015-04-20 2015-04-27 2015-05-04 2015-05-11 2015-05-18
31 31 31 31 30
2015-05-25 2015-06-01 2015-06-08 2015-06-15 2015-06-22
30 31 31 31 31
2015-06-29 2015-07-06 2015-07-13 2015-07-20 2015-07-27
30 30 30 31 31
2015-08-03 2015-08-10 2015-08-17 2015-08-24 2015-08-31
31 31 31 31 31
2015-09-07 2015-09-14 2015-09-21 2015-09-28 2015-10-05
31 31 31 31 31
2015-10-12 2015-10-19 2015-10-26 2015-11-02 2015-11-09
31 31 31 31 31
2015-11-16 2015-11-23 2015-11-30 2015-12-07 2015-12-14
31 30 29 30 30
2015-12-21 2015-12-28
29 29
# exclude medias
#tab3<-tab3[!(tab3$m %in% c("en_ZWE_chroni_int")),]
table(tab3$m)
en_AUS_austra_int en_AUS_dteleg_int en_AUS_mohera_int
50 49 52
en_AUS_theage_int en_CAN_starca_int en_CAN_vansun_int
52 52 50
en_CHN_chinad_int en_GBR_dailyt_int en_GBR_finati_int
52 52 52
en_GBR_guardi_int en_IND_tindia_int en_MLT_tmalta_int
52 52 52
en_USA_latime_int en_USA_nytime_int en_USA_usatdy_int
52 52 52
en_ZWE_chroni_int es_BOL_patria_int es_CHL_tercer_int
46 46 52
es_ESP_catalu_int es_ESP_elpais_int es_ESP_farode_int
52 52 50
es_MEX_cronic_int es_MEX_Inform_int es_VEN_univer_int
52 52 52
fr_BEL_derheu_int fr_BEL_lesoir_int fr_FRA_antill_int
48 52 52
fr_FRA_figaro_int fr_FRA_lepari_int fr_FRA_libera_int
50 52 52
fr_FRA_lmonde_int
47
#tab3<-tab3[!(as.character(tab3$t) %in% c("2015-01-05","2015-01-12","2015-01-19","2015-01-26")),]
tab3$RES_ABS<-tab3$OBS-tab3$EST
tab3$RES_REL<-tab3$OBS/tab3$EST
tab3$siz<-tab3$OBS
tab3$col<-log(tab3$RES_REL+0.01)/log(2)
library(ggplot2)
ggplot(tab3, aes(t, m )) +
geom_point(aes(colour=col,size=siz))+scale_colour_gradient(low = "white", high = "black") +
ylab("media ") +
xlab("week") +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 12),
plot.title = element_text(size=16),
axis.title=element_text(size=14,face="bold"),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(colour = "log(OBS/EST)",size="nb. of news")
pdf("fig/heatmap_yemen.pdf",width = 8, height = 6)
ggplot(tab3, aes(t, m )) +
geom_point(aes(colour=col,size=siz))+scale_colour_gradient(low = "white", high = "black") +
ylab("media ") +
xlab("week") +
theme(legend.title = element_text(size = 10),
legend.text = element_text(size = 12),
plot.title = element_text(size=16),
axis.title=element_text(size=14,face="bold"),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(colour = "log(OBS/EST)",size="nb. of news")
dev.off()
quartz_off_screen
2