DACSS 603 Final Project Work: “Media Analysis”
Read in data for both measures of media interest gathered from the Spike/Newswhip media site
Public Interest = Social media interactions on articles
Media Interest = Number of articles published
# A tibble: 6 x 10
Date All_Locations Europe North_America Oceania South_America
<date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2022-02-26 19811 7777 4763 2301 0
2 2022-02-27 410149 346425 56690 3703 171
3 2022-02-28 295464 238816 53122 278 166
4 2022-03-01 202270 171920 28329 662 34
5 2022-03-02 95478 73628 21099 836 12
6 2022-03-03 33566 8929 24598 164 6
# ... with 4 more variables: Asia <dbl>, Africa <dbl>,
# Middle_East <dbl>, Southeast_Asia <dbl>
# A tibble: 6 x 10
Date All_Locations Europe North_America Oceania South_America
<date> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2022-02-26 134 15 61 16 0
2 2022-02-27 89 14 42 4 2
3 2022-02-28 356 46 249 4 1
4 2022-03-01 106 29 46 8 0
5 2022-03-02 152 26 75 3 0
6 2022-03-03 109 11 62 1 0
# ... with 4 more variables: Asia <dbl>, Africa <dbl>,
# Middle_East <dbl>, Southeast_Asia <dbl>
All measure are in one dataset for analysis.
#data frame with all regional observations
regional_all <- read_csv("comprehensive_by_region.csv")
regional_all <- as_tibble(regional_all)
regional_all
# A tibble: 8 x 6
Region Total_Population Sample_Populati~ DDOS_Users Public_Interest
<chr> <dbl> <dbl> <dbl> <dbl>
1 Africa 1234685606 459973244 164 7666
2 Asia 4434971532 3314507290 1443 29078
3 Europe 748481333 745852328 40262 914764
4 Middle~ 393498810 303663005 390 1511
5 North_~ 600504974 512581441 5245 291484
6 Oceania 43693399 30801415 718 9700
7 South_~ 437360279 404401418 393 735
8 Southe~ 680855171 614900482 264 9033
# ... with 1 more variable: Media_Interest <dbl>
#proportions
proportions <- read.csv("proportions.csv")
proportions
Region Total_Population Sample_Population DDOS_Users
1 Africa 0.082521173 0.030742670 1.096110e-08
2 Asia 0.296414773 0.221527673 9.644400e-08
3 Europe 0.050025332 0.049849621 2.690942e-06
4 Middle_East 0.026299799 0.020295553 2.606600e-08
5 North_America 0.040135217 0.034258779 3.505537e-07
6 Oceania 0.002920282 0.002058637 4.798810e-08
7 South_America 0.029231315 0.027028483 2.626650e-08
8 Southeast_Asia 0.045505485 0.041097352 1.764460e-08
Public_Interest Media_Interest
1 5.123631e-07 1.33670e-09
2 1.943451e-06 2.05186e-08
3 6.113896e-05 1.53054e-08
4 1.009889e-07 2.20560e-09
5 1.948156e-05 6.02190e-08
6 6.483070e-07 4.01010e-09
7 4.912430e-08 3.34200e-10
8 6.037276e-07 6.54990e-09
cor(regional_all$DDOS_Users, regional_all$Public_Interest, method = "pearson")
[1] 0.9811875
Comparing the population of each region being examined and the representative population of the countries represented in my sample. These are highly correlated.
Call:
lm(formula = Total_Population ~ Sample_Population, data = regional_all,
na.action = na.exclude)
Residuals:
Min 1Q Median 3Q Max
-253750322 -121385069 -57833324 -973754 611162518
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.419e+07 1.298e+08 0.109 0.917
Sample_Population 1.325e+00 1.032e-01 12.837 1.37e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 283600000 on 6 degrees of freedom
Multiple R-squared: 0.9649, Adjusted R-squared: 0.959
F-statistic: 164.8 on 1 and 6 DF, p-value: 1.373e-05
Explanatory variables are both the public interest and media interest counts with the DDOS user counts as the outcome variable.
Call:
lm(formula = DDOS_Users ~ Public_Interest + Media_Interest, data = regional_all,
na.action = na.exclude)
Residuals:
1 2 3 4 5 6 7
-819.0092 2051.3093 167.6466 -199.0906 -704.4965 -0.2989 -410.6496
8
-85.4111
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 815.022433 478.700459 1.703 0.14938
Public_Interest 0.045171 0.001319 34.234 4e-07 ***
Media_Interest -8.914703 1.411800 -6.314 0.00147 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1060 on 5 degrees of freedom
Multiple R-squared: 0.9958, Adjusted R-squared: 0.9942
F-statistic: 599.5 on 2 and 5 DF, p-value: 1.112e-06
===============================================
Dependent variable:
---------------------------
DDOS_Users
-----------------------------------------------
Public_Interest 0.045***
(0.001)
Media_Interest -8.915***
(1.412)
Constant 815.022
(478.700)
-----------------------------------------------
Observations 8
R2 0.996
Adjusted R2 0.994
Residual Std. Error 1,060.060 (df = 5)
F Statistic 599.468*** (df = 2; 5)
===============================================
Note: *p<0.1; **p<0.05; ***p<0.01
#create plot
regional_plot <- ggplot(regional_all, aes(x = log(Media_Interest), y = log(DDOS_Users), color = Region)) +
geom_point() +
geom_smooth(method=lm,se=TRUE,fullrange=TRUE,color="cornflowerblue") +
labs(title= "Media Interest and Users",
x= "Media Interest",
y = "Users") +
theme_minimal_hgrid()
regional_plot
#create plot
regional_plot2 <- ggplot(regional_all, aes(x = log(Public_Interest), y = log(DDOS_Users), color = Region)) +
geom_point() +
geom_smooth(method=lm,se=TRUE,fullrange=TRUE,color="cornflowerblue") +
labs(title= "Public Interest and Users",
x= "Public Interest",
y = "Users") +
theme_minimal_hgrid()
regional_plot2
Fitting another model, this time with an interaction term allowing interaction between population and media interest
Call:
lm(formula = DDOS_Users ~ Public_Interest + Media_Interest +
Public_Interest * Media_Interest, data = regional_all)
Residuals:
1 2 3 4 5 6 7
-342.124 81.899 2.874 202.827 -4.138 111.255 252.096
8
-304.690
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.008e+02 1.583e+02 0.637 0.55879
Public_Interest 5.302e-02 1.050e-03 50.513 9.19e-07
Media_Interest 2.492e-01 1.214e+00 0.205 0.84739
Public_Interest:Media_Interest -4.010e-05 5.038e-06 -7.959 0.00135
(Intercept)
Public_Interest ***
Media_Interest
Public_Interest:Media_Interest **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 288.8 on 4 degrees of freedom
Multiple R-squared: 0.9998, Adjusted R-squared: 0.9996
F-statistic: 5405 on 3 and 4 DF, p-value: 1.141e-07
Call:
lm(formula = DDOS_Users ~ Public_Interest, data = regional_all)
Residuals:
Min 1Q Median 3Q Max
-6530.1 465.6 853.1 930.3 2035.0
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -5.955e+02 1.158e+03 -0.514 0.625
Public_Interest 4.244e-02 3.409e-03 12.449 1.64e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2899 on 6 degrees of freedom
Multiple R-squared: 0.9627, Adjusted R-squared: 0.9565
F-statistic: 155 on 1 and 6 DF, p-value: 1.641e-05
Call:
lm(formula = DDOS_Users ~ Media_Interest, data = regional_all)
Residuals:
Min 1Q Median 3Q Max
-5676 -5161 -4585 -4362 33997
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4678.357 6515.562 0.718 0.500
Media_Interest 6.928 18.681 0.371 0.723
Residual standard error: 14850 on 6 degrees of freedom
Multiple R-squared: 0.02241, Adjusted R-squared: -0.1405
F-statistic: 0.1375 on 1 and 6 DF, p-value: 0.7235
gg2 <- ggplot() +
geom_smooth(aes(log(DDOS_Users), log(Media_Interest)), data = regional_all, fullrange=TRUE,
method = "lm", se = TRUE, color = "red3") +
geom_smooth(aes(log(DDOS_Users), log(Public_Interest)), data = regional_all, fullrange=TRUE,
method = "lm", se = TRUE, color = "forestgreen") +
ggtitle("Media Interest and Public Interest & DDOS Users", subtitle = "Green Indicating Public Interest, Red Indicating Media Interest") +
xlab("log: DDOS Users") +
ylab("log: Media Interactions") +
theme_minimal_hgrid()
gg2
Fitting the media models with stargazer
library(stargazer)
library(tinytex)
library(sandwich)
m1 = lm(DDOS_Users ~ Media_Interest, data = regional_all)
m2 = lm(DDOS_Users ~ Public_Interest, data = regional_all)
model.lst = list(m1, m2)
stargazer(m1,
m2,
title="Displaying results for multiple response variables",
type = "text",
float = TRUE,
report = "vcsp",
se=lapply(model.lst, function(x) sqrt(diag(vcovHC(x, type = "HC1")))),
no.space = TRUE,
header=FALSE,
single.row = TRUE,
font.size = "small",
intercept.bottom = F,
covariate.labels = c("Intercept", "Media Interest", "Public Interest"),
column.labels = c("Media Interest", "Public Interest"),
column.separate = c(1, 1),
digits = 3,
t.auto = T,
p.auto = T
)
Displaying results for multiple response variables
=====================================================================
Dependent variable:
----------------------------------------
DDOS_Users
Media Interest Public Interest
(1) (2)
---------------------------------------------------------------------
Intercept 4,678.357 (5,155.242) -595.471 (807.308)
p = 0.365 p = 0.461
Media Interest 6.928 (8.020)
p = 0.388
Public Interest 0.042 (0.003)
p = 0.000
---------------------------------------------------------------------
Observations 8 8
R2 0.022 0.963
Adjusted R2 -0.141 0.957
Residual Std. Error (df = 6) 14,846.860 2,898.960
F Statistic (df = 1; 6) 0.138 154.983***
=====================================================================
Note: *p<0.1; **p<0.05; ***p<0.01
For attribution, please cite this work as
Becvar (2022, May 4). IT Army: Media Data. Retrieved from https://kbec19.github.io/it-army/posts/media-data/
BibTeX citation
@misc{becvar2022media, author = {Becvar, Kristina}, title = {IT Army: Media Data}, url = {https://kbec19.github.io/it-army/posts/media-data/}, year = {2022} }