1 Prerequisites

1.1 Materials

Please visit https://github.com/arifpras/KelasData for the datasets and other relevant materials to be used during the sessions.

1.2 Clearing the environment

rm(list=ls())
ls()
## character(0)

1.3 Packages and libraries

#setting working directory
setwd("/Users/arifpras/OneDrive - The University of Nottingham/BB_KelasData/KelasData/00_Datasets")

1.4 Datasets

op_sales <- readxl::read_excel(path = "/Users/arifpras/OneDrive - The University of Nottingham/BB_KelasData/KelasData/00_Datasets/OP_all.xlsx", sheet = "OP_sales")
glimpse(op_sales)
## Rows: 51
## Columns: 8
## $ volume          <dbl> 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63…
## $ title           <chr> "Arriving Once Again", "The 11 Supernovas", "Roger and…
## $ release_date    <dttm> 2008-06-04, 2008-09-04, 2008-12-04, 2009-03-04, 2009-…
## $ pages           <dbl> 216, 232, 216, 216, 216, 200, 216, 216, 216, 216, 216,…
## $ chapters        <dbl> 10, 11, 10, 10, 10, 9, 10, 11, 11, 11, 10, 9, 11, 12, …
## $ sales_mio       <dbl> 1.441785, 1.517953, 1.476194, 1.641952, 1.609125, 1.66…
## $ last_movie_days <dbl> 95, 187, 278, 368, 460, 552, 643, 82, 174, 235, 327, 4…
## $ vixi_avg        <dbl> 23.60, 39.95, 48.06, 42.48, 28.58, 24.40, 20.15, 17.16…
#change the date format: from <POSITX> to <date>
op_sales <- op_sales %>% 
  mutate(release_date = as.Date(release_date))

glimpse(op_sales)
## Rows: 51
## Columns: 8
## $ volume          <dbl> 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63…
## $ title           <chr> "Arriving Once Again", "The 11 Supernovas", "Roger and…
## $ release_date    <date> 2008-06-04, 2008-09-04, 2008-12-04, 2009-03-04, 2009-…
## $ pages           <dbl> 216, 232, 216, 216, 216, 200, 216, 216, 216, 216, 216,…
## $ chapters        <dbl> 10, 11, 10, 10, 10, 9, 10, 11, 11, 11, 10, 9, 11, 12, …
## $ sales_mio       <dbl> 1.441785, 1.517953, 1.476194, 1.641952, 1.609125, 1.66…
## $ last_movie_days <dbl> 95, 187, 278, 368, 460, 552, 643, 82, 174, 235, 327, 4…
## $ vixi_avg        <dbl> 23.60, 39.95, 48.06, 42.48, 28.58, 24.40, 20.15, 17.16…
(
  db01 <- op_sales %>%
    pivot_longer(
      cols = pages:vixi_avg,
      names_to = "obsvar",
      values_to = "obsval"
    ) %>%
    mutate(
      obsvar = recode(
        obsvar,
        chapters = "Chapters",
        last_movie_days = "Days from Last Movie",
        pages = "Pages",
        sales_mio = "Total Sales (in millions)",
        vixi_avg = "VIX Index (in average)"
      )
    )
)

2 Plotting

2.1 Line chart

(
  plot01 <- db01 %>%
    #plot the dataset
    ggplot(aes(x = volume, y = obsval)) +
    geom_line(aes(color = obsvar)) +
    facet_wrap( ~ obsvar, scales = "free", nrow = 2) +
    theme_light() +
    labs(x = "\nVolume", y = "") +
    #ggthemes::scale_color_colorblind() +
    scale_color_brewer(palette = "Set1") +
    theme(
      axis.text.x = element_text(size = 8),
      axis.ticks.x = element_blank(),
      axis.line.x = element_blank(),
      axis.title.x = element_text(size = 7),
      axis.line.y = element_blank(),
      axis.title.y = element_text(size = 7),
      axis.text.y = element_text(size = 8),
      plot.title.position = "plot",
      plot.caption.position = "plot",
      legend.title = element_text(size = 7),
      legend.text = element_text(size = 7),
      legend.key.size = unit(0.5, "cm"),
      legend.spacing = unit(1, "cm"),
      plot.title = element_text(hjust = 0, size = 13, face = "bold"),
      plot.subtitle = element_text(hjust = 0, size = 12),
      plot.caption = element_text(hjust = 0, size = 12),
      # legend.title = element_blank(),
      legend.position = "bottom",
      strip.text.x = element_text(size = 8),
      panel.grid.major.y = element_line(colour = "grey97"),
      panel.ontop = FALSE,
      panel.spacing = unit(1, "lines")
    ) +
    guides(color = guide_legend(title = "Observable variables:"))
)

2.2 Box plot

pacman::p_load(colorspace)
(
  plot02 <- db01 %>%
    #plot the dataset
    ggplot(aes(x = volume, y = obsval)) +
    geom_boxplot(aes(
      color = obsvar,
      fill = after_scale(desaturate(lighten(color, .7), .7))
      ),
      size = 1) +
    #geom_line(aes(color = obsvar)) +
    facet_wrap(~ obsvar, scales = "free", nrow = 2) +
    theme_light() +
    labs(
      title = "Boxplot",
      x = "\nVolume", y = "") +
    #ggthemes::scale_color_colorblind() +
    scale_color_brewer(palette = "Set1") +
    theme(
      axis.text.x = element_text(size = 8),
      axis.ticks.x = element_blank(),
      axis.line.x = element_blank(),
      axis.title.x = element_text(size = 7),
      axis.line.y = element_blank(),
      axis.title.y = element_text(size = 7),
      axis.text.y = element_text(size = 8),
      plot.title.position = "plot",
      plot.caption.position = "plot",
      legend.title = element_text(size = 7),
      legend.text = element_text(size = 7),
      legend.key.size = unit(0.5, "cm"),
      legend.spacing = unit(1, "cm"),
      plot.title = element_text(hjust = 0, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0, size = 12),
      plot.caption = element_text(hjust = 0, size = 12),
      # legend.title = element_blank(),
      legend.position = "bottom",
      strip.text.x = element_text(size = 8),
      panel.grid.major.y = element_line(colour = "grey97"),
      panel.ontop = FALSE,
      panel.spacing = unit(1, "lines")
      ) +
    guides(color = guide_legend(title = "Observable variables:"))
)

2.3 Histogram

#pacman::p_load(colorspace)
(
  plot03 <- db01 %>%
    #plot the dataset
    ggplot(aes(x = obsval)) +
    geom_histogram(bins = 50, 
                   aes(
                     color = obsvar,
                     fill = after_scale(desaturate(lighten(color, .7), .7))
                     )) +
    facet_wrap(~ obsvar, scales = "free", nrow = 2) +
    theme_light() +
    labs(
      title = "Histogram",
      x = "\nRelease Date", y = "") +
    #ggthemes::scale_color_colorblind() +
    scale_color_brewer(palette = "Set1") +
    theme(
      axis.text.x = element_text(size = 8),
      axis.ticks.x = element_blank(),
      axis.line.x = element_blank(),
      axis.title.x = element_text(size = 7),
      axis.line.y = element_blank(),
      axis.title.y = element_text(size = 7),
      axis.text.y = element_text(size = 8),
      plot.title.position = "plot",
      plot.caption.position = "plot",
      legend.title = element_text(size = 7),
      legend.text = element_text(size = 7),
      legend.key.size = unit(0.5, "cm"),
      legend.spacing = unit(1, "cm"),
      plot.title = element_text(hjust = 0, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0, size = 12),
      plot.caption = element_text(hjust = 0, size = 12),
      # legend.title = element_blank(),
      legend.position = "bottom",
      strip.text.x = element_text(size = 8),
      panel.grid.major.y = element_line(colour = "grey97"),
      panel.ontop = FALSE,
      panel.spacing = unit(1, "lines")
      ) +
    guides(color = guide_legend(title = "Observable variables:"))
)

3 Descriptive statistics

3.1 Summary: skim

op_sales %>% select(pages, chapters, sales_mio, last_movie_days, vixi_avg) %>% 
  skimr::skim()
Data summary
Name Piped data
Number of rows 51
Number of columns 5
_______________________
Column type frequency:
numeric 5
________________________
Group variables None

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
pages 0 1 214.90 11.09 192.00 208.00 216.00 216.00 248.00 ▃▂▇▂▁
chapters 0 1 10.47 0.67 9.00 10.00 10.00 11.00 12.00 ▁▇▁▆▁
sales_mio 0 1 2.22 0.45 1.44 1.78 2.33 2.61 2.90 ▇▅▃▇▇
last_movie_days 0 1 485.86 326.63 44.00 230.00 419.00 673.00 1297.00 ▇▆▅▂▂
vixi_avg 0 1 19.90 8.57 10.55 14.14 16.77 23.10 48.06 ▇▃▁▁▁

3.2 Summary: stargazer

pacman::p_load(pastecs, stargazer)

(
  summ01 <- op_sales %>%
    select(pages:vixi_avg) %>%
    rename("Chapters" = chapters,
           "Days from Last Movie" = last_movie_days,
           "Pages" = pages,
           "Total Sales (in millions)" = sales_mio,
           "VIX Index (in average)" = vixi_avg)
)
summ01 %>%
  as.data.frame() %>% 
  stargazer(type = 'text', out = "descsumm.txt", digits = 1) #flip = TRUE
## 
## ========================================================================
## Statistic                 N  Mean  St. Dev. Min  Pctl(25) Pctl(75)  Max 
## ------------------------------------------------------------------------
## Pages                     51 214.9   11.1   192    208      216     248 
## Chapters                  51 10.5    0.7     9      10       11     12  
## Total Sales (in millions) 51  2.2    0.4    1.4    1.8      2.6     2.9 
## Days from Last Movie      51 485.9  326.6    44    230      673    1,297
## VIX Index (in average)    51 19.9    8.6    10.5   14.1     23.1   48.1 
## ------------------------------------------------------------------------

3.3 Summary: dplyr

(
  summ02 <- db01 %>%
    group_by(obsvar) %>%
    summarise(
      Obs = n(),
      Mean = mean(obsval, na.rm = TRUE),
      Std.Dev = sd(obsval, na.rm = TRUE),
      Min. = min(obsval, na.rm = TRUE),
      Max. = max(obsval, na.rm = TRUE),
      P25 = quantile(obsval, 0.25, na.rm = TRUE),
      P75 = quantile(obsval, 0.75, na.rm = TRUE),
      Var. = var(obsval, na.rm = TRUE)
  )
)

3.4 Summary: parameters

pacman::p_load(parameters)

(
  summ03 <- op_sales %>%
    select(pages:vixi_avg) %>%
    rename("Chapters" = chapters,
           "Days from Last Movie" = last_movie_days,
           "Pages" = pages,
           "Total Sales (in millions)" = sales_mio,
           "VIX Index (in average)" = vixi_avg) %>%
    describe_distribution()
)

4 Correlations

library(corrr)
## 
## Attaching package: 'corrr'
## The following object is masked from 'package:skimr':
## 
##     focus
(
  corr01 <- op_sales %>% 
    select(pages:vixi_avg) %>%
    rename("Chapters" = chapters,
           "Days from Last Movie" = last_movie_days,
           "Pages" = pages,
           "Total Sales (in millions)" = sales_mio,
           "VIX Index (in average)" = vixi_avg) %>% 
    correlate() %>% 
    shave() %>% 
    fashion()
)
## 
## Correlation method: 'pearson'
## Missing treated using: 'pairwise.complete.obs'

5 Unit Root Test

pacman::p_load(tidyr, tseries)

(
  unrt01 <- db01 %>%
    group_by(obsvar) %>%
    summarise(
      box.pvalue = Box.test(obsval, lag=1, type="Ljung-Box")$p.value,
      box = Box.test(obsval, lag=1, type="Ljung-Box")$p.value<0.05,
      adf.pvalue = adf.test(obsval, alternative = "stationary")$p.value,
      adf = adf.test(obsval, alternative = "stationary")$p.value<0.05,
      kpss.pvalue=kpss.test(obsval)$p.value,
      kpss=kpss.test(obsval)$p.value>0.05
      )
)
## Warning in adf.test(obsval, alternative = "stationary"): p-value smaller than
## printed p-value

## Warning in adf.test(obsval, alternative = "stationary"): p-value smaller than
## printed p-value
## Warning in kpss.test(obsval): p-value greater than printed p-value

## Warning in kpss.test(obsval): p-value greater than printed p-value

6 Simple Regressions

6.1 Initial specifications

\[ \text{Sales}_t = \alpha_0 + \alpha_1 \text{Chapters}_t + \alpha_2 \text{LastMovie}_t + \alpha_3 \text{VIX index}_t + \upsilon_t \]

\[ \text{Sales}_t = \beta_0 + \beta_1 \text{Pages}_t + \beta_2 \text{LastMovie}_t + \beta_3 \text{VIX index}_t + \epsilon_t \]

6.2 Modelling: base

ols01bs <- lm(sales_mio ~ chapters + last_movie_days + vixi_avg, 
                data = op_sales)

summary(ols01bs)
## 
## Call:
## lm(formula = sales_mio ~ chapters + last_movie_days + vixi_avg, 
##     data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74522 -0.31147 -0.04718  0.34711  0.76190 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)      1.6107927  0.9671887   1.665  0.10248   
## chapters         0.1028203  0.0879605   1.169  0.24832   
## last_movie_days -0.0001212  0.0001866  -0.649  0.51928   
## vixi_avg        -0.0205174  0.0071327  -2.877  0.00603 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4149 on 47 degrees of freedom
## Multiple R-squared:  0.1847, Adjusted R-squared:  0.1326 
## F-statistic: 3.548 on 3 and 47 DF,  p-value: 0.02135
ols02bs <- lm(sales_mio ~ pages + last_movie_days + vixi_avg, 
                data = op_sales)

summary(ols02bs)
## 
## Call:
## lm(formula = sales_mio ~ pages + last_movie_days + vixi_avg, 
##     data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73780 -0.24534 -0.03507  0.26933  0.70463 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)   
## (Intercept)     -4.963e-01  1.099e+00  -0.451  0.65377   
## pages            1.481e-02  4.997e-03   2.964  0.00475 **
## last_movie_days -6.218e-05  1.750e-04  -0.355  0.72402   
## vixi_avg        -2.194e-02  6.591e-03  -3.329  0.00170 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3864 on 47 degrees of freedom
## Multiple R-squared:  0.2931, Adjusted R-squared:  0.248 
## F-statistic: 6.497 on 3 and 47 DF,  p-value: 0.0009077

6.3 Modelling: interaction

ols03bs <- lm(sales_mio ~ chapters + last_movie_days + vixi_avg + 
                last_movie_days * vixi_avg, 
                data = op_sales)

summary(ols03bs)
## 
## Call:
## lm(formula = sales_mio ~ chapters + last_movie_days + vixi_avg + 
##     last_movie_days * vixi_avg, data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.74097 -0.30324 -0.04414  0.34142  0.76512 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)               1.602e+00  9.789e-01   1.636    0.109
## chapters                  1.005e-01  8.999e-02   1.117    0.270
## last_movie_days          -1.371e-05  6.794e-04  -0.020    0.984
## vixi_avg                 -1.856e-02  1.391e-02  -1.334    0.189
## last_movie_days:vixi_avg -6.468e-06  3.928e-05  -0.165    0.870
## 
## Residual standard error: 0.4193 on 46 degrees of freedom
## Multiple R-squared:  0.1851, Adjusted R-squared:  0.1143 
## F-statistic: 2.613 on 4 and 46 DF,  p-value: 0.0474
ols04bs <- lm(sales_mio ~ pages + last_movie_days + vixi_avg + 
                last_movie_days * vixi_avg, 
                data = op_sales)

summary(ols04bs)
## 
## Call:
## lm(formula = sales_mio ~ pages + last_movie_days + vixi_avg + 
##     last_movie_days * vixi_avg, data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.73572 -0.24592 -0.03259  0.27035  0.70360 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)   
## (Intercept)              -4.948e-01  1.112e+00  -0.445  0.65842   
## pages                     1.484e-02  5.101e-03   2.910  0.00556 **
## last_movie_days          -8.746e-05  6.305e-04  -0.139  0.89028   
## vixi_avg                 -2.241e-02  1.303e-02  -1.720  0.09222 . 
## last_movie_days:vixi_avg  1.525e-06  3.650e-05   0.042  0.96685   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3905 on 46 degrees of freedom
## Multiple R-squared:  0.2932, Adjusted R-squared:  0.2317 
## F-statistic: 4.769 on 4 and 46 DF,  p-value: 0.002651

6.4 Modelling: lagging

ols05bs <- lm(sales_mio ~ chapters + lag(chapters, n = 1L) + 
                last_movie_days + vixi_avg + last_movie_days * vixi_avg, 
                data = op_sales)

summary(ols05bs)
## 
## Call:
## lm(formula = sales_mio ~ chapters + lag(chapters, n = 1L) + last_movie_days + 
##     vixi_avg + last_movie_days * vixi_avg, data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79311 -0.30921 -0.05747  0.23425  0.77016 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)
## (Intercept)               1.106e+00  1.398e+00   0.791    0.433
## chapters                  8.423e-02  8.944e-02   0.942    0.351
## lag(chapters, n = 1)      6.609e-02  9.371e-02   0.705    0.484
## last_movie_days          -4.366e-05  6.999e-04  -0.062    0.951
## vixi_avg                 -1.819e-02  1.434e-02  -1.268    0.211
## last_movie_days:vixi_avg -6.773e-06  4.151e-05  -0.163    0.871
## 
## Residual standard error: 0.4116 on 44 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.1992, Adjusted R-squared:  0.1082 
## F-statistic: 2.189 on 5 and 44 DF,  p-value: 0.07249
ols06bs <- lm(sales_mio ~ pages + lag(pages, n = 1L) + 
                last_movie_days + vixi_avg + last_movie_days * vixi_avg, 
                data = op_sales)

summary(ols06bs)
## 
## Call:
## lm(formula = sales_mio ~ pages + lag(pages, n = 1L) + last_movie_days + 
##     vixi_avg + last_movie_days * vixi_avg, data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.60952 -0.24881 -0.03402  0.19936  0.71790 
## 
## Coefficients:
##                            Estimate Std. Error t value Pr(>|t|)  
## (Intercept)              -2.373e+00  1.357e+00  -1.750   0.0871 .
## pages                     1.249e-02  4.831e-03   2.586   0.0131 *
## lag(pages, n = 1)         1.123e-02  5.087e-03   2.207   0.0326 *
## last_movie_days          -1.773e-04  5.940e-04  -0.298   0.7668  
## vixi_avg                 -2.429e-02  1.241e-02  -1.957   0.0567 .
## last_movie_days:vixi_avg  9.207e-06  3.507e-05   0.263   0.7941  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3629 on 44 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.3773, Adjusted R-squared:  0.3066 
## F-statistic: 5.333 on 5 and 44 DF,  p-value: 0.0006463

6.5 Modelling: 1st difference

ols07bs <- lm(diff(sales_mio, lag = 1) ~ diff(chapters, lag = 1) + 
                diff(last_movie_days, lag = 1) + diff(vixi_avg, lag = 1), 
                data = op_sales)

summary(ols07bs)
## 
## Call:
## lm(formula = diff(sales_mio, lag = 1) ~ diff(chapters, lag = 1) + 
##     diff(last_movie_days, lag = 1) + diff(vixi_avg, lag = 1), 
##     data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25826 -0.11457 -0.01094  0.10991  0.29096 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     6.374e-03  2.090e-02   0.305    0.762
## diff(chapters, lag = 1)         2.337e-02  2.236e-02   1.045    0.301
## diff(last_movie_days, lag = 1) -6.328e-05  7.892e-05  -0.802    0.427
## diff(vixi_avg, lag = 1)         1.567e-03  2.917e-03   0.537    0.594
## 
## Residual standard error: 0.1476 on 46 degrees of freedom
## Multiple R-squared:  0.04947,    Adjusted R-squared:  -0.01252 
## F-statistic: 0.798 on 3 and 46 DF,  p-value: 0.5013
ols08bs <- lm(diff(sales_mio, lag = 1) ~ diff(pages, lag = 1) + 
                diff(last_movie_days, lag = 1) + diff(vixi_avg, lag = 1), 
                data = op_sales)

summary(ols08bs)
## 
## Call:
## lm(formula = diff(sales_mio, lag = 1) ~ diff(pages, lag = 1) + 
##     diff(last_movie_days, lag = 1) + diff(vixi_avg, lag = 1), 
##     data = op_sales)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26910 -0.11739 -0.01116  0.10990  0.28065 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)                     6.701e-03  2.091e-02   0.320    0.750
## diff(pages, lag = 1)            1.570e-03  1.553e-03   1.011    0.317
## diff(last_movie_days, lag = 1) -7.206e-05  7.802e-05  -0.924    0.360
## diff(vixi_avg, lag = 1)         1.497e-03  2.925e-03   0.512    0.611
## 
## Residual standard error: 0.1477 on 46 degrees of freedom
## Multiple R-squared:  0.04803,    Adjusted R-squared:  -0.01405 
## F-statistic: 0.7737 on 3 and 46 DF,  p-value: 0.5147

6.6 Modelling: dplyr

pacman::p_load(broom)
(
  ols01dp <- op_sales %>%
    select(pages:vixi_avg) %>%
    lm(sales_mio ~ chapters + last_movie_days + vixi_avg, 
       data = .) %>%
    tidy()
)
(
  ols02dp <- op_sales %>%
    select(pages:vixi_avg) %>%
    lm(sales_mio ~ pages + last_movie_days + vixi_avg, 
       data = .) %>%
    tidy()
)

6.7 Modelling: estimatr

pacman::p_load(estimatr)

(
  ols01es <- op_sales %>%
    lm_robust(sales_mio ~ chapters + last_movie_days + vixi_avg,
              data = . ,se_type = "stata") %>%
    tidy()
)
pacman::p_load(estimatr)

(
  ols02es <- op_sales %>%
    lm_robust(sales_mio ~ pages + last_movie_days + vixi_avg,
              data = . ,se_type = "stata") %>%
    tidy()
)

6.8 Export: stargazer

stargazer(ols01bs, ols02bs, type = "text", out = "ols_base.txt")
## 
## ==========================================================
##                                   Dependent variable:     
##                               ----------------------------
##                                        sales_mio          
##                                    (1)            (2)     
## ----------------------------------------------------------
## chapters                          0.103                   
##                                  (0.088)                  
##                                                           
## pages                                          0.015***   
##                                                 (0.005)   
##                                                           
## last_movie_days                  -0.0001        -0.0001   
##                                  (0.0002)      (0.0002)   
##                                                           
## vixi_avg                        -0.021***      -0.022***  
##                                  (0.007)        (0.007)   
##                                                           
## Constant                          1.611         -0.496    
##                                  (0.967)        (1.099)   
##                                                           
## ----------------------------------------------------------
## Observations                        51            51      
## R2                                0.185          0.293    
## Adjusted R2                       0.133          0.248    
## Residual Std. Error (df = 47)     0.415          0.386    
## F Statistic (df = 3; 47)         3.548**       6.497***   
## ==========================================================
## Note:                          *p<0.1; **p<0.05; ***p<0.01

6.9 Export: texreg

pacman::p_load(texreg)

screenreg(list(ols01bs, ols02bs), digits = 2)
## 
## ===================================
##                  Model 1   Model 2 
## -----------------------------------
## (Intercept)       1.61     -0.50   
##                  (0.97)    (1.10)  
## chapters          0.10             
##                  (0.09)            
## last_movie_days  -0.00     -0.00   
##                  (0.00)    (0.00)  
## vixi_avg         -0.02 **  -0.02 **
##                  (0.01)    (0.01)  
## pages                       0.01 **
##                            (0.00)  
## -----------------------------------
## R^2               0.18      0.29   
## Adj. R^2          0.13      0.25   
## Num. obs.        51        51      
## ===================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
#knitreg(list(ols01bs, ols02bs), digits = 2)
#wordreg(list(ols01bs, ols02bs), digits = 2, file = "ols_reg.doc")
plotreg(list(ols01bs, ols02bs), digits = 2)
## Models: bars denote 0.5 (inner) resp. 0.95 (outer) confidence intervals (computed from standard errors).

6.10 Export: lm_robust & wordreg

#doesn't work: lm_robust & stargazer & texreg
ols03es <- lm_robust(sales_mio ~ chapters + last_movie_days + vixi_avg,
              data = op_sales, se_type = "stata")

ols04es <- lm_robust(sales_mio ~ pages + last_movie_days + vixi_avg,
              data = op_sales, se_type = "stata")

#stargazer(ols03es, ols04es, type = "text", out = "ols_estimatr.txt")
#wordreg(list(ols03es, ols04es), digits = 2, file = "ols_estimatr.doc")

6.11 Plot: sjPlot single

#source: http://www.strengejacke.de/sjPlot/index.html
pacman::p_load(sjPlot, sjmisc, sjlabelled)
theme_set(theme_test())

plot_model(ols01bs,
           title = "Sales (in mio)",
           show.values = TRUE, 
           type = "est", 
           digits = 4) +
  #edit with ggplot2
  scale_y_continuous(limits = c(-0.12,0.12))
## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

plot_model(ols02bs,
           title = "Sales (in mio)",
           show.values = TRUE, 
           type = "est", 
           digits = 4) +
  #edit with ggplot2
  scale_y_continuous(limits = c(-0.12,0.12))
## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

6.12 Plot: sjPlot multiple

plot_models(ols01bs, ols02bs,
            title = "Sales (in mio)",
            show.values = TRUE, 
            show.intercept = FALSE, 
            line.size = 0.1,
            p.shape = TRUE,
            value.size = 3,
            digits = 4) +
  #edit with ggplot2
  scale_y_continuous(limits = c(-0.12,0.12)) +
  theme(
      axis.text.x = element_text(size = 8),
      axis.ticks.x = element_blank(),
      axis.line.x = element_blank(),
      axis.title.x = element_text(size = 8),
      axis.line.y = element_blank(),
      axis.title.y = element_text(size = 8),
      axis.text.y = element_text(size = 8),
      plot.title.position = "plot",
      plot.caption.position = "plot",
      legend.title = element_text(size = 8),
      legend.text = element_text(size = 8),
      legend.key.size = unit(0.5, "cm"),
      legend.spacing = unit(1, "cm"),
      plot.title = element_text(hjust = 0, size = 16, face = "bold"),
      plot.subtitle = element_text(hjust = 0, size = 12),
      plot.caption = element_text(hjust = 0, size = 12),
      # legend.title = element_blank(),
      legend.position = "bottom",
      strip.text.x = element_text(size = 8),
      panel.grid.major.y = element_line(colour = "grey97"),
      panel.ontop = FALSE,
      panel.spacing = unit(1, "lines")
      )
## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.

## Warning: Argument 'df_method' is deprecated. Please use 'ci_method' instead.
## Scale for 'y' is already present. Adding another scale for 'y', which will
## replace the existing scale.

6.13 Plot: forestmodel

library(forestmodel)

forest_model(ols01bs, theme = theme_forest())

forest_model(ols02bs, theme = theme_forest())

6.14 Performance: glance

gl01bs <- broom::glance(ols01bs) %>% t() %>% round(digits = 2)
gl02bs <- broom::glance(ols02bs) %>% t() %>% round(digits = 2)

cbind(gl01bs, gl02bs)
##                 [,1]   [,2]
## r.squared       0.18   0.29
## adj.r.squared   0.13   0.25
## sigma           0.41   0.39
## statistic       3.55   6.50
## p.value         0.02   0.00
## df              3.00   3.00
## logLik        -25.42 -21.78
## AIC            60.85  53.57
## BIC            70.51  63.22
## deviance        8.09   7.02
## df.residual    47.00  47.00
## nobs           51.00  51.00

6.15 Performance: check model

pacman::p_load(patchwork, performance)
(
  ch01 <- check_model(ols01bs)
)

(
  ch02 <- check_model(ols02bs)
)

6.16 Performance: compare

#install.packages("performance", repos = "https://easystats.r-universe.dev")
library(performance)
library(see)

(
  comp <- compare_performance(ols01bs, ols02bs)
)
plot(comp)

7 Clearing

#clearing the environment
ls()
##  [1] "ch01"     "ch02"     "comp"     "corr01"   "db01"     "gl01bs"  
##  [7] "gl02bs"   "ols01bs"  "ols01dp"  "ols01es"  "ols02bs"  "ols02dp" 
## [13] "ols02es"  "ols03bs"  "ols03es"  "ols04bs"  "ols04es"  "ols05bs" 
## [19] "ols06bs"  "ols07bs"  "ols08bs"  "op_sales" "plot01"   "plot02"  
## [25] "plot03"   "summ01"   "summ02"   "summ03"   "unrt01"
rm(list=ls())

8 Acknowledgements

8.1 References

 

Contact me at ap.sulistiono@gmail.com