ESY

Introduciton

In this post I show how to make a nice looking SEM diagram from a model object fitted with lavaan.

library(tidyverse)
library(lavaan) 
library(ggnetwork)

Lavaan Model

Below is the SEM model we are going to fit (from the lavaan website).

# Lavaan Model
model <- '
  # measurement model
    ind60 =~ x1 + x2 + x3
    dem60 =~ y1 + y2 + y3 + y4
    dem65 =~ y5 + y6 + y7 + y8
  # regressions
    dem60 ~ ind60
    dem65 ~ ind60 + dem60
  # residual correlations
    y1 ~~ y5
    y2 ~~ y4 + y6
    y3 ~~ y7
    y4 ~~ y8
    y6 ~~ y8
'
fit <- sem(model, data=PoliticalDemocracy)

Here is the output:

summary(fit, standardized=TRUE)

## lavaan 0.6-9 ended normally after 68 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        31
##                                                       
##   Number of observations                            75
##                                                       
## Model Test User Model:
##                                                       
##   Test statistic                                38.125
##   Degrees of freedom                                35
##   P-value (Chi-square)                           0.329
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   ind60 =~                                                              
##     x1                1.000                               0.670    0.920
##     x2                2.180    0.139   15.742    0.000    1.460    0.973
##     x3                1.819    0.152   11.967    0.000    1.218    0.872
##   dem60 =~                                                              
##     y1                1.000                               2.223    0.850
##     y2                1.257    0.182    6.889    0.000    2.794    0.717
##     y3                1.058    0.151    6.987    0.000    2.351    0.722
##     y4                1.265    0.145    8.722    0.000    2.812    0.846
##   dem65 =~                                                              
##     y5                1.000                               2.103    0.808
##     y6                1.186    0.169    7.024    0.000    2.493    0.746
##     y7                1.280    0.160    8.002    0.000    2.691    0.824
##     y8                1.266    0.158    8.007    0.000    2.662    0.828
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   dem60 ~                                                               
##     ind60             1.483    0.399    3.715    0.000    0.447    0.447
##   dem65 ~                                                               
##     ind60             0.572    0.221    2.586    0.010    0.182    0.182
##     dem60             0.837    0.098    8.514    0.000    0.885    0.885
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##  .y1 ~~                                                                 
##    .y5                0.624    0.358    1.741    0.082    0.624    0.296
##  .y2 ~~                                                                 
##    .y4                1.313    0.702    1.871    0.061    1.313    0.273
##    .y6                2.153    0.734    2.934    0.003    2.153    0.356
##  .y3 ~~                                                                 
##    .y7                0.795    0.608    1.308    0.191    0.795    0.191
##  .y4 ~~                                                                 
##    .y8                0.348    0.442    0.787    0.431    0.348    0.109
##  .y6 ~~                                                                 
##    .y8                1.356    0.568    2.386    0.017    1.356    0.338
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .x1                0.082    0.019    4.184    0.000    0.082    0.154
##    .x2                0.120    0.070    1.718    0.086    0.120    0.053
##    .x3                0.467    0.090    5.177    0.000    0.467    0.239
##    .y1                1.891    0.444    4.256    0.000    1.891    0.277
##    .y2                7.373    1.374    5.366    0.000    7.373    0.486
##    .y3                5.067    0.952    5.324    0.000    5.067    0.478
##    .y4                3.148    0.739    4.261    0.000    3.148    0.285
##    .y5                2.351    0.480    4.895    0.000    2.351    0.347
##    .y6                4.954    0.914    5.419    0.000    4.954    0.443
##    .y7                3.431    0.713    4.814    0.000    3.431    0.322
##    .y8                3.254    0.695    4.685    0.000    3.254    0.315
##     ind60             0.448    0.087    5.173    0.000    1.000    1.000
##    .dem60             3.956    0.921    4.295    0.000    0.800    0.800
##    .dem65             0.172    0.215    0.803    0.422    0.039    0.039

Create Nodes

Now we are going to create a nice data.frame to specify the locations of nodes (variables in the SEM model) and edges (paths connecting nodes). First, define where the nodes should be positioned spatially and create a data.frame to hold these data:

lavaan_parameters <- parameterestimates(fit)

nodes <- lavaan_parameters %>% 
  select(lhs) %>% 
  rename(name = lhs) %>% 
  distinct(name) %>% 
  mutate(
    x = case_when(str_detect(name,("^y"))      ~ 0,
                  name %in% c("dem60","dem65") ~ .33,
                  name == "ind60"              ~ .66,
                  name == "x1"                 ~ .6,
                  name == "x2"                 ~ .66,
                  name == "x3"                 ~ .72),
    y = case_when(name %in% c("x1","x2","x3")       ~ 1.05,
                  name == "y1"                      ~ 1.05,
                  name == "y2"                      ~ .9,
                  name %in% c("y3","dem60")         ~ .75,
                  name == "ind60"                   ~ .525,
                  name == "y4"                      ~ .6,
                  name == "y5"                      ~ .45,
                  name %in% c("y6","dem65")         ~ .3,
                  name == "y7"                      ~ .15,
                  name == "y8"                      ~  0),
    xend = x,
    yend = y
  )

Create Edges

Now the same for edges:

edges <- lavaan_parameters %>%
  filter(op %in% c("~","=~"))

Next we need to combine our nodes and edges into a single table so we can plot it with ggplot2. To do this, we will merge the nodes and edges in a specific way to get all information represented in a single data.frame:

combined <- nodes %>% 
  bind_rows(
    left_join(edges,nodes %>% select(name,x,y),by=c("lhs"="name")) %>%
      left_join(nodes %>% select(name,xend,yend),by = c("rhs"="name"))
  )

combined_edge_labels <- combined %>% 
  mutate(
    est = round(est,2),
    p.code     = ifelse(pvalue<.05,"p < .05","p > .05"),
    shape      = ifelse(str_detect(name,"y\\d|x\\d"),"observed","latent"),
    midpoint.x = (x + xend)/2,
    midpoint.y = (y + yend)/2,
    x2    = ifelse(op=="~",xend,x),
    xend2 = ifelse(op=="~",x,xend),
    y2    = ifelse(op=="~",yend,y),
    yend2 = ifelse(op=="~",y,yend),
    rise = yend2-y2,
    run  = x2-xend2,
    dist = sqrt(run^2 + rise^2) %>% round(2),
    newx = case_when(str_detect(rhs,"y\\d") ~ (x2 + (xend2 - x2) * .90),
                     str_detect(rhs,"x\\d") ~ (x2 + (xend2 - x2) * .75),
                     lhs == "dem65" & rhs == "dem60" ~ (x2 + (xend2 - x2) * .7),
                     lhs == "dem65" & rhs == "ind60" ~ (x2 + (xend2 - x2) * .85),
                     lhs == "dem60" & rhs == "ind60" ~ (x2 + (xend2 - x2) * .85)),
    newy = case_when(str_detect(rhs,"y\\d") ~ (y2 + (yend2 - y2) * .90),
                     str_detect(rhs,"x\\d") ~ (y2 + (yend2 - y2) * .85),
                     lhs == "dem65" & rhs == "dem60" ~ (y2 + (yend2 - y2) * .85),
                     lhs == "dem65" & rhs == "ind60" ~ (y2 + (yend2 - y2) * .9),
                     lhs == "dem60" & rhs == "ind60" ~ (y2 + (yend2 - y2) * .9)),
  )

Make the Diagram

Now we plot:

combined_edge_labels %>% 
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_edges(aes(x = x2, y = y2, xend = newx, yend = newy),
             arrow = arrow(length = unit(6, "pt"), type = "closed",ends = "last")) +
  geom_nodes(aes(shape = factor(shape,levels = c("observed","latent"))), color = "grey50",size = 16) +
  geom_nodetext(aes(label = name),fontface = "bold") +
  geom_label(aes(x = midpoint.x, y = midpoint.y, label = est), color = "black",label.size = NA,hjust = .5,vjust=.5) +
  scale_y_continuous(expand = c(.05,0)) +
  scale_shape_manual(values = c(15,19),guide=F) +
  theme_blank()

SEM Plots for Lavaan Models

Drawing path diagrams from lavaan objects in ggplot2

Introduciton

Lavaan Model

Create Nodes

Create Edges

Make the Diagram