Path Analysis
Lavaan uses “~” to denote regression: the dependent variable goes on the left of the ~ and the predictors, separated by +, go on the right.
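For reference, these are the main operators in lavaan model syntax (a sketch with hypothetical variable names, not the data used below):
#y ~ x1 + x2          regression: y regressed on x1 and x2
#f1 =~ y1 + y2 + y3   measurement: latent variable f1 defined by indicators y1-y3 (see the CFA section)
#y1 ~~ y2             (residual) covariance; y1 ~~ y1 is a variance
#y1 ~ 1               intercept
#ind := a*b           new parameter defined from labeled coefficients (e.g. an indirect effect)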
library(lavaan)
## This is lavaan 0.5-23.1097
## lavaan is BETA software! Please report any bugs.
library(haven)
## Warning: package 'haven' was built under R version 3.4.4
dataissues16 <- read_sav("~/Downloads/dataissues16.sav") #path name for where the file is located
model = 'pcomp~wid+sbvoc'
pathanalysis = sem(model, dataissues16)
summary(pathanalysis)
## lavaan (0.5-23.1097) converged normally after 10 iterations
##
## Number of observations 101
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pcomp ~
## wid 0.250 0.040 6.270 0.000
## sbvoc 0.630 0.155 4.057 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pcomp 17.932 2.523 7.106 0.000
model = 'pcomp~wid+sbvoc
wid~sbvoc'
results = sem(model, dataissues16)
summary(results)
## lavaan (0.5-23.1097) converged normally after 19 iterations
##
## Number of observations 101
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pcomp ~
## wid 0.250 0.040 6.270 0.000
## sbvoc 0.630 0.155 4.057 0.000
## wid ~
## sbvoc 1.756 0.345 5.090 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pcomp 17.932 2.523 7.106 0.000
## .wid 111.276 15.659 7.106 0.000
library(semPlot)
semPaths(results)
CFA
When using summary data with standard deviations and correlations, you can build the covariance matrix with the cor2cov() function. First we define the lower triangle of the correlation matrix as a text string, then we expand it to the full symmetric matrix with getCov(), adding variable names through the names = argument. Finally we convert the correlation matrix to a covariance matrix using the standard deviations.
Next, we define our model. Lavaan uses “=~” to define latent variables, with the latent variable on the left and its indicators on the right.
When we run cfa() on summary data we supply the covariance matrix (sample.cov) and the sample size (sample.nobs).
Plot the results with semPaths(); include what = “est” to print the estimates on the paths.
library(lavaan)
sd = c(1.43, 0.555, 1.38, 0.503)
lower = '1,
.454, 1,
.526, .247, 1,
.377, .309, .548, 1'
corel = getCov(lower, lower = T, names = c("psych67", "phys67", "psych71", "phys71"))
cov = cor2cov(corel, sd)
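#cor2cov() rescales the correlation matrix by the standard deviations, i.e. cov = diag(sd) %*% corel %*% diag(sd)
#optional check of that identity:
#all.equal(cov, diag(sd) %*% corel %*% diag(sd), check.attributes = FALSE)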
model = 'PSYDIS67=~psych67+phys67
PSYDIS71=~psych71+phys71'
results = cfa(model, sample.cov = cov, sample.nobs = 603)
summary(results)
## lavaan (0.5-23.1097) converged normally after 33 iterations
##
## Number of observations 603
##
## Estimator ML
## Minimum Function Test Statistic 18.304
## Degrees of freedom 1
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## PSYDIS67 =~
## psych67 1.000
## phys67 0.208 0.026 7.893 0.000
## PSYDIS71 =~
## psych71 1.000
## phys71 0.271 0.024 11.140 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## PSYDIS67 ~~
## PSYDIS71 1.029 0.091 11.372 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .psych67 0.313 0.186 1.679 0.093
## .phys67 0.233 0.016 14.889 0.000
## .psych71 0.498 0.112 4.466 0.000
## .phys71 0.150 0.012 12.803 0.000
## PSYDIS67 1.728 0.219 7.894 0.000
## PSYDIS71 1.403 0.151 9.291 0.000
parameterEstimates(results)
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 PSYDIS67 =~ psych67 1.000 0.000 NA NA 1.000 1.000
## 2 PSYDIS67 =~ phys67 0.208 0.026 7.893 0.000 0.156 0.260
## 3 PSYDIS71 =~ psych71 1.000 0.000 NA NA 1.000 1.000
## 4 PSYDIS71 =~ phys71 0.271 0.024 11.140 0.000 0.223 0.318
## 5 psych67 ~~ psych67 0.313 0.186 1.679 0.093 -0.052 0.679
## 6 phys67 ~~ phys67 0.233 0.016 14.889 0.000 0.202 0.263
## 7 psych71 ~~ psych71 0.498 0.112 4.466 0.000 0.280 0.717
## 8 phys71 ~~ phys71 0.150 0.012 12.803 0.000 0.127 0.173
## 9 PSYDIS67 ~~ PSYDIS67 1.728 0.219 7.894 0.000 1.299 2.157
## 10 PSYDIS71 ~~ PSYDIS71 1.403 0.151 9.291 0.000 1.107 1.699
## 11 PSYDIS67 ~~ PSYDIS71 1.029 0.091 11.372 0.000 0.852 1.207
fitMeasures(results)
## npar fmin chisq
## 9.000 0.015 18.304
## df pvalue baseline.chisq
## 1.000 0.000 579.458
## baseline.df baseline.pvalue cfi
## 6.000 0.000 0.970
## tli nnfi rfi
## 0.819 0.819 0.810
## nfi pnfi ifi
## 0.968 0.161 0.970
## rni logl unrestricted.logl
## 0.970 -2780.396 -2771.244
## aic bic ntotal
## 5578.791 5618.408 603.000
## bic2 rmsea rmsea.ci.lower
## 5589.836 0.169 0.107
## rmsea.ci.upper rmsea.pvalue rmr
## 0.241 0.001 0.012
## rmr_nomean srmr srmr_bentler
## 0.012 0.034 0.034
## srmr_bentler_nomean srmr_bollen srmr_bollen_nomean
## 0.034 0.034 0.034
## srmr_mplus srmr_mplus_nomean cn_05
## 0.034 0.034 127.555
## cn_01 gfi agfi
## 219.583 0.985 0.853
## pgfi mfi ecvi
## 0.099 0.986 0.060
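#fitMeasures() can also return just the indices you plan to report, for example (one possible selection):
#fitMeasures(results, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))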
inspect(results)
## $lambda
## PSYDIS6 PSYDIS7
## psych67 0 0
## phys67 1 0
## psych71 0 0
## phys71 0 2
##
## $theta
## psyc67 phys67 psyc71 phys71
## psych67 3
## phys67 0 4
## psych71 0 0 5
## phys71 0 0 0 6
##
## $psi
## PSYDIS6 PSYDIS7
## PSYDIS67 7
## PSYDIS71 9 8
library(semPlot)
semPaths(results, what = "est")
Nested model comparison
Just as we did above, we define and fit the models. To compare nested models we use the anova() function. anova() is a generic model-comparison function that chooses the appropriate test based on the class of the objects passed to it; for lavaan objects it performs a chi-square difference test.
library(lavaan)
sd = c(1.43, 0.555, 1.38, 0.503)
lower = '1,
.454, 1,
.526, .247, 1,
.377, .309, .548, 1'
corel = getCov(lower, lower = T, names = c("psych67", "phys67", "psych71", "phys71"))
cov = cor2cov(corel, sd)
model = 'PSYDIS67=~psych67+phys67
PSYDIS71=~psych71+phys71'
results = cfa(model, sample.cov = cov, sample.nobs = 603)
model2 = 'PSYDIS=~psych67+phys67+psych71+phys71'
results2 = cfa(model2, sample.cov = cov, sample.nobs = 603)
anova(results, results2) #anova() here is not a classical ANOVA; it is a generic model-comparison function that picks the test from the class of the fitted objects (lavaan), here a chi-square difference test
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 1 5578.8 5618.4 18.303
## results2 2 5627.1 5662.3 68.621 50.318 1 1.308e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
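The chi-square difference test can be reproduced by hand from the two model chi-squares printed above:
pchisq(68.621 - 18.303, df = 2 - 1, lower.tail = FALSE) #difference of 50.318 on 1 df; p is about 1.3e-12, matching Pr(>Chisq)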
Constraining variance
Lavaan uses “~~” to specify variances and covariances (x ~~ x is a variance, x ~~ y is a covariance).
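Besides constraining variances (as below), “~~” is also how you free a residual covariance between two indicators or fix a variance to a constant; a sketch with hypothetical variable names and a placeholder data frame:
#model = 'f1 =~ y1 + y2 + y3 + y4
#         y1 ~~ y2       #free the residual covariance between y1 and y2
#         y3 ~~ 0.5*y3'  #fix the residual variance of y3 at 0.5
#fit = sem(model, data = mydata)   #mydata is a placeholder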
library(lavaan)
d = read.delim(file = "ex5.1HW.txt", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
model = 'f1=~V1+V2+V3+V4'
results1 = sem(model, d)
summary(results1, standardized = T) #include standardized estimates
## lavaan (0.5-23.1097) converged normally after 26 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 1.210
## Degrees of freedom 2
## P-value (Chi-square) 0.546
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.941 0.671
## V2 1.141 0.114 10.012 0.000 1.073 0.767
## V3 0.969 0.095 10.151 0.000 0.912 0.668
## V4 -0.064 0.085 -0.755 0.450 -0.060 -0.042
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.082 0.106 10.181 0.000 1.082 0.550
## .V2 0.807 0.114 7.090 0.000 0.807 0.412
## .V3 1.033 0.101 10.264 0.000 1.033 0.554
## .V4 2.064 0.140 14.688 0.000 2.064 0.998
## f1 0.885 0.136 6.528 0.000 1.000 1.000
#drop V4
model = 'f1=~V1+V2+V3'
results1 = sem(model, d)
summary(results1, standardized = T)
## lavaan (0.5-23.1097) converged normally after 22 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.942 0.672
## V2 1.136 0.113 10.017 0.000 1.070 0.765
## V3 0.969 0.095 10.152 0.000 0.913 0.669
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.079 0.106 10.150 0.000 1.079 0.549
## .V2 0.812 0.114 7.148 0.000 0.812 0.415
## .V3 1.031 0.101 10.241 0.000 1.031 0.553
## f1 0.888 0.136 6.538 0.000 1.000 1.000
#zero df because the model is just-identified (saturated): 3 indicators give 6 unique variances/covariances and the model estimates 6 parameters (2 loadings + 3 residual variances + 1 factor variance)
#drop V3
model = 'f1=~V1+V2'
results = sem(model, d)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING: could not compute standard errors!
## lavaan NOTE: this may be a symptom that the model is not identified.
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 12 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic NA
## Degrees of freedom -1
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.036 0.739
## V2 0.940 NA 0.974 0.696
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 0.894 NA 0.894 0.455
## .V2 1.010 NA 1.010 0.516
## f1 1.073 NA 1.000 1.000
#NAs because the model is underidentified: 2 indicators give only 3 unique variances/covariances but the model has 4 free parameters (1 loading + 2 residual variances + 1 factor variance), so df = -1
#constrain the residual variances of V1 and V2 to be equal (label both a); this removes one free parameter so the model becomes just-identified
model = 'f1=~V1+V2
V1~~a*V1
V2~~a*V2'
results = sem(model, d)
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 13 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.006 0.718
## V2 0.996 0.080 12.451 0.000 1.002 0.716
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 (a) 0.954 0.065 14.697 0.000 0.954 0.485
## .V2 (a) 0.954 0.065 14.697 0.000 0.954 0.487
## f1 1.013 0.134 7.565 0.000 1.000 1.000
#variance of V1
var(d$V1)
## [1] 1.971384
#V1 only: constrain its error variance to 10% of the observed variance, .1 * 1.971 = .197 (equivalent to assuming a reliability of .90)
model = 'f1=~V1
V1~~.197*V1'
results = sem(model, d)
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 1 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.330 0.949
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 0.197 0.197 0.100
## f1 1.770 0.134 13.225 0.000 1.000 1.000
Using different estimators
Change the estimation method with the estimator = argument to sem() or cfa().
When using MLM, you can compare models with a Satorra-Bentler scaled chi-square difference test by setting the method argument in either anova() or lavTestLRT().
Unfortunately, R does not yet have an equivalent of the DIFFTEST procedure for comparing WLSMV models.
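For reference, a few common estimator choices (a sketch; model and d are as defined in the code below):
#sem(model, d)                       #default maximum likelihood (ML)
#sem(model, d, estimator = "MLM")    #ML estimates with robust SEs and the Satorra-Bentler scaled test statistic
#sem(model, d, estimator = "MLR")    #ML with robust (Huber-White) SEs; also works with missing data
#sem(model, d, estimator = "WLSMV")  #diagonally weighted least squares for ordered-categorical indicators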
library(lavaan)
d = read.delim(file = "ex5.1.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
model = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results = sem(model, d)
model2 = 'f1=~V1+V2+V3+V4+V5+V6'
results2 = sem(model2, d)
anova(results, results2)
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9839.2 9894 3.8957
## results2 9 10065.4 10116 232.0462 228.15 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(results2, results) #the order of the models does not matter; lavaan reorders them automatically, so the output is identical
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9839.2 9894 3.8957
## results2 9 10065.4 10116 232.0462 228.15 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###############
model = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results = sem(model, d, estimator = "MLM")
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 34 iterations
##
## Number of observations 500
##
## Estimator ML Robust
## Minimum Function Test Statistic 3.896 3.903
## Degrees of freedom 8 8
## P-value (Chi-square) 0.866 0.866
## Scaling correction factor 0.998
## for the Satorra-Bentler correction
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.952 0.678
## V2 1.127 0.098 11.510 0.000 1.073 0.769
## V3 1.020 0.088 11.579 0.000 0.971 0.695
## f2 =~
## V4 1.000 0.872 0.609
## V5 1.058 0.122 8.705 0.000 0.923 0.707
## V6 0.897 0.108 8.271 0.000 0.782 0.604
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 -0.030 0.054 -0.556 0.578 -0.036 -0.036
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 -0.022 0.063 -0.354 0.724 -0.022 -0.016
## .V2 0.026 0.062 0.409 0.682 0.026 0.018
## .V3 0.035 0.063 0.554 0.580 0.035 0.025
## .V4 -0.022 0.064 -0.350 0.726 -0.022 -0.016
## .V5 -0.016 0.058 -0.271 0.786 -0.016 -0.012
## .V6 0.048 0.058 0.823 0.411 0.048 0.037
## f1 0.000 0.000 0.000
## f2 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.064 0.096 11.036 0.000 1.064 0.540
## .V2 0.798 0.108 7.417 0.000 0.798 0.409
## .V3 1.010 0.092 10.976 0.000 1.010 0.517
## .V4 1.290 0.124 10.402 0.000 1.290 0.629
## .V5 0.854 0.108 7.919 0.000 0.854 0.501
## .V6 1.066 0.091 11.706 0.000 1.066 0.635
## f1 0.907 0.115 7.894 0.000 1.000 1.000
## f2 0.761 0.124 6.124 0.000 1.000 1.000
results2 = sem(model2, d, estimator = "MLM")
summary(results2, standardized = T)
## lavaan (0.5-23.1097) converged normally after 27 iterations
##
## Number of observations 500
##
## Estimator ML Robust
## Minimum Function Test Statistic 232.046 228.904
## Degrees of freedom 9 9
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.014
## for the Satorra-Bentler correction
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.952 0.678
## V2 1.127 0.098 11.525 0.000 1.073 0.768
## V3 1.021 0.088 11.589 0.000 0.972 0.695
## V4 -0.047 0.082 -0.569 0.569 -0.044 -0.031
## V5 -0.071 0.071 -0.990 0.322 -0.067 -0.052
## V6 0.005 0.069 0.069 0.945 0.005 0.004
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 -0.022 0.063 -0.354 0.724 -0.022 -0.016
## .V2 0.026 0.062 0.409 0.682 0.026 0.018
## .V3 0.035 0.063 0.554 0.580 0.035 0.025
## .V4 -0.022 0.064 -0.350 0.726 -0.022 -0.016
## .V5 -0.016 0.058 -0.271 0.786 -0.016 -0.012
## .V6 0.048 0.058 0.823 0.411 0.048 0.037
## f1 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.065 0.096 11.046 0.000 1.065 0.540
## .V2 0.798 0.108 7.423 0.000 0.798 0.410
## .V3 1.009 0.092 10.982 0.000 1.009 0.517
## .V4 2.048 0.137 14.946 0.000 2.048 0.999
## .V5 1.702 0.107 15.962 0.000 1.702 0.997
## .V6 1.678 0.106 15.906 0.000 1.678 1.000
## f1 0.906 0.115 7.890 0.000 1.000 1.000
lavTestLRT(results, results2, method = "satorra.bentler.2010")
## Scaled Chi Square Difference Test (method = "satorra.bentler.2010")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9851.2 9931.3 3.8957
## results2 9 10077.4 10153.2 232.0462 200.24 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lavTestLRT(results, results2, method = "satorra.bentler.2001") #these give you the same results (I don't actually know what the difference between methods are)
## Scaled Chi Square Difference Test (method = "satorra.bentler.2001")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9851.2 9931.3 3.8957
## results2 9 10077.4 10153.2 232.0462 200.24 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#################
library(lavaan)
d = read.delim(file = "ex5.2.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
#declare the indicators as ordered factors so lavaan treats them as categorical and estimates thresholds
d$V1 = ordered(d$V1)
d$V2 = ordered(d$V2)
d$V3 = ordered(d$V3)
d$V4 = ordered(d$V4)
d$V5 = ordered(d$V5)
d$V6 = ordered(d$V6)
model2f = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results2f = sem(model2f, d, estimator = "WLSMV")
summary(results2f)
## lavaan (0.5-23.1097) converged normally after 15 iterations
##
## Number of observations 500
##
## Estimator DWLS Robust
## Minimum Function Test Statistic 2.454 5.482
## Degrees of freedom 8 8
## P-value (Chi-square) 0.964 0.705
## Scaling correction factor 0.628
## Shift parameter 1.573
## for simple second-order correction (Mplus variant)
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## f1 =~
## V1 1.000
## V2 1.067 0.043 24.637 0.000
## V3 1.001 0.039 25.887 0.000
## f2 =~
## V4 1.000
## V5 1.109 0.054 20.461 0.000
## V6 1.030 0.047 21.966 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## f1 ~~
## f2 -0.021 0.049 -0.431 0.666
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.000
## .V2 0.000
## .V3 0.000
## .V4 0.000
## .V5 0.000
## .V6 0.000
## f1 0.000
## f2 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## V1|t1 0.010 0.056 0.179 0.858
## V2|t1 0.015 0.056 0.268 0.789
## V3|t1 -0.010 0.056 -0.179 0.858
## V4|t1 0.060 0.056 1.072 0.284
## V5|t1 0.020 0.056 0.357 0.721
## V6|t1 0.025 0.056 0.447 0.655
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.200
## .V2 0.088
## .V3 0.198
## .V4 0.264
## .V5 0.094
## .V6 0.219
## f1 0.800 0.046 17.422 0.000
## f2 0.736 0.052 14.085 0.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## V1 1.000
## V2 1.000
## V3 1.000
## V4 1.000
## V5 1.000
## V6 1.000
model1f = 'f1=~V1+V2+V3+V4+V5'
results1f = sem(model1f, d, estimator = "WLSMV")
summary(results1f)
## lavaan (0.5-23.1097) converged normally after 17 iterations
##
## Number of observations 500
##
## Estimator DWLS Robust
## Minimum Function Test Statistic 527.364 449.180
## Degrees of freedom 5 5
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.178
## Shift parameter 1.639
## for simple second-order correction (Mplus variant)
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## f1 =~
## V1 1.000
## V2 1.110 0.045 24.927 0.000
## V3 1.005 0.040 25.272 0.000
## V4 -0.684 0.055 -12.368 0.000
## V5 -0.702 0.055 -12.771 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.000
## .V2 0.000
## .V3 0.000
## .V4 0.000
## .V5 0.000
## f1 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## V1|t1 0.010 0.056 0.179 0.858
## V2|t1 0.015 0.056 0.268 0.789
## V3|t1 -0.010 0.056 -0.179 0.858
## V4|t1 0.060 0.056 1.072 0.284
## V5|t1 0.020 0.056 0.357 0.721
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.279
## .V2 0.111
## .V3 0.271
## .V4 0.663
## .V5 0.644
## f1 0.721 0.043 16.627 0.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## V1 1.000
## V2 1.000
## V3 1.000
## V4 1.000
## V5 1.000
IRT
library(lavaan)
d = read.delim(file = "ex5.2.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
d1 = d[,1:3] #keep only the first 3 variables
d2 = d[,4:6] #vars 4-6
#rasch model
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Warning: package 'msm' was built under R version 3.4.3
## Loading required package: polycor
descript(d1)
##
## Descriptive statistics for the 'd1' data-set
##
## Sample:
## 3 items and 500 sample units; 0 missing values
##
## Proportions for each level of response:
## 0 1 logit
## V1 0.504 0.496 -0.016
## V2 0.506 0.494 -0.024
## V3 0.496 0.504 0.016
##
##
## Frequencies of total scores:
## 0 1 2 3
## Freq 181 72 66 181
##
##
## Point Biserial correlation with Total Score:
## Included Excluded
## V1 0.8602 0.6837
## V2 0.8832 0.7301
## V3 0.8603 0.6839
##
##
## Cronbach's alpha:
## value
## All Items 0.8375
## Excluding V1 0.7895
## Excluding V2 0.7438
## Excluding V3 0.7893
##
##
## Pairwise Associations:
## Item i Item j p.value
## 1 1 3 <2e-16
## 2 1 2 <2e-16
## 3 2 3 <2e-16
fit1 = rasch(d1, constraint = cbind(length(d1) + 1, 1)) #the constraint fixes the common discrimination parameter (the 4th parameter for 3 items) at 1, giving a Rasch model
summary(fit1)
##
## Call:
## rasch(data = d1, constraint = cbind(length(d1) + 1, 1))
##
## Model Summary:
## log.Lik AIC BIC
## -914.2365 1834.473 1847.117
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0200 0.1104 0.1814
## Dffclt.V2 0.0301 0.1104 0.2726
## Dffclt.V3 -0.0205 0.1104 -0.1857
## Dscrmn 1.0000 NA NA
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.0073
## quasi-Newton: BFGS
coef(fit1, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.02002113 1 0.4949949
## V2 0.03008916 1 0.4924783
## V3 -0.02050388 1 0.5051258
plot(fit1, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit1, type = "IIC", annot = FALSE, lwd = 1.5, cex.main = 1.5, cex.lab = 1.3) #item information functions
plot(fit1, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
#1pl: like the Rasch model, but the common discrimination parameter is freely estimated
fit2 <- rasch(d1)
## Warning in rasch(d1): Hessian matrix at convergence is not positive definite; unstable solution.
summary(fit2)
## Warning in sqrt(diag(new.covar)): NaNs produced
## Warning in sqrt(diag(new.covar)): NaNs produced
## Warning in sqrt(Var[n.ind + 1, n.ind + 1]): NaNs produced
##
## Call:
## rasch(data = d1)
##
## Model Summary:
## log.Lik AIC BIC
## -800.1996 1608.399 1625.258
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0033 NaN NaN
## Dffclt.V2 0.0044 NaN NaN
## Dffclt.V3 -0.0011 0.0047 -0.2356
## Dscrmn 21.6585 NaN NaN
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 8.5e-05
## quasi-Newton: BFGS
coef(fit2, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.003282365 21.65846 0.4822347
## V2 0.004379161 21.65846 0.4763063
## V3 -0.001098430 21.65846 0.5059473
plot(fit2, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit2, type = "IIC", annot = FALSE, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) #item information functions
plot(fit2, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
# two parameter logistic model
fit3 <- ltm(d1 ~ z1) #z1 denotes the single latent trait; each item gets its own difficulty and discrimination
summary(fit3)
##
## Call:
## ltm(formula = d1 ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -790.8677 1593.735 1619.023
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0175 0.0594 0.2947
## Dffclt.V2 0.0151 0.0426 0.3548
## Dffclt.V3 -0.0047 0.0593 -0.0786
## Dscrmn.V1 3.5439 0.5944 5.9621
## Dscrmn.V2 8.0957 11.4159 0.7092
## Dscrmn.V3 3.5618 0.6025 5.9116
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.016
## quasi-Newton: BFGS
coef(fit3, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.017502488 3.543942 0.4844980
## V2 0.015114163 8.095659 0.4694483
## V3 -0.004662605 3.561795 0.5041517
plot(fit3, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit3, legend= TRUE, type = "IIC", annot = FALSE, lwd = 1.5, cex.main = 1.5,
cex.lab = 1.3) #item information functions
plot(fit3, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
#3pl (includes guessing)
fit4 = tpm(d1, type = c("latent.trait"))
## Warning in tpm(d1, type = c("latent.trait")): Hessian matrix at convergence is not positive definite; unstable solution.
summary(fit4)
##
## Call:
## tpm(data = d1, type = c("latent.trait"))
##
## Model Summary:
## log.Lik AIC BIC
## -790.7946 1599.589 1637.521
##
## Coefficients:
## value std.err z.vals
## Gussng.V1 0.0118 NaN NaN
## Gussng.V2 0.0040 0.0514 0.0770
## Gussng.V3 0.0132 NaN NaN
## Dffclt.V1 0.0275 NaN NaN
## Dffclt.V2 0.0133 0.0467 0.2842
## Dffclt.V3 0.0073 NaN NaN
## Dscrmn.V1 3.7309 NaN NaN
## Dscrmn.V2 8.8662 26.9651 0.3288
## Dscrmn.V3 3.7600 NaN NaN
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Optimizer: optim (BFGS)
## Convergence: 0
## max(|grad|): 0.0027
plot(fit4)
#compare nested models
anova(fit1, fit2)
##
## Likelihood Ratio Table
## AIC BIC log.Lik LRT df p.value
## fit1 1834.47 1847.12 -914.24
## fit2 1608.40 1625.26 -800.20 228.07 1 <0.001
anova(fit2, fit3)
##
## Likelihood Ratio Table
## AIC BIC log.Lik LRT df p.value
## fit2 1608.40 1625.26 -800.20
## fit3 1593.74 1619.02 -790.87 18.66 2 <0.001
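A possible follow-up (not shown in the output above) is to estimate person ability scores from one of the fitted models with ltm's factor.scores(), e.g.:
#factor.scores(fit3) #ability estimates for each observed response pattern under the 2PL model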
Mediation
#generic template: M is the mediator, X the predictor/treatment, Y the outcome#
#model.M <- lm(M ~ X, myData)
#model.Y <- lm(Y ~ X + M, myData)
#library(mediation)
#results <- mediate(model.M, model.Y, treat='X', mediator='M',
# boot=TRUE, sims=500)
#summary(results)
##
library(mediation)
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.4.4
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.4.3
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.4.6
d = dataissues16
model.m = lm(wid~sbvoc,d)
model.y = lm(pcomp~wid+sbvoc,d)
results <- mediate(model.m, model.y, treat='sbvoc', mediator='wid',
boot=TRUE, sims=1000)
summary(results) #ACME = average causal mediation effect (the indirect effect); ADE = average direct effect
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.440 0.161 0.71 <2e-16 ***
## ADE 0.630 0.308 1.00 <2e-16 ***
## Total Effect 1.070 0.794 1.39 <2e-16 ***
## Prop. Mediated 0.411 0.163 0.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 101
##
##
## Simulations: 1000
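The same indirect effect can also be estimated directly in lavaan by labeling the paths and defining their product with “:=”; a sketch using the same data (se = "bootstrap" requests bootstrapped standard errors):
#model = 'pcomp ~ b*wid + c*sbvoc
#         wid ~ a*sbvoc
#         indirect := a*b      #corresponds to the ACME above
#         total := c + a*b'
#results.lav = sem(model, dataissues16, se = "bootstrap", bootstrap = 1000)
#summary(results.lav)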