Path Analysis
Lavaan uses “~” to denote regression: the dependent variable goes on the left of the ~ and the predictors, separated by +, go on the right.
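For reference, these are the main operators in lavaan model syntax (a sketch with hypothetical variable names, not the data used below):
#y ~ x1 + x2          regression: y regressed on x1 and x2
#f1 =~ y1 + y2 + y3   measurement: latent variable f1 defined by indicators y1-y3 (see the CFA section)
#y1 ~~ y2             (residual) covariance; y1 ~~ y1 is a variance
#y1 ~ 1               intercept
#ind := a*b           new parameter defined from labeled coefficients (e.g. an indirect effect)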
library(lavaan)
## This is lavaan 0.5-23.1097
## lavaan is BETA software! Please report any bugs.
library(haven)
## Warning: package 'haven' was built under R version 3.4.4
dataissues16 <- read_sav("~/Downloads/dataissues16.sav") #path name for where the file is located
model = 'pcomp~wid+sbvoc'
pathanalysis = sem(model, dataissues16)
summary(pathanalysis)
## lavaan (0.5-23.1097) converged normally after 10 iterations
##
## Number of observations 101
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pcomp ~
## wid 0.250 0.040 6.270 0.000
## sbvoc 0.630 0.155 4.057 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pcomp 17.932 2.523 7.106 0.000
model = 'pcomp~wid+sbvoc
wid~sbvoc'
results = sem(model, dataissues16)
summary(results)
## lavaan (0.5-23.1097) converged normally after 19 iterations
##
## Number of observations 101
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Regressions:
## Estimate Std.Err z-value P(>|z|)
## pcomp ~
## wid 0.250 0.040 6.270 0.000
## sbvoc 0.630 0.155 4.057 0.000
## wid ~
## sbvoc 1.756 0.345 5.090 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .pcomp 17.932 2.523 7.106 0.000
## .wid 111.276 15.659 7.106 0.000
library(semPlot)
semPaths(results)
CFA
When using summary data with standard deviations and correlations, you can build the covariance matrix with the cor2cov() function. First we define the lower triangle of the correlation matrix as a text string, then we expand it to the full symmetric matrix with getCov(), adding variable names through the names = argument. Finally we convert the correlation matrix to a covariance matrix using the standard deviations.
Next, we define our model. Lavaan uses “=~” to define latent variables, with the latent variable on the left and its indicators on the right.
When we run cfa() on summary data we supply the covariance matrix (sample.cov) and the sample size (sample.nobs).
Plot the results with semPaths(); include what = “est” to print the estimates on the paths.
library(lavaan)
sd = c(1.43, 0.555, 1.38, 0.503)
lower = '1,
.454, 1,
.526, .247, 1,
.377, .309, .548, 1'
corel = getCov(lower, lower = T, names = c("psych67", "phys67", "psych71", "phys71"))
cov = cor2cov(corel, sd)
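#cor2cov() rescales the correlation matrix by the standard deviations, i.e. cov = diag(sd) %*% corel %*% diag(sd)
#optional check of that identity:
#all.equal(cov, diag(sd) %*% corel %*% diag(sd), check.attributes = FALSE)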
model = 'PSYDIS67=~psych67+phys67
PSYDIS71=~psych71+phys71'
results = cfa(model, sample.cov = cov, sample.nobs = 603)
summary(results)
## lavaan (0.5-23.1097) converged normally after 33 iterations
##
## Number of observations 603
##
## Estimator ML
## Minimum Function Test Statistic 18.304
## Degrees of freedom 1
## P-value (Chi-square) 0.000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## PSYDIS67 =~
## psych67 1.000
## phys67 0.208 0.026 7.893 0.000
## PSYDIS71 =~
## psych71 1.000
## phys71 0.271 0.024 11.140 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## PSYDIS67 ~~
## PSYDIS71 1.029 0.091 11.372 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .psych67 0.313 0.186 1.679 0.093
## .phys67 0.233 0.016 14.889 0.000
## .psych71 0.498 0.112 4.466 0.000
## .phys71 0.150 0.012 12.803 0.000
## PSYDIS67 1.728 0.219 7.894 0.000
## PSYDIS71 1.403 0.151 9.291 0.000
parameterEstimates(results)
## lhs op rhs est se z pvalue ci.lower ci.upper
## 1 PSYDIS67 =~ psych67 1.000 0.000 NA NA 1.000 1.000
## 2 PSYDIS67 =~ phys67 0.208 0.026 7.893 0.000 0.156 0.260
## 3 PSYDIS71 =~ psych71 1.000 0.000 NA NA 1.000 1.000
## 4 PSYDIS71 =~ phys71 0.271 0.024 11.140 0.000 0.223 0.318
## 5 psych67 ~~ psych67 0.313 0.186 1.679 0.093 -0.052 0.679
## 6 phys67 ~~ phys67 0.233 0.016 14.889 0.000 0.202 0.263
## 7 psych71 ~~ psych71 0.498 0.112 4.466 0.000 0.280 0.717
## 8 phys71 ~~ phys71 0.150 0.012 12.803 0.000 0.127 0.173
## 9 PSYDIS67 ~~ PSYDIS67 1.728 0.219 7.894 0.000 1.299 2.157
## 10 PSYDIS71 ~~ PSYDIS71 1.403 0.151 9.291 0.000 1.107 1.699
## 11 PSYDIS67 ~~ PSYDIS71 1.029 0.091 11.372 0.000 0.852 1.207
fitMeasures(results)
## npar fmin chisq
## 9.000 0.015 18.304
## df pvalue baseline.chisq
## 1.000 0.000 579.458
## baseline.df baseline.pvalue cfi
## 6.000 0.000 0.970
## tli nnfi rfi
## 0.819 0.819 0.810
## nfi pnfi ifi
## 0.968 0.161 0.970
## rni logl unrestricted.logl
## 0.970 -2780.396 -2771.244
## aic bic ntotal
## 5578.791 5618.408 603.000
## bic2 rmsea rmsea.ci.lower
## 5589.836 0.169 0.107
## rmsea.ci.upper rmsea.pvalue rmr
## 0.241 0.001 0.012
## rmr_nomean srmr srmr_bentler
## 0.012 0.034 0.034
## srmr_bentler_nomean srmr_bollen srmr_bollen_nomean
## 0.034 0.034 0.034
## srmr_mplus srmr_mplus_nomean cn_05
## 0.034 0.034 127.555
## cn_01 gfi agfi
## 219.583 0.985 0.853
## pgfi mfi ecvi
## 0.099 0.986 0.060
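#fitMeasures() can also return just the indices you plan to report, for example (one possible selection):
#fitMeasures(results, c("chisq", "df", "pvalue", "cfi", "tli", "rmsea", "srmr"))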
inspect(results)
## $lambda
## PSYDIS6 PSYDIS7
## psych67 0 0
## phys67 1 0
## psych71 0 0
## phys71 0 2
##
## $theta
## psyc67 phys67 psyc71 phys71
## psych67 3
## phys67 0 4
## psych71 0 0 5
## phys71 0 0 0 6
##
## $psi
## PSYDIS6 PSYDIS7
## PSYDIS67 7
## PSYDIS71 9 8
library(semPlot)
semPaths(results, what = "est")
Nested model comparison
Just as we did above, we define and fit the models. To compare nested models we use the anova() function. anova() is a generic model-comparison function that chooses the appropriate test based on the class of the objects passed to it; for lavaan objects it performs a chi-square difference test.
library(lavaan)
sd = c(1.43, 0.555, 1.38, 0.503)
lower = '1,
.454, 1,
.526, .247, 1,
.377, .309, .548, 1'
corel = getCov(lower, lower = T, names = c("psych67", "phys67", "psych71", "phys71"))
cov = cor2cov(corel, sd)
model = 'PSYDIS67=~psych67+phys67
PSYDIS71=~psych71+phys71'
results = cfa(model, sample.cov = cov, sample.nobs = 603)
model2 = 'PSYDIS=~psych67+phys67+psych71+phys71'
results2 = cfa(model2, sample.cov = cov, sample.nobs = 603)
anova(results, results2) #anova() here is not a classical ANOVA; it is a generic model-comparison function that picks the test from the class of the fitted objects (lavaan), here a chi-square difference test
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 1 5578.8 5618.4 18.303
## results2 2 5627.1 5662.3 68.621 50.318 1 1.308e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
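The chi-square difference test can be reproduced by hand from the two model chi-squares printed above:
pchisq(68.621 - 18.303, df = 2 - 1, lower.tail = FALSE) #difference of 50.318 on 1 df; p is about 1.3e-12, matching Pr(>Chisq)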
Constraining variance
Lavaan uses “~~” to specify variances and covariances (x ~~ x is a variance, x ~~ y is a covariance).
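Besides constraining variances (as below), “~~” is also how you free a residual covariance between two indicators or fix a variance to a constant; a sketch with hypothetical variable names and a placeholder data frame:
#model = 'f1 =~ y1 + y2 + y3 + y4
#         y1 ~~ y2       #free the residual covariance between y1 and y2
#         y3 ~~ 0.5*y3'  #fix the residual variance of y3 at 0.5
#fit = sem(model, data = mydata)   #mydata is a placeholder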
library(lavaan)
d = read.delim(file = "ex5.1HW.txt", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
model = 'f1=~V1+V2+V3+V4'
results1 = sem(model, d)
summary(results1, standardized = T) #include standardized estimates
## lavaan (0.5-23.1097) converged normally after 26 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 1.210
## Degrees of freedom 2
## P-value (Chi-square) 0.546
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.941 0.671
## V2 1.141 0.114 10.012 0.000 1.073 0.767
## V3 0.969 0.095 10.151 0.000 0.912 0.668
## V4 -0.064 0.085 -0.755 0.450 -0.060 -0.042
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.082 0.106 10.181 0.000 1.082 0.550
## .V2 0.807 0.114 7.090 0.000 0.807 0.412
## .V3 1.033 0.101 10.264 0.000 1.033 0.554
## .V4 2.064 0.140 14.688 0.000 2.064 0.998
## f1 0.885 0.136 6.528 0.000 1.000 1.000
#drop V4
model = 'f1=~V1+V2+V3'
results1 = sem(model, d)
summary(results1, standardized = T)
## lavaan (0.5-23.1097) converged normally after 22 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.942 0.672
## V2 1.136 0.113 10.017 0.000 1.070 0.765
## V3 0.969 0.095 10.152 0.000 0.913 0.669
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.079 0.106 10.150 0.000 1.079 0.549
## .V2 0.812 0.114 7.148 0.000 0.812 0.415
## .V3 1.031 0.101 10.241 0.000 1.031 0.553
## f1 0.888 0.136 6.538 0.000 1.000 1.000
#zero df because the model is just-identified (saturated): 3 indicators give 6 unique variances/covariances and the model estimates 6 parameters (2 loadings + 3 residual variances + 1 factor variance)
#drop V3
model = 'f1=~V1+V2'
results = sem(model, d)
## Warning in lav_model_vcov(lavmodel = lavmodel, lavsamplestats = lavsamplestats, : lavaan WARNING: could not compute standard errors!
## lavaan NOTE: this may be a symptom that the model is not identified.
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 12 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic NA
## Degrees of freedom -1
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.036 0.739
## V2 0.940 NA 0.974 0.696
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 0.894 NA 0.894 0.455
## .V2 1.010 NA 1.010 0.516
## f1 1.073 NA 1.000 1.000
#NAs because the model is underidentified: 2 indicators give only 3 unique variances/covariances but the model has 4 free parameters (1 loading + 2 residual variances + 1 factor variance), so df = -1
#constrain the residual variances of V1 and V2 to be equal (label both a); this removes one free parameter so the model becomes just-identified
model = 'f1=~V1+V2
V1~~a*V1
V2~~a*V2'
results = sem(model, d)
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 13 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.006 0.718
## V2 0.996 0.080 12.451 0.000 1.002 0.716
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 (a) 0.954 0.065 14.697 0.000 0.954 0.485
## .V2 (a) 0.954 0.065 14.697 0.000 0.954 0.487
## f1 1.013 0.134 7.565 0.000 1.000 1.000
#variance of V1
var(d$V1)
## [1] 1.971384
#V1 only: constrain its error variance to 10% of the observed variance, .1 * 1.971 = .197 (equivalent to assuming a reliability of .90)
model = 'f1=~V1
V1~~.197*V1'
results = sem(model, d)
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 1 iterations
##
## Number of observations 432
##
## Estimator ML
## Minimum Function Test Statistic 0.000
## Degrees of freedom 0
## Minimum Function Value 0.0000000000000
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Standard
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 1.330 0.949
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 0.197 0.197 0.100
## f1 1.770 0.134 13.225 0.000 1.000 1.000
Using different estimators
Change the estimation method with the estimator = argument to sem() or cfa().
When using MLM, you can compare models with a Satorra-Bentler scaled chi-square difference test by setting the method argument in either anova() or lavTestLRT().
Unfortunately, R does not yet have an equivalent of the DIFFTEST procedure for comparing WLSMV models.
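For reference, a few common estimator choices (a sketch; model and d are as defined in the code below):
#sem(model, d)                       #default maximum likelihood (ML)
#sem(model, d, estimator = "MLM")    #ML estimates with robust SEs and the Satorra-Bentler scaled test statistic
#sem(model, d, estimator = "MLR")    #ML with robust (Huber-White) SEs; also works with missing data
#sem(model, d, estimator = "WLSMV")  #diagonally weighted least squares for ordered-categorical indicators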
library(lavaan)
d = read.delim(file = "ex5.1.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
model = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results = sem(model, d)
model2 = 'f1=~V1+V2+V3+V4+V5+V6'
results2 = sem(model2, d)
anova(results, results2)
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9839.2 9894 3.8957
## results2 9 10065.4 10116 232.0462 228.15 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(results2, results) #the order of the models does not matter; lavaan reorders them automatically, so the output is identical
## Chi Square Difference Test
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9839.2 9894 3.8957
## results2 9 10065.4 10116 232.0462 228.15 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
###############
model = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results = sem(model, d, estimator = "MLM")
summary(results, standardized = T)
## lavaan (0.5-23.1097) converged normally after 34 iterations
##
## Number of observations 500
##
## Estimator ML Robust
## Minimum Function Test Statistic 3.896 3.903
## Degrees of freedom 8 8
## P-value (Chi-square) 0.866 0.866
## Scaling correction factor 0.998
## for the Satorra-Bentler correction
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.952 0.678
## V2 1.127 0.098 11.510 0.000 1.073 0.769
## V3 1.020 0.088 11.579 0.000 0.971 0.695
## f2 =~
## V4 1.000 0.872 0.609
## V5 1.058 0.122 8.705 0.000 0.923 0.707
## V6 0.897 0.108 8.271 0.000 0.782 0.604
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 ~~
## f2 -0.030 0.054 -0.556 0.578 -0.036 -0.036
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 -0.022 0.063 -0.354 0.724 -0.022 -0.016
## .V2 0.026 0.062 0.409 0.682 0.026 0.018
## .V3 0.035 0.063 0.554 0.580 0.035 0.025
## .V4 -0.022 0.064 -0.350 0.726 -0.022 -0.016
## .V5 -0.016 0.058 -0.271 0.786 -0.016 -0.012
## .V6 0.048 0.058 0.823 0.411 0.048 0.037
## f1 0.000 0.000 0.000
## f2 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.064 0.096 11.036 0.000 1.064 0.540
## .V2 0.798 0.108 7.417 0.000 0.798 0.409
## .V3 1.010 0.092 10.976 0.000 1.010 0.517
## .V4 1.290 0.124 10.402 0.000 1.290 0.629
## .V5 0.854 0.108 7.919 0.000 0.854 0.501
## .V6 1.066 0.091 11.706 0.000 1.066 0.635
## f1 0.907 0.115 7.894 0.000 1.000 1.000
## f2 0.761 0.124 6.124 0.000 1.000 1.000
results2 = sem(model2, d, estimator = "MLM")
summary(results2, standardized = T)
## lavaan (0.5-23.1097) converged normally after 27 iterations
##
## Number of observations 500
##
## Estimator ML Robust
## Minimum Function Test Statistic 232.046 228.904
## Degrees of freedom 9 9
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.014
## for the Satorra-Bentler correction
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## f1 =~
## V1 1.000 0.952 0.678
## V2 1.127 0.098 11.525 0.000 1.073 0.768
## V3 1.021 0.088 11.589 0.000 0.972 0.695
## V4 -0.047 0.082 -0.569 0.569 -0.044 -0.031
## V5 -0.071 0.071 -0.990 0.322 -0.067 -0.052
## V6 0.005 0.069 0.069 0.945 0.005 0.004
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 -0.022 0.063 -0.354 0.724 -0.022 -0.016
## .V2 0.026 0.062 0.409 0.682 0.026 0.018
## .V3 0.035 0.063 0.554 0.580 0.035 0.025
## .V4 -0.022 0.064 -0.350 0.726 -0.022 -0.016
## .V5 -0.016 0.058 -0.271 0.786 -0.016 -0.012
## .V6 0.048 0.058 0.823 0.411 0.048 0.037
## f1 0.000 0.000 0.000
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .V1 1.065 0.096 11.046 0.000 1.065 0.540
## .V2 0.798 0.108 7.423 0.000 0.798 0.410
## .V3 1.009 0.092 10.982 0.000 1.009 0.517
## .V4 2.048 0.137 14.946 0.000 2.048 0.999
## .V5 1.702 0.107 15.962 0.000 1.702 0.997
## .V6 1.678 0.106 15.906 0.000 1.678 1.000
## f1 0.906 0.115 7.890 0.000 1.000 1.000
lavTestLRT(results, results2, method = "satorra.bentler.2010")
## Scaled Chi Square Difference Test (method = "satorra.bentler.2010")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9851.2 9931.3 3.8957
## results2 9 10077.4 10153.2 232.0462 200.24 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lavTestLRT(results, results2, method = "satorra.bentler.2001") #these give you the same results (I don't actually know what the difference between methods are)
## Scaled Chi Square Difference Test (method = "satorra.bentler.2001")
##
## Df AIC BIC Chisq Chisq diff Df diff Pr(>Chisq)
## results 8 9851.2 9931.3 3.8957
## results2 9 10077.4 10153.2 232.0462 200.24 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#################
library(lavaan)
d = read.delim(file = "ex5.2.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
#declare the indicators as ordered factors so lavaan treats them as categorical and estimates thresholds
d$V1 = ordered(d$V1)
d$V2 = ordered(d$V2)
d$V3 = ordered(d$V3)
d$V4 = ordered(d$V4)
d$V5 = ordered(d$V5)
d$V6 = ordered(d$V6)
model2f = 'f1=~V1+V2+V3
f2=~V4+V5+V6'
results2f = sem(model2f, d, estimator = "WLSMV")
summary(results2f)
## lavaan (0.5-23.1097) converged normally after 15 iterations
##
## Number of observations 500
##
## Estimator DWLS Robust
## Minimum Function Test Statistic 2.454 5.482
## Degrees of freedom 8 8
## P-value (Chi-square) 0.964 0.705
## Scaling correction factor 0.628
## Shift parameter 1.573
## for simple second-order correction (Mplus variant)
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## f1 =~
## V1 1.000
## V2 1.067 0.043 24.637 0.000
## V3 1.001 0.039 25.887 0.000
## f2 =~
## V4 1.000
## V5 1.109 0.054 20.461 0.000
## V6 1.030 0.047 21.966 0.000
##
## Covariances:
## Estimate Std.Err z-value P(>|z|)
## f1 ~~
## f2 -0.021 0.049 -0.431 0.666
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.000
## .V2 0.000
## .V3 0.000
## .V4 0.000
## .V5 0.000
## .V6 0.000
## f1 0.000
## f2 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## V1|t1 0.010 0.056 0.179 0.858
## V2|t1 0.015 0.056 0.268 0.789
## V3|t1 -0.010 0.056 -0.179 0.858
## V4|t1 0.060 0.056 1.072 0.284
## V5|t1 0.020 0.056 0.357 0.721
## V6|t1 0.025 0.056 0.447 0.655
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.200
## .V2 0.088
## .V3 0.198
## .V4 0.264
## .V5 0.094
## .V6 0.219
## f1 0.800 0.046 17.422 0.000
## f2 0.736 0.052 14.085 0.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## V1 1.000
## V2 1.000
## V3 1.000
## V4 1.000
## V5 1.000
## V6 1.000
model1f = 'f1=~V1+V2+V3+V4+V5'
results1f = sem(model1f, d, estimator = "WLSMV")
summary(results1f)
## lavaan (0.5-23.1097) converged normally after 17 iterations
##
## Number of observations 500
##
## Estimator DWLS Robust
## Minimum Function Test Statistic 527.364 449.180
## Degrees of freedom 5 5
## P-value (Chi-square) 0.000 0.000
## Scaling correction factor 1.178
## Shift parameter 1.639
## for simple second-order correction (Mplus variant)
##
## Parameter Estimates:
##
## Information Expected
## Standard Errors Robust.sem
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|)
## f1 =~
## V1 1.000
## V2 1.110 0.045 24.927 0.000
## V3 1.005 0.040 25.272 0.000
## V4 -0.684 0.055 -12.368 0.000
## V5 -0.702 0.055 -12.771 0.000
##
## Intercepts:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.000
## .V2 0.000
## .V3 0.000
## .V4 0.000
## .V5 0.000
## f1 0.000
##
## Thresholds:
## Estimate Std.Err z-value P(>|z|)
## V1|t1 0.010 0.056 0.179 0.858
## V2|t1 0.015 0.056 0.268 0.789
## V3|t1 -0.010 0.056 -0.179 0.858
## V4|t1 0.060 0.056 1.072 0.284
## V5|t1 0.020 0.056 0.357 0.721
##
## Variances:
## Estimate Std.Err z-value P(>|z|)
## .V1 0.279
## .V2 0.111
## .V3 0.271
## .V4 0.663
## .V5 0.644
## f1 0.721 0.043 16.627 0.000
##
## Scales y*:
## Estimate Std.Err z-value P(>|z|)
## V1 1.000
## V2 1.000
## V3 1.000
## V4 1.000
## V5 1.000
IRT
library(lavaan)
d = read.delim(file = "ex5.2.dat", header = FALSE, sep = "")#file needs to be in the same folder as R project otherwise instead of file name put file.choose() and it will pull up a browser to choose
d1 = d[,1:3] #keep only the first 3 variables
d2 = d[,4:6] #vars 4-6
#rasch model
library(ltm)
## Loading required package: MASS
## Loading required package: msm
## Warning: package 'msm' was built under R version 3.4.3
## Loading required package: polycor
descript(d1)
##
## Descriptive statistics for the 'd1' data-set
##
## Sample:
## 3 items and 500 sample units; 0 missing values
##
## Proportions for each level of response:
## 0 1 logit
## V1 0.504 0.496 -0.016
## V2 0.506 0.494 -0.024
## V3 0.496 0.504 0.016
##
##
## Frequencies of total scores:
## 0 1 2 3
## Freq 181 72 66 181
##
##
## Point Biserial correlation with Total Score:
## Included Excluded
## V1 0.8602 0.6837
## V2 0.8832 0.7301
## V3 0.8603 0.6839
##
##
## Cronbach's alpha:
## value
## All Items 0.8375
## Excluding V1 0.7895
## Excluding V2 0.7438
## Excluding V3 0.7893
##
##
## Pairwise Associations:
## Item i Item j p.value
## 1 1 3 <2e-16
## 2 1 2 <2e-16
## 3 2 3 <2e-16
fit1 = rasch(d1, constraint = cbind(length(d1) + 1, 1)) #the constraint fixes the common discrimination parameter (the 4th parameter for 3 items) at 1, giving a Rasch model
summary(fit1)
##
## Call:
## rasch(data = d1, constraint = cbind(length(d1) + 1, 1))
##
## Model Summary:
## log.Lik AIC BIC
## -914.2365 1834.473 1847.117
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0200 0.1104 0.1814
## Dffclt.V2 0.0301 0.1104 0.2726
## Dffclt.V3 -0.0205 0.1104 -0.1857
## Dscrmn 1.0000 NA NA
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.0073
## quasi-Newton: BFGS
coef(fit1, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.02002113 1 0.4949949
## V2 0.03008916 1 0.4924783
## V3 -0.02050388 1 0.5051258
plot(fit1, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit1, type = "IIC", annot = FALSE, lwd = 1.5, cex.main = 1.5, cex.lab = 1.3) #item information functions
plot(fit1, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
#1pl: like the Rasch model, but the common discrimination parameter is freely estimated
fit2 <- rasch(d1)
## Warning in rasch(d1): Hessian matrix at convergence is not positive definite; unstable solution.
summary(fit2)
## Warning in sqrt(diag(new.covar)): NaNs produced
## Warning in sqrt(diag(new.covar)): NaNs produced
## Warning in sqrt(Var[n.ind + 1, n.ind + 1]): NaNs produced
##
## Call:
## rasch(data = d1)
##
## Model Summary:
## log.Lik AIC BIC
## -800.1996 1608.399 1625.258
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0033 NaN NaN
## Dffclt.V2 0.0044 NaN NaN
## Dffclt.V3 -0.0011 0.0047 -0.2356
## Dscrmn 21.6585 NaN NaN
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 8.5e-05
## quasi-Newton: BFGS
coef(fit2, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.003282365 21.65846 0.4822347
## V2 0.004379161 21.65846 0.4763063
## V3 -0.001098430 21.65846 0.5059473
plot(fit2, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit2, type = "IIC", annot = FALSE, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) #item information functions
plot(fit2, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
# two parameter logistic model
fit3 <- ltm(d1 ~ z1) #z1 denotes the single latent trait; each item gets its own difficulty and discrimination
summary(fit3)
##
## Call:
## ltm(formula = d1 ~ z1)
##
## Model Summary:
## log.Lik AIC BIC
## -790.8677 1593.735 1619.023
##
## Coefficients:
## value std.err z.vals
## Dffclt.V1 0.0175 0.0594 0.2947
## Dffclt.V2 0.0151 0.0426 0.3548
## Dffclt.V3 -0.0047 0.0593 -0.0786
## Dscrmn.V1 3.5439 0.5944 5.9621
## Dscrmn.V2 8.0957 11.4159 0.7092
## Dscrmn.V3 3.5618 0.6025 5.9116
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Convergence: 0
## max(|grad|): 0.016
## quasi-Newton: BFGS
coef(fit3, prob = TRUE, order = FALSE)
## Dffclt Dscrmn P(x=1|z=0)
## V1 0.017502488 3.543942 0.4844980
## V2 0.015114163 8.095659 0.4694483
## V3 -0.004662605 3.561795 0.5041517
plot(fit3, legend = TRUE, cx = "bottomright", lwd = 1.5,
cex.main = 1.5, cex.lab = 1.3, cex = .5)
plot(fit3, legend= TRUE, type = "IIC", annot = FALSE, lwd = 1.5, cex.main = 1.5,
cex.lab = 1.3) #item information functions
plot(fit3, type = "IIC", items = 0, lwd = 3, cex.main = 1.5,
cex.lab = 1.3) # total Test Information Function
#3pl (includes guessing)
fit4 = tpm(d1, type = c("latent.trait"))
## Warning in tpm(d1, type = c("latent.trait")): Hessian matrix at convergence is not positive definite; unstable solution.
summary(fit4)
##
## Call:
## tpm(data = d1, type = c("latent.trait"))
##
## Model Summary:
## log.Lik AIC BIC
## -790.7946 1599.589 1637.521
##
## Coefficients:
## value std.err z.vals
## Gussng.V1 0.0118 NaN NaN
## Gussng.V2 0.0040 0.0514 0.0770
## Gussng.V3 0.0132 NaN NaN
## Dffclt.V1 0.0275 NaN NaN
## Dffclt.V2 0.0133 0.0467 0.2842
## Dffclt.V3 0.0073 NaN NaN
## Dscrmn.V1 3.7309 NaN NaN
## Dscrmn.V2 8.8662 26.9651 0.3288
## Dscrmn.V3 3.7600 NaN NaN
##
## Integration:
## method: Gauss-Hermite
## quadrature points: 21
##
## Optimization:
## Optimizer: optim (BFGS)
## Convergence: 0
## max(|grad|): 0.0027
plot(fit4)
#compare nested models
anova(fit1, fit2)
##
## Likelihood Ratio Table
## AIC BIC log.Lik LRT df p.value
## fit1 1834.47 1847.12 -914.24
## fit2 1608.40 1625.26 -800.20 228.07 1 <0.001
anova(fit2, fit3)
##
## Likelihood Ratio Table
## AIC BIC log.Lik LRT df p.value
## fit2 1608.40 1625.26 -800.20
## fit3 1593.74 1619.02 -790.87 18.66 2 <0.001
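A possible follow-up (not shown in the output above) is to estimate person ability scores from one of the fitted models with ltm's factor.scores(), e.g.:
#factor.scores(fit3) #ability estimates for each observed response pattern under the 2PL model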
Mediation
#generic template: M is the mediator, X the predictor/treatment, Y the outcome#
#model.M <- lm(M ~ X, myData)
#model.Y <- lm(Y ~ X + M, myData)
#library(mediation)
#results <- mediate(model.M, model.Y, treat='X', mediator='M',
# boot=TRUE, sims=500)
#summary(results)
##
library(mediation)
## Loading required package: Matrix
## Warning: package 'Matrix' was built under R version 3.4.4
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 3.4.3
## Loading required package: sandwich
## mediation: Causal Mediation Analysis
## Version: 4.4.6
d = dataissues16
model.m = lm(wid~sbvoc,d)
model.y = lm(pcomp~wid+sbvoc,d)
results <- mediate(model.m, model.y, treat='sbvoc', mediator='wid',
boot=TRUE, sims=1000)
summary(results) #ACME = average causal mediation effect (the indirect effect); ADE = average direct effect
##
## Causal Mediation Analysis
##
## Nonparametric Bootstrap Confidence Intervals with the Percentile Method
##
## Estimate 95% CI Lower 95% CI Upper p-value
## ACME 0.440 0.161 0.71 <2e-16 ***
## ADE 0.630 0.308 1.00 <2e-16 ***
## Total Effect 1.070 0.794 1.39 <2e-16 ***
## Prop. Mediated 0.411 0.163 0.67 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Sample Size Used: 101
##
##
## Simulations: 1000
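The same indirect effect can also be estimated directly in lavaan by labeling the paths and defining their product with “:=”; a sketch using the same data (se = "bootstrap" requests bootstrapped standard errors):
#model = 'pcomp ~ b*wid + c*sbvoc
#         wid ~ a*sbvoc
#         indirect := a*b      #corresponds to the ACME above
#         total := c + a*b'
#results.lav = sem(model, dataissues16, se = "bootstrap", bootstrap = 1000)
#summary(results.lav)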