Sunday, 28 August 2011

Putting it all together


This final script is set up to run univariate ACE analysis on a twin dataset, compute confidence intervals for paths and chi square values for comparing models, print out results in a neatly formatted table, and save the output table as tab-separated text, so you can open it and edit in a word processed file.

If you can understand this script and modify it to use with your own data,  you will be ready to tackle the OpenMx User Guide.
You can also find plenty of scripts for different types of models on:
N.B. some of these were written before OpenMx introduced restrictions on illegal characters in names of objects. Check all output carefully, and if necessary rename models and their component objects.

Please note: this script is modified from one of the sample scripts provided on the OpenMx website, which was written by an expert. The modifications, however, were done by a novice, and this is not an elegantly written script!  If you follow it through, you will see how the different terms for the summary table can be identified from an mxModel output, but there are much better ways of doing this under development. In particular, there is a file called GenEpiHelperFunctions.R which you can find on:
http://www.vipbg.vcu.edu/~vipbg/OpenMx/
If you copy this as an .R file into your R directory, you can then access its functions by including the following line in your scripts:
source("GenEpiHelperFunctions.R")

These functions will ultimately be integrated into OpenMx. There are scripts demonstrating their use on the same website.

# -----------------------------------------------------------------------
# Program: Generic Univariate ACE with saturated model comparison
#  Author: DVM Bishop
#  Date: 15th March 2010
#  based on  UnivariateTwinAnalysis_MatrixRaw.R by Hermine Maes
# -----------------------------------------------------------------------

# NB Your console screen width should be set nearly at full screen
# to get properly formatted output
require(OpenMx)

#Prepare Data
# -----------------------------------------------------------------------
mydatafile='myData\\chinese_both_wavesMar10dat.dat'
                                         #finds the .dat file in directory 'myData'
                                         # which is subdirectory of current R directory
                                         # Note double slash to separate directory from filename
alldat=read.table(mydatafile, header = TRUE)
# assumes column names in top line (otherwise header is FALSE)
mycols=colnames(alldat)
#To find column number for variable of interest, look at mycols.

 validmz=alldat$zygo_SNP==1 #allocates each row as true/false for later selection
 validdz=alldat$zygo_SNP==2  #MZ and DZ take values of 1 or 2 on zygo_SNP

# Specify here the column numbers you want to use
col1=376;col2=377
myMZdata=alldat[validmz,col1:col2]  #column numbers are for ZTotal2_CVocab_age
myDZdata=alldat[validdz,col1:col2]  #this format makes it easy to alter variables and rerun

datasetname=c(mydatafile,mycols[col1]," and ",mycols[col2])
#to identify the variables used in analysis
colnames(myMZdata)=c("twin1","twin2")  #need to use names without dots in them
colnames(myDZdata)=c("twin1","twin2")
nucolnames=colnames(myMZdata)
datasetname  #will print the names of variables on the output
summary(myMZdata)
summary(myDZdata)

cov(myMZdata,use="complete") #don't include cases with missing data
cov(myDZdata,use="complete")

# Count number of data observations (discarding missing); will be used for DF calculation
MZlist=as.vector(as.matrix(myMZdata)) #put all MZ data in a single row
MZfinite=MZlist[is.finite(MZlist)]    #retain those with finite values
DZlist=as.vector(as.matrix(myDZdata)) # do the same with DZ
DZfinite=DZlist[is.finite(DZlist)]
Nobs=length(MZfinite)+length(DZfinite)

# -----------------------------------------------------------------------
#Fit Saturated Model with RawData and Matrices Input
# -----------------------------------------------------------------------

# Model specification starts here
mytwinSatModel <- mxModel("twinSat",
            mxModel("MZ",
                        mxMatrix(type="Full", nrow=1,ncol= 2, free=TRUE,values=c(0,0),name="expMeanMZ"),
                        mxMatrix("Lower",nrow= 2,ncol=2,free=TRUE,values=.5,name="CholMZ"),
                        mxAlgebra(CholMZ %*% t(CholMZ), name="expCovMZ"),
                        mxData(myMZdata, type="raw"),
                        mxFIMLObjective("expCovMZ", "expMeanMZ", nucolnames)
                        ),

            mxModel("DZ",
                        mxMatrix(type="Full", nrow=1,ncol= 2, free=TRUE, values=c(0,0), name="expMeanDZ"),
                        mxMatrix(type="Lower", nrow=2, ncol=2, free=TRUE, values=.5, name="CholDZ"),
                        mxAlgebra(CholDZ %*% t(CholDZ), name="expCovDZ"),
                        mxData(myDZdata, type="raw"),
                        mxFIMLObjective("expCovDZ", "expMeanDZ", nucolnames)
                        ),

            mxAlgebra(MZ.objective + DZ.objective, name="twin"),
# adds together likelihoods for MZ and DZ groups
            mxAlgebraObjective("twin"))  
# evaluate expression from mxAlgebra, i.e. both submodels together
#---------------------------------------------------------------------------------------------------------------------------
mytwinSatFit <- mxRun(mytwinSatModel) #The mxRun command evaluates the model.

LL_Sat <- mxEval(objective, mytwinSatFit)
summary(mxRun(mytwinSatModel))
#--------------------------------------------------------------------------------------------------------------------------
# compute DF for this model # N observations (all rows and variables, minus N estimated parameters)
DF_Sat=Nobs-nrow(mytwinSatFit@output$standardErrors)

# -----------------------------------------------------------------------
# Fit ACE Model with RawData and Matrices Input
# -----------------------------------------------------------------------
twinACEModel <- mxModel("twinACE",
                        # Matrices X, Y, and Z to store a, c, and e path coefficients
                        mxMatrix(type="Full", nrow=1,ncol=1,free=TRUE, values=.6,label="a",name="X",lbound=0),
                        mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE,values=.6,label="c",name="Y",lbound=0),
                        mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE,values=.6,label="e",name="Z",lbound=0),
            # Matrices A, C, and E compute variance components
                        mxAlgebra(expression=X %*% t(X), name="A"),
                        mxAlgebra(expression=Y %*% t(Y), name="C"),
                        mxAlgebra(expression=Z %*% t(Z), name="E"),
                        mxMatrix(type="Full", nrow=1, ncol=2, free=TRUE, values= 20,label="mean", name="expMean"),
            # Algebra for expected variance/covariance matrix in MZ
                        mxAlgebra(expression= rbind  (cbind(A+C+E , A+C),
                                                                                      cbind(A+C   , A+C+E)),
                        name="expCovMZ"),
            # Algebra for expected variance/covariance matrix in DZ
            # note use of 0.5, converted to 1*1 matrix with Kronecker product
                        mxAlgebra(expression= rbind  (cbind(A+C+E     , 0.5%x%A+C),
                                                                                     cbind(0.5%x%A+C , A+C+E)),
                        name="expCovDZ"),
           
            mxModel("MZ",
                mxData(observed=myMZdata, type="raw"),
                mxFIMLObjective(
                                    covariance="twinACE.expCovMZ",
                                    means="twinACE.expMean",
                                    dimnames=nucolnames)),
            mxModel("DZ",
                mxData(observed=myDZdata, type="raw"),
                mxFIMLObjective(
                                    covariance="twinACE.expCovDZ",
                                    means="twinACE.expMean",
                                    dimnames=nucolnames)),
            mxAlgebra(expression=MZ.objective + DZ.objective, name="twin"),
            mxAlgebraObjective("twin")
)

# -----------------------------------------------------------------------
#Run ACE model
# -----------------------------------------------------------------------
twinACEFit <- mxRun(twinACEModel)

DF_ACE=Nobs-nrow(twinACEFit@output$standardErrors)
LL_ACE <- mxEval(objective, twinACEFit)
mychi_ACE= LL_ACE - LL_Sat  #subtract LL for Saturated model from LL for ACE
mychi_DF_ACE=DF_ACE-DF_Sat  #subtract DF for Saturated model from DF for ACE
mychi_p_ACE=1-pchisq(mychi_ACE,mychi_DF_ACE) # compute chi square probability

expMZcov_ACE <- mxEval(expCovMZ, twinACEFit)                                 
# expected covariance matrix for MZ's
expDZcov_ACE <- mxEval(expCovDZ, twinACEFit)                                  
# expected covariance matrix for DZ's
expMeans_ACE <- mxEval(expMean, twinACEFit)     # expected mean
A_ACE <- mxEval(a*a, twinACEFit)                          # additive genetic variance, a^2
C_ACE <- mxEval(c*c, twinACEFit)                          # shared environmental variance, c^2
E_ACE <- mxEval(e*e, twinACEFit)                          # unique environmental variance, e^2
V <- (A_ACE+C_ACE+E_ACE)                                # total variance
a2_ACE <- A_ACE/V                   # standardized additive genetic variance
c2_ACE <- C_ACE/V                   # standardized shared environmental variance
e2_ACE <- E_ACE/V                   # standardized unique environmental variance

ACE_mySE=round(twinACEFit@output$standardErrors,3)
ACE_myest=round(twinACEFit@output$estimate,3)
ACE_mylower=round(ACE_myest-1.96*ACE_mySE,3)
ACE_myupper=round(ACE_myest+1.96*ACE_mySE,3)
# -----------------------------------------------------------------------
#Set up and run AE model
# -----------------------------------------------------------------------
twinAEModel <- mxModel(twinACEModel, name = "twinACE",  #retain ACE name or won't match objective function
            # drop c , i.e. fix at zero
            mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE, values=0, label="c", name="Y")         
)

twinAEFit <- mxRun(twinAEModel)
LL_AE <- mxEval(objective, twinAEFit)
DF_AE=Nobs-nrow(twinAEFit@output$standardErrors)

mychi_AE= LL_AE - LL_Sat  #subtract LL for Saturated model from LL for AE
mychi_DF_AE=DF_AE-DF_Sat  #subtract DF for Saturated model from DF for AE
mychi_p_AE=1-pchisq(mychi_AE,mychi_DF_AE)# compute chi square probability
#expected values for covs and means can be found in mxEval(expCovMZ, twinAEFit)

A_AE <- mxEval(a*a, twinAEFit)
C_AE <- mxEval(c*c, twinAEFit)
E_AE <- mxEval(e*e, twinAEFit)
V <- (A_AE+C_AE+E_AE)
a2_AE <- A_AE/V
c2_AE <- C_AE/V
e2_AE <- E_AE/V

mychiAEdiff=mychi_AE-mychi_ACE
myDFAEdiff=mychi_DF_AE-mychi_DF_ACE
mychiAEdiff_p=1-pchisq(mychiAEdiff,myDFAEdiff)
AE_mySE=round(twinAEFit@output$standardErrors,3)
AE_myest=round(twinAEFit@output$estimate,3)
AE_mylower=round(AE_myest-1.96*AE_mySE,3)
AE_myupper=round(AE_myest+1.96*AE_mySE,3)

# -----------------------------------------------------------------------
#Set up and run CE model
# -----------------------------------------------------------------------
twinCEModel <- mxModel(twinACEModel, name = "twinACE",
            # drop a , i.e. fix at zero
            mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE, values=0, label="a", name="X")          
)

twinCEFit <- mxRun(twinCEModel)

DF_CE=Nobs-nrow(twinCEFit@output$standardErrors)
LL_CE <- mxEval(objective, twinCEFit)
mychi_CE= LL_CE - LL_Sat  #subtract LL for Saturated model from LL for CE
mychi_DF_CE=DF_CE-DF_Sat  #subtract DF for Saturated model from DF for CE
mychi_p_CE=1-pchisq(mychi_CE,mychi_DF_CE)# compute chi square probability
#expected values for covs and means can be found in mxEval(expCovMZ, twinCEFit)

A_CE <- mxEval(a*a, twinCEFit)
C_CE <- mxEval(c*c, twinCEFit)
E_CE <- mxEval(e*e, twinCEFit)
V <- (A_CE+C_CE+E_CE)
a2_CE <- A_CE/V
c2_CE <- C_CE/V
e2_CE <- E_CE/V

mychiCEdiff=mychi_CE-mychi_ACE
myDFCEdiff=mychi_DF_CE-mychi_DF_ACE
mychiCEdiff_p=1-pchisq(mychiCEdiff,myDFCEdiff)
CE_mySE=round(twinCEFit@output$standardErrors,3)
CE_myest=round(twinCEFit@output$estimate,3)
CE_mylower=round(CE_myest-1.96*CE_mySE,3)
CE_myupper=round(CE_myest+1.96*CE_mySE,3)

#----------------------------------------------------------------------------------------------
#  Output to compare all models
#----------------------------------------------------------------------------------------------

myoutput <- rbind(cbind("__________________________","_______________","_________________","______________"),
                                    cbind("ACE model","A","C","E"),
                                    cbind("Unsquared path estimates",ACE_myest[1],ACE_myest[2],ACE_myest[3]),
                                    cbind("Standard errors",ACE_mySE[1],ACE_mySE[2],ACE_mySE[3]),
                                    cbind("Lower 95% CI",ACE_mylower[1],ACE_mylower[2],ACE_mylower[3]),
                                    cbind("Upper 95% CI",ACE_myupper[1],ACE_myupper[2],ACE_myupper[3]),
                  cbind(".",".",".","."),
                                    cbind("Unstandardized variance comps",round(A_ACE,3),round(C_ACE,3),round(E_ACE,3)),
                  cbind("Standardized variance comps",round(a2_ACE,3),round(c2_ACE,3),round(e2_ACE,3)),
                  cbind(".","chisq","DF","p"),
                                    cbind("saturated vs. ACE",round(mychi_ACE,3),mychi_DF_ACE,round(mychi_p_ACE,3)),
                                    cbind("__________________________","_______________","_________________","______________"),
                                    cbind("AE model","A","C","E"),
                                    cbind("Unsquared path estimates",AE_myest[1],".",AE_myest[2]),#NB no estimate for C!
                                    cbind("Standard errors",AE_mySE[1],".",AE_mySE[2]),
                                    cbind("Lower 95% CI",AE_mylower[1],".",AE_mylower[2]),
                                    cbind("Upper 95% CI",AE_myupper[1],".",AE_myupper[2]),
                  cbind(".",".",".","."),

                                    cbind("Unstandardized variance comps",round(A_AE,3),round(C_AE,3),round(E_AE,3)),
                  cbind("Standardized variance comps",round(a2_AE,3),round(c2_AE,3),round(e2_AE,3)),
                  cbind(".","chisq","DF","p"),
                                    cbind("saturated vs. AE",round(mychi_AE,3),mychi_DF_AE,round(mychi_p_AE,3)),
                                    cbind("ACE vs AE",round(mychiAEdiff,3),1,round(mychiAEdiff_p,3)),
                                    cbind(".",".",".","."),
                                    cbind("__________________________","_______________","_________________","______________"),
                                    cbind("CE model","A","C","E"),
                                    cbind("Unsquared path estimates",".",CE_myest[1],CE_myest[2]),
                                    cbind("Standard errors",".",CE_mySE[1],CE_mySE[2]),
                                    cbind("Lower 95% CI",".",CE_mylower[1],CE_mylower[2]),
                                    cbind("Upper 95% CI",".",CE_myupper[1],CE_myupper[2]),
                  cbind(".",".",".","."),

                                    cbind("Unstandardized variance comps",round(A_CE,3),round(C_CE,3),round(E_CE,3)),
                  cbind("Standardized variance comps",round(a2_CE,3),round(c2_CE,3),round(e2_CE,3)),
                        cbind(".","chisq","DF","p"),
                                    cbind("saturated vs. CE",round(mychi_CE,3),mychi_DF_CE,round(mychi_p_CE,3)),
                                    cbind("ACE vs CE",round(mychiCEdiff,3),1,round(mychiCEdiff_p,3)),
                                    cbind("__________________________","_______________","_________________","______________"),
                                    cbind(date(),".",".","."))

#round is used here simply to keep output to 3 decimal places

myoutput2 <- data.frame(myoutput)
names(myoutput2)=datasetname
write.table(myoutput2, "myfileout.txt",sep = "\t",quote=F)
#saves hard copy of output as tab-separated text
                                  # You can read 'myfileout.txt' into Word for easy formatting
myoutput2  #print the table on screen
mymessage="If table not formatted properly, make console screen full size and type myoutput2"
%---------------------------------------------------------------------------------------------------------------


No comments:

Post a Comment