Twin methods in OpenMx: Putting it all together

This final script is set up to run univariate ACE analysis on a twin dataset, compute confidence intervals for paths and chi square values for comparing models, print out results in a neatly formatted table, and save the output table as tab-separated text, so you can open it and edit in a word processed file.

If you can understand this script and modify it to use with your own data, you will be ready to tackle the OpenMx User Guide.

You can also find plenty of scripts for different types of models on:

http://www.vipbg.vcu.edu/~vipbg/OpenMx/

N.B. some of these were written before OpenMx introduced restrictions on illegal characters in names of objects. Check all output carefully, and if necessary rename models and their component objects.

Please note: this script is modified from one of the sample scripts provided on the OpenMx website, which was written by an expert. The modifications, however, were done by a novice, and this is not an elegantly written script! If you follow it through, you will see how the different terms for the summary table can be identified from an mxModel output, but there are much better ways of doing this under development. In particular, there is a file called GenEpiHelperFunctions.R which you can find on:

http://www.vipbg.vcu.edu/~vipbg/OpenMx/

If you copy this as an .R file into your R directory, you can then access its functions by including the following line in your scripts:

source("GenEpiHelperFunctions.R")

These functions will ultimately be integrated into OpenMx. There are scripts demonstrating their use on the same website.

# -----------------------------------------------------------------------

# Program: Generic Univariate ACE with saturated model comparison

# Author: DVM Bishop

# Date: 15th March 2010

# based on UnivariateTwinAnalysis_MatrixRaw.R by Hermine Maes

# -----------------------------------------------------------------------

# NB Your console screen width should be set nearly at full screen

# to get properly formatted output

require(OpenMx)

#Prepare Data

# -----------------------------------------------------------------------

mydatafile='myData\\chinese_both_wavesMar10dat.dat'

#finds the .dat file in directory 'myData'

# which is subdirectory of current R directory

# Note double slash to separate directory from filename

alldat=read.table(mydatafile, header = TRUE)

# assumes column names in top line (otherwise header is FALSE)

mycols=colnames(alldat)

#To find column number for variable of interest, look at mycols.

validmz=alldat$zygo_SNP==1 #allocates each row as true/false for later selection

validdz=alldat$zygo_SNP==2 #MZ and DZ take values of 1 or 2 on zygo_SNP

# Specify here the column numbers you want to use

col1=376;col2=377

myMZdata=alldat[validmz,col1:col2] #column numbers are for ZTotal2_CVocab_age

myDZdata=alldat[validdz,col1:col2] #this format makes it easy to alter variables and rerun

datasetname=c(mydatafile,mycols[col1]," and ",mycols[col2])

#to identify the variables used in analysis

colnames(myMZdata)=c("twin1","twin2") #need to use names without dots in them

colnames(myDZdata)=c("twin1","twin2")

nucolnames=colnames(myMZdata)

datasetname #will print the names of variables on the output

summary(myMZdata)

summary(myDZdata)

cov(myMZdata,use="complete") #don't include cases with missing data

cov(myDZdata,use="complete")

# Count number of data observations (discarding missing); will be used for DF calculation

MZlist=as.vector(as.matrix(myMZdata)) #put all MZ data in a single row

MZfinite=MZlist[is.finite(MZlist)] #retain those with finite values

DZlist=as.vector(as.matrix(myDZdata)) # do the same with DZ

DZfinite=DZlist[is.finite(DZlist)]

Nobs=length(MZfinite)+length(DZfinite)

# -----------------------------------------------------------------------

#Fit Saturated Model with RawData and Matrices Input

# -----------------------------------------------------------------------

# Model specification starts here

mytwinSatModel <- mxModel("twinSat",

mxModel("MZ",

mxMatrix(type="Full", nrow=1,ncol= 2, free=TRUE,values=c(0,0),name="expMeanMZ"),

mxMatrix("Lower",nrow= 2,ncol=2,free=TRUE,values=.5,name="CholMZ"),

mxAlgebra(CholMZ %*% t(CholMZ), name="expCovMZ"),

mxData(myMZdata, type="raw"),

mxFIMLObjective("expCovMZ", "expMeanMZ", nucolnames)

mxModel("DZ",

mxMatrix(type="Full", nrow=1,ncol= 2, free=TRUE, values=c(0,0), name="expMeanDZ"),

mxMatrix(type="Lower", nrow=2, ncol=2, free=TRUE, values=.5, name="CholDZ"),

mxAlgebra(CholDZ %*% t(CholDZ), name="expCovDZ"),

mxData(myDZdata, type="raw"),

mxFIMLObjective("expCovDZ", "expMeanDZ", nucolnames)

mxAlgebra(MZ.objective + DZ.objective, name="twin"),

# adds together likelihoods for MZ and DZ groups

mxAlgebraObjective("twin"))

# evaluate expression from mxAlgebra, i.e. both submodels together

#---------------------------------------------------------------------------------------------------------------------------

mytwinSatFit <- mxRun(mytwinSatModel) #The mxRun command evaluates the model.

LL_Sat <- mxEval(objective, mytwinSatFit)

summary(mxRun(mytwinSatModel))

#--------------------------------------------------------------------------------------------------------------------------

# compute DF for this model # N observations (all rows and variables, minus N estimated parameters)

DF_Sat=Nobs-nrow(mytwinSatFit@output$standardErrors)

# -----------------------------------------------------------------------

# Fit ACE Model with RawData and Matrices Input

# -----------------------------------------------------------------------

twinACEModel <- mxModel("twinACE",

# Matrices X, Y, and Z to store a, c, and e path coefficients

mxMatrix(type="Full", nrow=1,ncol=1,free=TRUE, values=.6,label="a",name="X",lbound=0),

mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE,values=.6,label="c",name="Y",lbound=0),

mxMatrix(type="Full", nrow=1, ncol=1, free=TRUE,values=.6,label="e",name="Z",lbound=0),

# Matrices A, C, and E compute variance components

mxAlgebra(expression=X %*% t(X), name="A"),

mxAlgebra(expression=Y %*% t(Y), name="C"),

mxAlgebra(expression=Z %*% t(Z), name="E"),

mxMatrix(type="Full", nrow=1, ncol=2, free=TRUE, values= 20,label="mean", name="expMean"),

# Algebra for expected variance/covariance matrix in MZ

mxAlgebra(expression= rbind (cbind(A+C+E , A+C),

cbind(A+C , A+C+E)),

name="expCovMZ"),

# Algebra for expected variance/covariance matrix in DZ

# note use of 0.5, converted to 1*1 matrix with Kronecker product

mxAlgebra(expression= rbind (cbind(A+C+E , 0.5%x%A+C),

cbind(0.5%x%A+C , A+C+E)),

name="expCovDZ"),

mxModel("MZ",

mxData(observed=myMZdata, type="raw"),

mxFIMLObjective(

covariance="twinACE.expCovMZ",

means="twinACE.expMean",

dimnames=nucolnames)),

mxModel("DZ",

mxData(observed=myDZdata, type="raw"),

mxFIMLObjective(

covariance="twinACE.expCovDZ",

means="twinACE.expMean",

dimnames=nucolnames)),

mxAlgebra(expression=MZ.objective + DZ.objective, name="twin"),

mxAlgebraObjective("twin")

)

# -----------------------------------------------------------------------

#Run ACE model

# -----------------------------------------------------------------------

twinACEFit <- mxRun(twinACEModel)

DF_ACE=Nobs-nrow(twinACEFit@output$standardErrors)

LL_ACE <- mxEval(objective, twinACEFit)

mychi_ACE= LL_ACE - LL_Sat #subtract LL for Saturated model from LL for ACE

mychi_DF_ACE=DF_ACE-DF_Sat #subtract DF for Saturated model from DF for ACE

mychi_p_ACE=1-pchisq(mychi_ACE,mychi_DF_ACE) # compute chi square probability

expMZcov_ACE <- mxEval(expCovMZ, twinACEFit)

# expected covariance matrix for MZ's

expDZcov_ACE <- mxEval(expCovDZ, twinACEFit)

# expected covariance matrix for DZ's

expMeans_ACE <- mxEval(expMean, twinACEFit) # expected mean

A_ACE <- mxEval(a*a, twinACEFit) # additive genetic variance, a^2

C_ACE <- mxEval(c*c, twinACEFit) # shared environmental variance, c^2

E_ACE <- mxEval(e*e, twinACEFit) # unique environmental variance, e^2

V <- (A_ACE+C_ACE+E_ACE) # total variance

a2_ACE <- A_ACE/V # standardized additive genetic variance

c2_ACE <- C_ACE/V # standardized shared environmental variance

e2_ACE <- E_ACE/V # standardized unique environmental variance

ACE_mySE=round(twinACEFit@output$standardErrors,3)

ACE_myest=round(twinACEFit@output$estimate,3)

ACE_mylower=round(ACE_myest-1.96*ACE_mySE,3)

ACE_myupper=round(ACE_myest+1.96*ACE_mySE,3)

# -----------------------------------------------------------------------

#Set up and run AE model

# -----------------------------------------------------------------------

twinAEModel <- mxModel(twinACEModel, name = "twinACE", #retain ACE name or won't match objective function

# drop c , i.e. fix at zero

mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE, values=0, label="c", name="Y")

)

twinAEFit <- mxRun(twinAEModel)

LL_AE <- mxEval(objective, twinAEFit)

DF_AE=Nobs-nrow(twinAEFit@output$standardErrors)

mychi_AE= LL_AE - LL_Sat #subtract LL for Saturated model from LL for AE

mychi_DF_AE=DF_AE-DF_Sat #subtract DF for Saturated model from DF for AE

mychi_p_AE=1-pchisq(mychi_AE,mychi_DF_AE)# compute chi square probability

#expected values for covs and means can be found in mxEval(expCovMZ, twinAEFit)

A_AE <- mxEval(a*a, twinAEFit)

C_AE <- mxEval(c*c, twinAEFit)

E_AE <- mxEval(e*e, twinAEFit)

V <- (A_AE+C_AE+E_AE)

a2_AE <- A_AE/V

c2_AE <- C_AE/V

e2_AE <- E_AE/V

mychiAEdiff=mychi_AE-mychi_ACE

myDFAEdiff=mychi_DF_AE-mychi_DF_ACE

mychiAEdiff_p=1-pchisq(mychiAEdiff,myDFAEdiff)

AE_mySE=round(twinAEFit@output$standardErrors,3)

AE_myest=round(twinAEFit@output$estimate,3)

AE_mylower=round(AE_myest-1.96*AE_mySE,3)

AE_myupper=round(AE_myest+1.96*AE_mySE,3)

# -----------------------------------------------------------------------

#Set up and run CE model

# -----------------------------------------------------------------------

twinCEModel <- mxModel(twinACEModel, name = "twinACE",

# drop a , i.e. fix at zero

mxMatrix(type="Full", nrow=1, ncol=1, free=FALSE, values=0, label="a", name="X")

)

twinCEFit <- mxRun(twinCEModel)

DF_CE=Nobs-nrow(twinCEFit@output$standardErrors)

LL_CE <- mxEval(objective, twinCEFit)

mychi_CE= LL_CE - LL_Sat #subtract LL for Saturated model from LL for CE

mychi_DF_CE=DF_CE-DF_Sat #subtract DF for Saturated model from DF for CE

mychi_p_CE=1-pchisq(mychi_CE,mychi_DF_CE)# compute chi square probability

#expected values for covs and means can be found in mxEval(expCovMZ, twinCEFit)

A_CE <- mxEval(a*a, twinCEFit)

C_CE <- mxEval(c*c, twinCEFit)

E_CE <- mxEval(e*e, twinCEFit)

V <- (A_CE+C_CE+E_CE)

a2_CE <- A_CE/V

c2_CE <- C_CE/V

e2_CE <- E_CE/V

mychiCEdiff=mychi_CE-mychi_ACE

myDFCEdiff=mychi_DF_CE-mychi_DF_ACE

mychiCEdiff_p=1-pchisq(mychiCEdiff,myDFCEdiff)

CE_mySE=round(twinCEFit@output$standardErrors,3)

CE_myest=round(twinCEFit@output$estimate,3)

CE_mylower=round(CE_myest-1.96*CE_mySE,3)

CE_myupper=round(CE_myest+1.96*CE_mySE,3)

#----------------------------------------------------------------------------------------------

# Output to compare all models

#----------------------------------------------------------------------------------------------

myoutput <- rbind(cbind("__________________________","_______________","_________________","______________"),

cbind("ACE model","A","C","E"),

cbind("Unsquared path estimates",ACE_myest[1],ACE_myest[2],ACE_myest[3]),

cbind("Standard errors",ACE_mySE[1],ACE_mySE[2],ACE_mySE[3]),

cbind("Lower 95% CI",ACE_mylower[1],ACE_mylower[2],ACE_mylower[3]),

cbind("Upper 95% CI",ACE_myupper[1],ACE_myupper[2],ACE_myupper[3]),

cbind(".",".",".","."),

cbind("Unstandardized variance comps",round(A_ACE,3),round(C_ACE,3),round(E_ACE,3)),

cbind("Standardized variance comps",round(a2_ACE,3),round(c2_ACE,3),round(e2_ACE,3)),

cbind(".","chisq","DF","p"),

cbind("saturated vs. ACE",round(mychi_ACE,3),mychi_DF_ACE,round(mychi_p_ACE,3)),

cbind("__________________________","_______________","_________________","______________"),

cbind("AE model","A","C","E"),

cbind("Unsquared path estimates",AE_myest[1],".",AE_myest[2]),#NB no estimate for C!

cbind("Standard errors",AE_mySE[1],".",AE_mySE[2]),

cbind("Lower 95% CI",AE_mylower[1],".",AE_mylower[2]),

cbind("Upper 95% CI",AE_myupper[1],".",AE_myupper[2]),

cbind(".",".",".","."),

cbind("Unstandardized variance comps",round(A_AE,3),round(C_AE,3),round(E_AE,3)),

cbind("Standardized variance comps",round(a2_AE,3),round(c2_AE,3),round(e2_AE,3)),

cbind(".","chisq","DF","p"),

cbind("saturated vs. AE",round(mychi_AE,3),mychi_DF_AE,round(mychi_p_AE,3)),

cbind("ACE vs AE",round(mychiAEdiff,3),1,round(mychiAEdiff_p,3)),

cbind(".",".",".","."),

cbind("__________________________","_______________","_________________","______________"),

cbind("CE model","A","C","E"),

cbind("Unsquared path estimates",".",CE_myest[1],CE_myest[2]),

cbind("Standard errors",".",CE_mySE[1],CE_mySE[2]),

cbind("Lower 95% CI",".",CE_mylower[1],CE_mylower[2]),

cbind("Upper 95% CI",".",CE_myupper[1],CE_myupper[2]),

cbind(".",".",".","."),

cbind("Unstandardized variance comps",round(A_CE,3),round(C_CE,3),round(E_CE,3)),

cbind("Standardized variance comps",round(a2_CE,3),round(c2_CE,3),round(e2_CE,3)),

cbind(".","chisq","DF","p"),

cbind("saturated vs. CE",round(mychi_CE,3),mychi_DF_CE,round(mychi_p_CE,3)),

cbind("ACE vs CE",round(mychiCEdiff,3),1,round(mychiCEdiff_p,3)),

cbind("__________________________","_______________","_________________","______________"),

cbind(date(),".",".","."))

#round is used here simply to keep output to 3 decimal places

myoutput2 <- data.frame(myoutput)

names(myoutput2)=datasetname

write.table(myoutput2, "myfileout.txt",sep = "\t",quote=F)

#saves hard copy of output as tab-separated text

# You can read 'myfileout.txt' into Word for easy formatting

myoutput2 #print the table on screen

mymessage="If table not formatted properly, make console screen full size and type myoutput2"

%---------------------------------------------------------------------------------------------------------------

Twin methods in OpenMx

Sunday 28 August 2011

Putting it all together

No comments:

Post a Comment

Total Pageviews