# The input for the full admixture analysis should consist of three # text files: one with trait values Y, second with values of the # genome-wide ancestry Q, third containing the genotype # matrix X and the matrix of locus specific ancestries Z (two matrices # in one file). If you have X and Z in separete files and you have problems # with combine them (files are big and you do not have enough RAM), you can # use 'combineBigMatrices' function). # # The input files should be of the following forms: ## Y and Q: # 0.192 # 0.738 # 0.473 # ... # (one column of numbers) ## matrices X and Z: # XSNP1 XSNP2 XSNP3 ... ZANC1 ZANC2 ZANC3 ... # 0 1 0 2 1 0 ... # 0 1 1 1 1 0 ... # 1 2 0 1 1 2 ... # ... # Genotypes (ancestries) of consecutive SNPs appear in different columns; # names of genotype have to start with letter "X" and names of ancestry # state variables of a given SNP have to start with letter "Z"; # names should not coincide; any separator can be used between columns. # If you have observations in columns, you can use 'transposeBigMatrix' # function (if you do not have enough RAM). # Missing values should be coded as NA. # The main function for admixture analysis is 'selectModel' from # package 'bigstep'. To see how to use it, type '?selectModel'. ### example of use library(bigstep) library(bigmemory) library(matrixStats) source("admixtures_functions.R") # only genotype-based GWAS X <- read.big.matrix("X.txt", sep=" ", type="char", header=TRUE, shared=FALSE) # (if you have enough RAM, you do not have to use 'read.big.matrix' function, # but eg. 'read.table' and all analysis will be much quicker; # to perform only genotype-based GWAS, names of columns can start with # any letter) y <- read.table("Trait.txt") M <- ncol(X) selectModel(X, y, crit=mbic2, p=M, minpb=0.15, multif=FALSE) # joint genotype and ancestry-based GWAS XZ <- read.big.matrix("XZ.txt", sep=" ", type="char", header=TRUE, shared=FALSE) y <- read.table("Trait.txt") Q <- read.table("Q.txt") selectModel(XZ, y, crit=mbic2Adm, Xm=Q, p1=482906, p2=4723, minpv=0.15, multif=FALSE) # p1 and p2 are weigths for mbic2Adm (effective number of tests for X and Z); # p1 is usually a number of SNPs, p2 can be estimated using permutation test # (you can use 'permutationTests' function); # warning: multi-forward step (multif=TRUE) is not adapted to admixtures # analysis ### additional functions # combine and transpose matrices and change column names X <- read.big.matrix("Xtrans.txt", sep="\t", type="char", has.row.names=TRUE, shared=FALSE) Z <- read.big.matrix("Ztrans.txt", sep="\t", type="char", has.row.names=TRUE, shared=FALSE) new.names <- c(paste0("X", rownames(X)), paste0("Z", rownames(Z))) combineBigMatrices(X, Z, file.out="XZ.txt", transpose=TRUE, colnames=new.names) # permutation tests Z <- read.big.matrix("Z.txt", sep=" ", type="char", head=TRUE) y <- read.table("Trait.txt") permutationTests(Z, y, N=1000)