# The input for the full admixture analysis should consist of three
# text files: one with trait values Y, second with values of the
# genome-wide ancestry Q, third containing the genotype
# matrix X and the matrix of locus specific ancestries Z (two matrices
# in one file). If you have X and Z in separete files and you have problems
# with combine them (files are big and you do not have enough RAM), you can
# use 'combineBigMatrices' function).
# 
# The input files should be of the following forms:

## Y and Q:
# 0.192
# 0.738
# 0.473
# ...
# (one column of numbers)

## matrices X and Z:
# XSNP1 XSNP2 XSNP3 ... ZANC1 ZANC2 ZANC3 ...
#   0     1     0         2     1     0   ...
#   0     1     1         1     1     0   ...
#   1     2     0         1     1     2   ...
#  ...
# Genotypes (ancestries) of consecutive SNPs appear in different columns;
# names of genotype have to start with letter "X" and names of ancestry
# state variables of a given SNP have to start with letter "Z";
# names should not coincide; any separator can be used between columns.
# If you have observations in columns, you can use 'transposeBigMatrix'
# function (if you do not have enough RAM).
# Missing values should be coded as NA.

# The main function for admixture analysis is 'selectModel' from
# package 'bigstep'. To see how to use it, type '?selectModel'.


### example of use
library(bigstep)
library(bigmemory)
library(matrixStats)
source("admixtures_functions.R")

# only genotype-based GWAS
X <- read.big.matrix("X.txt", sep=" ", type="char", header=TRUE, shared=FALSE)
# (if you have enough RAM, you do not have to use 'read.big.matrix' function,
# but eg. 'read.table' and all analysis will be much quicker;
# to perform only genotype-based GWAS, names of columns can start with
# any letter)
y <- read.table("Trait.txt")
M <- ncol(X)
selectModel(X, y, crit=mbic2, p=M, minpb=0.15, multif=FALSE)

# joint genotype and ancestry-based GWAS
XZ <- read.big.matrix("XZ.txt", sep=" ", type="char", header=TRUE, shared=FALSE)
y <- read.table("Trait.txt")
Q <- read.table("Q.txt")
selectModel(XZ, y, crit=mbic2Adm, Xm=Q, p1=482906, p2=4723,
            minpv=0.15, multif=FALSE)
# p1 and p2 are weigths for mbic2Adm (effective number of tests for X and Z);
# p1 is usually a number of SNPs, p2 can be estimated using permutation test
# (you can use 'permutationTests' function);
# warning: multi-forward step (multif=TRUE) is not adapted to admixtures
# analysis


### additional functions
# combine and transpose matrices and change column names
X <- read.big.matrix("Xtrans.txt", sep="\t", type="char", has.row.names=TRUE,
                     shared=FALSE)
Z <- read.big.matrix("Ztrans.txt", sep="\t", type="char", has.row.names=TRUE,
                     shared=FALSE)
new.names <- c(paste0("X", rownames(X)), paste0("Z", rownames(Z)))
combineBigMatrices(X, Z, file.out="XZ.txt", transpose=TRUE, colnames=new.names)

# permutation tests
Z <- read.big.matrix("Z.txt", sep=" ", type="char", head=TRUE)
y <- read.table("Trait.txt")
permutationTests(Z, y, N=1000)