In this lab we will learn:
cd $HOME/Sociogenomics
wget -O Data/lab_week6.zip https://www.dropbox.com/s/kwciw2cb19gkrzy/week6.zip?dl=0
unzip -o -d Data/ Data/lab_week6.zip
mv Data/week6/*.* Data/
rm -r Data/week6/
We start from a “monogenic score” based only on 1 SNP rs9930506 the file score_rs9930506.txt includes the weights
plink --bfile Data/1kg_hm3_qc \
--score Data/score_rs9930506.txt 1 2 3 \
--pheno Data/BMI_pheno.txt \
--out Results/1kg_FTOscore
This is our “monogenic score”
head Results/1kg_FTOscore.profile
To calculate PGS we need additional software:
cd Software
wget https://github.com/choishingwan/PRSice/releases/download/2.3.5/PRSice_linux.zip
unzip PRSice_linux.zip
Install R on the virtual machine
sudo apt-get install r-base r-base-dev
let’s see if it works
R
install.packages(c("ggplot2", "data.table"))
./PRSice_linux -h
Create symbolic links to PRSice in the Home directory
cd $HOME
cd Sociogenomics
ln -s Software/PRSice.R
ln -s Software/PRSice_linux
Setting up DATA and RESULTS folder variables
DATA="/home/nicola_barban/Sociogenomics/Data"
RESULTS="/home/nicola_barban/Sociogenomics/Results"
Calculating a Polygenic score on BMI based on BMI.txt
Rscript PRSice.R --dir . \
--prsice ./PRSice_linux \
--base ${DATA}/BMI.txt \
--target ${DATA}/1kg_hm3_qc \
--snp MarkerName \
--A1 A1 \
--A2 A2 \
--stat Beta \
--pvalue Pval \
--pheno-file ${DATA}/BMI_pheno.txt \
--bar-levels 1 \
--fastscore \
--binary-target F \
--out ${RESULTS}/BMI_score_all
See the ouput. there are 9 duplicated SNPs that can be removed with the command extract
Rscript PRSice.R --dir . \
--prsice ./PRSice_linux \
--base ${DATA}/BMI.txt \
--target ${DATA}/1kg_hm3_qc \
--snp MarkerName \
--A1 A1 \
--A2 A2 \
--stat Beta \
--pvalue Pval \
--pheno-file ${DATA}/BMI_pheno.txt \
--bar-levels 1 \
--fastscore \
--binary-target F \
--extract ${RESULTS}/BMI_score_all.valid \
--out ${RESULTS}/BMI_score_all
Calculating different scores at various pvalues
Rscript PRSice.R --dir . \
--prsice ./PRSice_linux \
--base ${DATA}/BMI.txt \
--target ${DATA}/1kg_hm3_qc \
--thread 1 \
--snp MarkerName \
--A1 A1 \
--A2 A2 \
--stat Beta \
--pvalue Pval \
--bar-levels 5e-08,5e-07,5e-06,5e-05,5e-04,5e-03,5e-02,5e-01 \
--fastscore \
--all-score \
--no-regress \
--binary-target F \
--extract ${RESULTS}/BMI_score_all.valid \
--out ${RESULTS}/BMIscore_thresholds
See results
head ${RESULTS}/BMIscore_thresholds.all_score
Rscript PRSice.R --dir . \
--prsice ./PRSice_linux\
--base ${DATA}/BMI.txt \
--target ${DATA}/1kg_hm3_qc \
--thread 1 \
--snp MarkerName \
--A1 A1 \
--A2 A2 \
--stat Beta \
--pvalue Pval \
--pheno-file ${DATA}/BMI_pheno.txt \
--interval 0.00005 \
--lower 0.0001 \
--quantile 20 \
--all-score \
--binary-target F \
--extract ${RESULTS}/BMI_score_all.valid \
--out ${RESULTS}/BMIscore_graphics
PGS on binary trait
Rscript PRSice.R --dir . \
--prsice ./PRSice_linux \
--base ${DATA}/BMI.txt \
--target ${DATA}/1kg_hm3_qc \
--thread 1 \
--snp MarkerName \
--A1 A1 \
--A2 A2 \
--stat Beta \
--pvalue Pval \
--no-clump F \
--pheno-file ${DATA}/Obesity_pheno.txt \
--interval 0.00005 \
--lower 0.0001 \
--quantile 5 \
--all-score \
--binary-target T \
--extract ${RESULTS}/BMI_score_all.valid \
--out ${RESULTS}/Obesity_score_graphics
#THIS NEED TO BE EXECUTED IN R
#import external data
setwd("/home/nicola_barban/Sociogenomics/Results")
data<-read.table("BMIscore_thresholds.all_score", header=T)
# show first rows of data
head(data)
# calculate new standardised variable
data$PGS=(data$Pt_1-mean(data$Pt_1))/sd(data$Pt_1, na.rm=T)
#plot histogram of the polygenic score
hist(data$PGS)
# import external data with phenotype
pheno_BMI<-read.table("../Data/BMI_pheno.txt", header=T)
# merge the two datasets
data.with.pheno<-merge(data,pheno_BMI, by="IID")
# run a linear regression model
mod1<-(lm(BMI~PGS, data=data.with.pheno))
summary(mod1)
summary(mod1)$r.square
# create a vector with column names
columns=c("FID", "IID", "pca1", "pca2", "pca3", "pca4", "pca5", "pca6", "pca7", "pca8", "pca9", "pca10")
# read external data with PCAs
pca<- read.table(file="../Data/pca.eigenvec", sep = "", header=F, col.names=columns)[,c(2:12)]
# merge file with covariates with the rest of the file
data.with.covars<-merge(data.with.pheno,pca, by="IID")
# Estimate new linear regression model
mod2<-lm(BMI~PGS+pca1+pca2+pca3+pca3+pca5+pca6+pca7+pca8+pca9+pca10, data=data.with.covars)
# Calculate R-Square
summary(mod2)$r.square
# Estimate linear regression model
mod2.no.pgs<-update(mod2, ~ . - PGS)
# Estimate difference in R-squared
print(summary(mod2)$r.square-summary(mod2.no.pgs)$r.square)
# install new package in R to perform bootstrap
# install new package in R to perform bootstrap
install.packages("boot")
library(boot)
set.seed(12345)
# define new function to calculate differential r-squared.
# The following commands define the new function
rsq <- function(formula, PGS=PGS, data, indices) {
d <- data[indices,]
fit1 <- lm(formula, data=d)
fit2 <- update(fit1, ~ . + PGS)
return(summary(fit2)$r.square-summary(fit1)$r.square)
}
# bootstrapping with 1000 replications
results <- boot(data=data.with.covars, statistic=rsq,
R=1000, formula=BMI~pca1+pca2+pca3+pca3+pca5+pca6+pca7+pca8+pca9+pca10, PGS=PGS)
boot.ci(results, type="norm")