sociogenomics2022

Lab 3. Sociogenomics

 # Preview the first ten lines of the BMI sample's .fam file.
 head -n 10 Data/1kg_EU_BMI.fam

Descriptive Statistics

Allele frequency

 # Write the allele-frequency report, then peek at its first lines.
 ./plink \
   --bfile Data/hapmap-ceu \
   --freq \
   --out Results/Allele_Frequency
head -n 10 Results/Allele_Frequency.frq

Missing values

individuals



# Produce the missingness reports under the Results/missing_data prefix.
./plink \
  --bfile Data/hapmap-ceu \
  --missing \
  --out Results/missing_data

variants


# The --missing run writes its reports with the Results/missing_data prefix,
# so the report has to be read from Results/, not Data/.
head Results/missing_data.imiss

Filter females


# Apply the female-only filter and save the result as a new binary fileset.
./plink \
  --bfile Data/hapmap-ceu \
  --filter-females \
  --make-bed \
  --out Results/hapmap_filter_females

Quality control



# QC step: apply the --mind threshold (0.05) and write the filtered fileset.
./plink \
  --bfile Data/1kg_hm3 \
  --mind 0.05 \
  --make-bed \
  --out Results/1kg_hm3_mind005

Calculate heterozygosity


# Heterozygosity report for the 1kg_hm3 dataset.
./plink \
  --bfile Data/1kg_hm3 \
  --het \
  --out Results/1kg_hm3_het

Check discordant sex


# Sex-check report for the hapmap-ceu dataset.
./plink \
  --bfile Data/hapmap-ceu \
  --check-sex \
  --out Results/hapmap_sexcheck

Low call-rate SNPs


# QC step: apply the --geno threshold (0.05) and write the filtered fileset.
./plink \
  --bfile Data/1kg_hm3 \
  --geno 0.05 \
  --make-bed \
  --out Results/1kg_hm3_geno

Allele frequency


# QC step: apply the --maf threshold (0.01) and write the filtered fileset.
./plink \
  --bfile Data/1kg_hm3 \
  --maf 0.01 \
  --make-bed \
  --out Results/1kg_hm3_maf

deviation from HWE


# QC step: apply the --hwe threshold (0.00001) and write the filtered fileset.
./plink \
  --bfile Data/1kg_hm3 \
  --hwe 0.00001 \
  --make-bed \
  --out Results/1kg_hm3_hwe

Plink QC



# Combined QC pass: all sample and variant filters in a single PLINK run.
#
# - The failed-individuals list is passed to --remove, not --exclude:
#   --exclude expects a list of variant IDs, while --remove drops samples.
#   (Assumes individuals_failQC.txt holds one FID/IID pair per line —
#   TODO confirm the file format.)
# - The output prefix is lower-case "1kg_hm3_qc" so it matches the
#   "--bfile Results/1kg_hm3_qc" read by the pruning commands; the previous
#   "1kg_hm3_QC" spelling is a different file on case-sensitive filesystems.
./plink \
  --bfile Data/1kg_hm3 \
  --mind 0.03 \
  --geno 0.05 \
  --maf 0.01 \
  --hwe 0.00001 \
  --remove Data/individuals_failQC.txt \
  --make-bed \
  --out Results/1kg_hm3_qc

Association analysis

Linear additive model

# Single-SNP association with BMI (rs9674439), linear model.
./plink \
  --bfile Data/1kg_EU_BMI \
  --snps rs9674439 \
  --assoc \
  --linear \
  --out Results/BMIrs9674439

Logistic additive model


# Single-SNP association with the overweight phenotype (rs9674439),
# logistic model.
./plink \
  --bfile Data/1kg_EU_Overweight \
  --snps rs9674439 \
  --assoc \
  --logistic \
  --out Results/Overweight_rs9674439

Linear dominant analysis

# Single-SNP association with BMI (rs9674439), linear model with the
# "dominant" modifier.
#
# The output prefix carries a "_dominant" suffix: the additive linear run
# uses "Results/BMIrs9674439", and reusing that prefix here would overwrite
# its result files.
./plink \
  --bfile Data/1kg_EU_BMI \
  --snps rs9674439 \
  --assoc \
  --linear dominant \
  --out Results/BMIrs9674439_dominant

Genome-Wide Analysis

# Genome-wide run: same BMI analysis without the --snps restriction.
./plink \
  --bfile Data/1kg_EU_BMI \
  --assoc \
  --linear \
  --out Results/BMIgwas

Calculate linkage disequilibrium

# Linkage disequilibrium between the two named SNPs.
./plink \
  --bfile Data/hapmap-ceu \
  --ld rs2883059 rs2777888 \
  --out Results/ld_example

Calculate independent SNPs (Pruning)


# Pruning: build the list of independent SNPs from the QC'd fileset
# (--indep-pairwise arguments: 50 5 0.2).
./plink \
  --bfile Results/1kg_hm3_qc \
  --maf 0.01 \
  --indep-pairwise 50 5 0.2 \
  --out Results/1kg_hm3_qc_pruned

Select from original sample independent SNPs

# Keep only the SNPs listed in the .prune.in file and write a new fileset.
./plink \
  --bfile Results/1kg_hm3_qc \
  --extract Results/1kg_hm3_qc_pruned.prune.in \
  --make-bed \
  --out Results/1kg_hm3_pruned

Calculate PCA

# Principal components (10 requested) on the pruned fileset.
./plink \
  --bfile Results/1kg_hm3_pruned \
  --pca 10 \
  --out Results/1kg_pca

Calculate Identity By State matrix

# Distance matrix for the samples listed in 1kg_samples_EUR.txt.
./plink \
  --bfile Results/1kg_hm3_pruned \
  --keep Data/1kg_samples_EUR.txt \
  --distance \
  --out Results/ibs_matrix

Calculate relatedness matrix

# Relationship matrix for the samples listed in 1kg_samples_EUR.txt.
./plink \
  --bfile Results/1kg_hm3_pruned \
  --keep Data/1kg_samples_EUR.txt \
  --make-rel \
  --out Results/rel_matrix

Using GCTA https://yanglab.westlake.edu.cn/software/gcta/#Download

Download and install GCTA

# Download and unpack the GCTA Linux build into the course Software folder.
# $HOME is quoted so a home directory containing spaces still works.
cd "$HOME"
cd Sociogenomics/Software

wget -O gcta.zip https://yanglab.westlake.edu.cn/software/gcta/bin/gcta_v1.94.0Beta_linux_kernel_3_x86_64.zip

unzip gcta.zip

# Mark the binary executable. The commands that follow invoke ./gcta64, so
# the chmod targets that same name (it previously targeted "gcta").
# NOTE(review): the archive may unpack into a versioned subdirectory and/or
# use a different binary name — check the unzip output and adjust if needed.
chmod +x gcta64
# Step 1: build the GRM from the BMI fileset.
# The Data/ and Results/ paths are relative to the current directory.
./gcta64 \
  --bfile Data/1kg_EU_BMI \
  --autosome \
  --maf 0.01 \
  --make-grm \
  --out Results/1kg_gcta

# Step 2: apply the 0.025 GRM cutoff and write the reduced GRM.
./gcta64 \
  --grm Results/1kg_gcta \
  --grm-cutoff 0.025 \
  --make-grm \
  --out Results/1kg_rm025

# Step 3: REML analysis of the BMI phenotype using the reduced GRM.
./gcta64 \
  --grm Results/1kg_rm025 \
  --pheno Data/BMI_pheno.txt \
  --reml \
  --out Results/1kg_BMI_h2