In this lab we will learn:
Let’s have a look at the file.
# Work from the course directory. Stop if it is missing so the cleanup
# below can never run against some other directory's Data/ folder.
cd "$HOME/Sociogenomics" || exit 1
# Make sure the input folder and the output folder used by the PLINK
# commands (--out Results/...) both exist.
mkdir -p Data Results
# Remove existing files from Data/ (-f: no error when nothing matches).
rm -f -- Data/*.*
# Download this week's archive. The URL is quoted so the shell never treats
# the '?' in the query string as a glob character.
# NOTE(review): Dropbox share links ending in dl=0 may serve an HTML preview
# to some clients; if unzip rejects the file, confirm whether dl=1 is needed.
wget -O Data/lab_week4.zip 'https://www.dropbox.com/s/wum0okiq4dbn9p0/week4.zip?dl=0'
# Unpack into Data/ (overwriting existing files), then move the contents of
# the week4/ subfolder up into Data/ and drop the emptied subfolder.
unzip -o -d Data/ Data/lab_week4.zip
mv -- Data/week4/*.* Data/
rm -r -- Data/week4/
# Show the first lines of the sample (.fam) file.
head Data/1kg_EU_BMI.fam
how many observations?
# Count the lines of the .fam file (one line per record).
wc -l -- "Data/1kg_EU_BMI.fam"
how many variants?
# Count the lines of the .bim file (one line per record).
wc -l -- "Data/1kg_EU_BMI.bim"
Allele frequency
# Compute allele frequencies for the hapmap-ceu fileset; the report is
# written with the prefix Results/Allele_Frequency.
./plink \
  --bfile Data/hapmap-ceu \
  --freq \
  --out Results/Allele_Frequency
# Preview the first rows of the frequency table.
head -- "Results/Allele_Frequency.frq"
individuals
# Produce the missing-data reports for hapmap-ceu under the prefix
# Results/missing_data.
./plink \
  --bfile Data/hapmap-ceu \
  --missing \
  --out Results/missing_data
variants
# The --missing run above writes its reports under Results/, not Data/.
# .imiss is the per-individual table and .lmiss the per-variant table.
head Results/missing_data.imiss
head Results/missing_data.lmiss
Filter females
# Keep only the female samples of hapmap-ceu and save them as a new
# binary fileset.
./plink \
  --bfile Data/hapmap-ceu \
  --filter-females \
  --make-bed \
  --out Results/hapmap_filter_females
# Sample-level call-rate filter: --mind 0.05 removes individuals whose
# missing-genotype rate exceeds 5%, then writes the remaining data.
./plink \
  --bfile Data/1kg_EU_BMI \
  --mind 0.05 \
  --make-bed \
  --out Results/1kg_hm3_mind005
Calculate heterozygosity
# Per-sample heterozygosity report, written with the prefix
# Results/1kg_hm3_het.
./plink \
  --bfile Data/1kg_EU_BMI \
  --het \
  --out Results/1kg_hm3_het
Check discordant sex
# Sex check on hapmap-ceu: flags samples whose recorded sex disagrees with
# the sex PLINK infers from the genotype data.
./plink \
  --bfile Data/hapmap-ceu \
  --check-sex \
  --out Results/hapmap_sexcheck
Low call-rate SNPs
# Variant-level call-rate filter: --geno 0.05 removes variants whose
# missing-genotype rate exceeds 5%, then writes the remaining data.
./plink \
  --bfile Data/1kg_EU_BMI \
  --geno 0.05 \
  --make-bed \
  --out Results/1kg_hm3_geno
Filter by minor allele frequency
# Frequency filter: --maf 0.01 removes variants with a minor allele
# frequency below 1%, then writes the remaining data.
./plink \
  --bfile Data/1kg_EU_BMI \
  --maf 0.01 \
  --make-bed \
  --out Results/1kg_hm3_maf
deviation from HWE
# Hardy-Weinberg filter: --hwe 0.00001 removes variants whose HWE test
# p-value is below 1e-5, then writes the remaining data.
./plink \
  --bfile Data/1kg_EU_BMI \
  --hwe 0.00001 \
  --make-bed \
  --out Results/1kg_hm3_hwe
PLINK QC: combine the different filters in a single command
# One-pass QC: the sample call-rate, variant call-rate, minor-allele
# frequency and Hardy-Weinberg filters are all given to a single PLINK run.
# Note that --mind is 0.03 here.
./plink \
  --bfile Data/1kg_EU_BMI \
  --mind 0.03 \
  --geno 0.05 \
  --maf 0.01 \
  --hwe 0.00001 \
  --make-bed \
  --out Results/1kg_hm3_QC
Calculate linkage disequilibrium
# Linkage disequilibrium between the two named SNPs in hapmap-ceu;
# --out sets the prefix for this run's output.
./plink \
  --bfile Data/hapmap-ceu \
  --ld rs2883059 rs2777888 \
  --out Results/ld_example
Calculate independent SNPs (Pruning)
# LD pruning. The 1kg_EU_BMI fileset lives in Data/ (no command writes a
# fileset of that name to Results/), so it is read from Data/.
# --indep-pairwise 50 5 0.2: window of 50 variants, shifted 5 at a time,
# pruning pairs with r^2 above 0.2. The retained variant IDs go to
# Results/1kg_hm3_qc_pruned.prune.in.
./plink \
  --bfile Data/1kg_EU_BMI \
  --maf 0.01 \
  --indep-pairwise 50 5 0.2 \
  --out Results/1kg_hm3_qc_pruned
Select from original sample independent SNPs
# Keep only the variants listed in the pruning output. The original
# 1kg_EU_BMI fileset is in Data/ (it is never written to Results/), so it
# is read from Data/.
./plink \
  --bfile Data/1kg_EU_BMI \
  --extract Results/1kg_hm3_qc_pruned.prune.in \
  --make-bed \
  --out Results/1kg_hm3_pruned
# Principal component analysis on the pruned fileset, requesting the
# first 10 components; results use the prefix Results/1kg_pca.
./plink \
  --bfile Results/1kg_hm3_pruned \
  --pca 10 \
  --out Results/1kg_pca