QIIME Tutorial - Quality Filtering/OTU picking and Analysis

Some hints regarding the

Execute command: Shift+Enter

In [*] means code is running!

In [ ]:
#Load for visualizations
In [ ]:
from IPython.display import FileLink, FileLinks,Image, IFrame

Quality Filtering/OTU picking

In [ ]:
cd ~/kellyanne/
In [ ]:
ls
In [ ]:
cd KD1/
In [ ]:
ls
In [ ]:
#_R1_ = forward read
#_R2_ = reverse read

1. Trimming reads (with Trimmomatic)

In [ ]:
#generates 4 output files
#SLIDINGWINDOW:<windowSize>:<requiredQuality>
#HEADCROP:<length>
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!fastqc KD1_S51_L001_R1_001.fastq.gz
In [ ]:
IFrame('/files/kellyanne/KD1/KD1_S51_L001_R1_001_fastqc.html', width=1400, height=700)
In [ ]:
!fastqc output_forward_paired.fastq
In [ ]:
IFrame('/files/kellyanne/KD1/output_forward_paired_fastqc.html', width=1400, height=700)
In [ ]:
!fastqc KD1_S51_L001_R2_001.fastq.gz
In [ ]:
IFrame('/files/kellyanne/KD1/KD1_S51_L001_R2_001_fastqc.html', width=1400, height=700)
In [ ]:
!fastqc output_reverse_paired.fastq
In [ ]:
IFrame('/files/kellyanne/KD1/output_reverse_paired_fastqc.html', width=1400, height=700)
In [ ]:
ls
In [ ]:
rm *.html
In [ ]:
rm *.zip

2. Join paired ends

In [ ]:
ls
In [ ]:
#Joins forward and reverse reads
#minimum overlap 30 base pairs (-j); maximum 10% difference (-p)
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
ls
In [ ]:
ls paired/

3. Convert Fastq to Fasta

In [ ]:
#Fastq = Fasta/fna + Qual
#Needed for next step
In [ ]:
!head paired/fastqjoin.join.fastq
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
!head fastqjoin.join.fna
In [ ]:
cd ../

4. Repeat for each sample

In [ ]:
#KD2
In [ ]:
cd KD2/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
cd ../
In [ ]:
#KD3
In [ ]:
cd KD3/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
cd ../
In [ ]:
#KD4
In [ ]:
cd KD4/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
cd ../
In [ ]:
#KD5/
In [ ]:
cd KD5/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
cd ../
In [ ]:
#KD6
In [ ]:
cd KD6/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
cd ../
In [ ]:
#KD7
In [ ]:
cd KD7/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
cd ../
In [ ]:
#KD8
In [ ]:
cd KD8/
In [ ]:
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100 
In [ ]:
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10     
In [ ]:
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
In [ ]:
ls
In [ ]:
cd ../
In [ ]:
ls

5. Combine joined files and rename each with unique identifier

In [ ]:
#Makes new directory called "Seqs"
#Copies paired read file to new directory and changes name
#Unique Identifier will become the "QIIME Label"
#corresponds to "InputFileName" column on mappingfile
In [ ]:
!mkdir Seqs
!cp KD1/fastqjoin.join.fna Seqs/KD1
!cp KD2/fastqjoin.join.fna Seqs/KD2
!cp KD3/fastqjoin.join.fna Seqs/KD3
!cp KD4/fastqjoin.join.fna Seqs/KD4
!cp KD5/fastqjoin.join.fna Seqs/KD5
!cp KD6/fastqjoin.join.fna Seqs/KD6
!cp KD7/fastqjoin.join.fna Seqs/KD7
!cp KD8/fastqjoin.join.fna Seqs/KD8
In [ ]:
cat mappingfile.txt
In [ ]:
ls Seqs/

6. Add QIIME Labels

In [ ]:
#Adds Unique Identifer from "InputFileName" column onto appropriate read
#generates combined file "combined_seqs.fna"
In [ ]:
!add_qiime_labels.py -m mappingfile.txt -i Seqs/ -c InputFileName
In [ ]:
!head combined_seqs.fna

7. Closed Reference OTU Picking

In [ ]:
#Code takes ~10 minutes
!pick_closed_reference_otus.py -i combined_seqs.fna -o closed_ref
In [ ]:
ls

Analysis

In [ ]:
ls closed_ref/
In [ ]:
#Make Working Directory and move files into it
In [ ]:
mkdir Workshop_Analysis
In [ ]:
cp mappingfile.txt Workshop_Analysis/
In [ ]:
cp closed_ref/otu_table.biom Workshop_Analysis
In [ ]:
cp closed_ref/97_otus.tree Workshop_Analysis/
In [ ]:
#Only use this if you were unable to make an OTU table (remove '#')
#!cp Just_in_case/otu_table.biom Workshop_Analysis/
In [ ]:
#Only use this if you were unable to make an OTU table (remove '#')
#!cp Just_in_case/97_otus.tree Workshop_Analysis/
In [ ]:
cd Workshop_Analysis/
In [ ]:
ls
In [ ]:
#Filter out OTUs not found in at least 3 samples
In [ ]:
!filter_otus_from_otu_table.py -i otu_table.biom -o otu_table_filtered_min3samp.biom -s 3

Beta Diversity

Beta Diversity Through Plots

In [ ]:
!beta_diversity_through_plots.py -i otu_table_filtered_min3samp.biom -m mappingfile.txt -o beta_diversity_through_plots -t 97_otus.tree 
In [ ]:
#Unweighted Unifrac PCoA
In [ ]:
IFrame('/files/kellyanne/Workshop_Analysis/beta_diversity_through_plots/unweighted_unifrac_emperor_pcoa_plot/index.html', width=800, height=500)
In [ ]:
#Weighted Unifrac PCoA
In [ ]:
IFrame('/files/kellyanne/Workshop_Analysis/beta_diversity_through_plots/weighted_unifrac_emperor_pcoa_plot/index.html', width=800, height=500)

Distance Boxplots

In [ ]:
!beta_diversity.py -s
In [ ]:
#Default is unweighted and weighted unifrac
In [ ]:
!beta_diversity.py -i otu_table_filtered_min3samp.biom -o beta_diversity -t 97_otus.tree
In [ ]:
#Unweighted distance boxplot by Genotype (Column in mappingfile)
In [ ]:
!make_distance_boxplots.py -m mappingfile.txt -o distance_boxplots_unweighted -d beta_diversity/unweighted_unifrac_otu_table_filtered_min3samp.txt -f Genotype -n 999 --suppress_all_within --suppress_all_between --suppress_individual_between
In [ ]:
IFrame("/files/kellyanne/Workshop_Analysis/distance_boxplots_unweighted/Genotype_Distances.pdf",width=300, height=650)
In [ ]:
cat distance_boxplots_unweighted/Genotype_Stats.txt
In [ ]:
#Weighted distance boxplot by Genotype (Column in mappingfile)
In [ ]:
!make_distance_boxplots.py -m mappingfile.txt -o distance_boxplots_weighted -d beta_diversity/weighted_unifrac_otu_table_filtered_min3samp.txt -f Genotype -n 999 --suppress_all_within --suppress_all_between --suppress_individual_between 
In [ ]:
IFrame("/files/kellyanne/Workshop_Analysis/distance_boxplots_weighted/Genotype_Distances.pdf",width=300, height=650)
In [ ]:
cat distance_boxplots_weighted/Genotype_Stats.txt
In [ ]:
#Unweighted Permanova
In [ ]:
!compare_categories.py --method permanova -i beta_diversity/unweighted_unifrac_otu_table_filtered_min3samp.txt -m mappingfile.txt -c Genotype -o compare_categories_unweighted
In [ ]:
cat compare_categories_unweighted/permanova_results.txt
In [ ]:
#Weighted Permanova
In [ ]:
!compare_categories.py --method permanova -i beta_diversity/weighted_unifrac_otu_table_filtered_min3samp.txt -m mappingfile.txt -c Genotype -o compare_categories_weighted
In [ ]:
cat compare_categories_weighted/permanova_results.txt

Differential Abundance

In [ ]:
#Break down by taxonomy
#-a flag indicates absolute abundance, needed for differential abundance
#Don't use normalized data, DESeq2 considers unequal sampling depth
In [ ]:
!summarize_taxa.py -i otu_table_filtered_min3samp.biom -o summarize_taxa --suppress_classic_table_output -a
In [ ]:
cp mappingfile.txt summarize_taxa/mappingfile.txt
In [ ]:
cd summarize_taxa/
In [ ]:
ls
In [ ]:
#-c flag indicates column in mappingfile to compare
#-x and -y flags indicate which groups within column to compare
In [ ]:
!differential_abundance.py -i otu_table_filtered_min3samp_L2.biom -a DESeq2_nbinom -m mappingfile.txt -c Genotype -x WT -y KO -o differential_abundance_L2
In [ ]:
cat differential_abundance_L2
In [ ]:
!differential_abundance.py -i otu_table_filtered_min3samp_L3.biom -a DESeq2_nbinom -m mappingfile.txt -c Genotype -x WT -y KO -o differential_abundance_L3
In [ ]:
cat differential_abundance_L3
In [ ]:
cd ../

Alpha Diversity

Rarefy and Diversity Indicies

In [ ]:
!biom summarize-table -i otu_table_filtered_min3samp.biom
In [ ]:
!single_rarefaction.py -i otu_table_filtered_min3samp.biom -o otu_table_filtered_min3samp_rarefied_16800 -d 16800
In [ ]:
!alpha_diversity.py -i otu_table_filtered_min3samp_rarefied_16800 -m observed_otus,shannon,chao1,simpson_e -t 97_otus.tree -o alpha_diversity
In [ ]:
cat alpha_diversity

Alpha Rarefaction

In [ ]:
!alpha_rarefaction.py -f -i otu_table_filtered_min3samp.biom -m mappingfile.txt -o alpha_rarefaction -t 97_otus.tree 
In [ ]:
#takes ~20 minutes
In [ ]:
IFrame("/files/kellyanne/Workshop_Analysis/alpha_rarefaction/alpha_rarefaction_plots/rarefaction_plots.html",width=900, height=650)