#Load for visualizations
from IPython.display import FileLink, FileLinks,Image, IFrame
cd ~/kellyanne/
ls
cd KD1/
ls
#_R1_ = forward read
#_R2_ = reverse read
#generates 4 output files
#SLIDINGWINDOW:<windowSize>:<requiredQuality>
#HEADCROP:<length>
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!fastqc KD1_S51_L001_R1_001.fastq.gz
IFrame('/files/kellyanne/KD1/KD1_S51_L001_R1_001_fastqc.html', width=1400, height=700)
!fastqc output_forward_paired.fastq
IFrame('/files/kellyanne/KD1/output_forward_paired_fastqc.html', width=1400, height=700)
!fastqc KD1_S51_L001_R2_001.fastq.gz
IFrame('/files/kellyanne/KD1/KD1_S51_L001_R2_001_fastqc.html', width=1400, height=700)
!fastqc output_reverse_paired.fastq
IFrame('/files/kellyanne/KD1/output_reverse_paired_fastqc.html', width=1400, height=700)
ls
rm *.html
rm *.zip
ls
#Joins forward and reverse reads
#minimum overlap 30 base pairs (-j); maximum 10% difference (-p)
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
ls
ls paired/
#Fastq = Fasta/fna + Qual
#Needed for next step
!head paired/fastqjoin.join.fastq
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
!head fastqjoin.join.fna
cd ../
#KD2
cd KD2/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
cd ../
#KD3
cd KD3/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
cd ../
#KD4
cd KD4/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
cd ../
#KD5/
cd KD5/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
cd ../
#KD6
cd KD6/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
cd ../
#KD7
cd KD7/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
cd ../
#KD8
cd KD8/
!trimmomatic PE -phred33 *_R1_001.fastq.gz *_R2_001.fastq.gz output_forward_paired.fastq output_forward_unpaired.fastq output_reverse_paired.fastq output_reverse_unpaired.fastq ILLUMINACLIP:/home/ubuntu/kellyanne/NexteraPE-PE.fa:2:30:10 HEADCROP:18 LEADING:3 TRAILING:3 SLIDINGWINDOW:5:18 MINLEN:100
!join_paired_ends.py -f output_forward_paired.fastq -r output_reverse_paired.fastq -o paired -j 30 -p 10
!convert_fastaqual_fastq.py -f paired/fastqjoin.join.fastq -c fastq_to_fastaqual
ls
cd ../
ls
#Makes new directory called "Seqs"
#Copies paired read file to new directory and changes name
#Unique Identifier will become the "QIIME Label"
#corresponds to "InputFileName" column on mappingfile
!mkdir Seqs
!cp KD1/fastqjoin.join.fna Seqs/KD1
!cp KD2/fastqjoin.join.fna Seqs/KD2
!cp KD3/fastqjoin.join.fna Seqs/KD3
!cp KD4/fastqjoin.join.fna Seqs/KD4
!cp KD5/fastqjoin.join.fna Seqs/KD5
!cp KD6/fastqjoin.join.fna Seqs/KD6
!cp KD7/fastqjoin.join.fna Seqs/KD7
!cp KD8/fastqjoin.join.fna Seqs/KD8
cat mappingfile.txt
ls Seqs/
#Adds Unique Identifer from "InputFileName" column onto appropriate read
#generates combined file "combined_seqs.fna"
!add_qiime_labels.py -m mappingfile.txt -i Seqs/ -c InputFileName
!head combined_seqs.fna
#Code takes ~10 minutes
!pick_closed_reference_otus.py -i combined_seqs.fna -o closed_ref
ls
ls closed_ref/
#Make Working Directory and move files into it
mkdir Workshop_Analysis
cp mappingfile.txt Workshop_Analysis/
cp closed_ref/otu_table.biom Workshop_Analysis
cp closed_ref/97_otus.tree Workshop_Analysis/
#Only use this if you were unable to make an OTU table (remove '#')
#!cp Just_in_case/otu_table.biom Workshop_Analysis/
#Only use this if you were unable to make an OTU table (remove '#')
#!cp Just_in_case/97_otus.tree Workshop_Analysis/
cd Workshop_Analysis/
ls
#Filter out OTUs not found in at least 3 samples
!filter_otus_from_otu_table.py -i otu_table.biom -o otu_table_filtered_min3samp.biom -s 3
!beta_diversity_through_plots.py -i otu_table_filtered_min3samp.biom -m mappingfile.txt -o beta_diversity_through_plots -t 97_otus.tree
#Unweighted Unifrac PCoA
IFrame('/files/kellyanne/Workshop_Analysis/beta_diversity_through_plots/unweighted_unifrac_emperor_pcoa_plot/index.html', width=800, height=500)
#Weighted Unifrac PCoA
IFrame('/files/kellyanne/Workshop_Analysis/beta_diversity_through_plots/weighted_unifrac_emperor_pcoa_plot/index.html', width=800, height=500)
!beta_diversity.py -s
#Default is unweighted and weighted unifrac
!beta_diversity.py -i otu_table_filtered_min3samp.biom -o beta_diversity -t 97_otus.tree
#Unweighted distance boxplot by Genotype (Column in mappingfile)
!make_distance_boxplots.py -m mappingfile.txt -o distance_boxplots_unweighted -d beta_diversity/unweighted_unifrac_otu_table_filtered_min3samp.txt -f Genotype -n 999 --suppress_all_within --suppress_all_between --suppress_individual_between
IFrame("/files/kellyanne/Workshop_Analysis/distance_boxplots_unweighted/Genotype_Distances.pdf",width=300, height=650)
cat distance_boxplots_unweighted/Genotype_Stats.txt
#Weighted distance boxplot by Genotype (Column in mappingfile)
!make_distance_boxplots.py -m mappingfile.txt -o distance_boxplots_weighted -d beta_diversity/weighted_unifrac_otu_table_filtered_min3samp.txt -f Genotype -n 999 --suppress_all_within --suppress_all_between --suppress_individual_between
IFrame("/files/kellyanne/Workshop_Analysis/distance_boxplots_weighted/Genotype_Distances.pdf",width=300, height=650)
cat distance_boxplots_weighted/Genotype_Stats.txt
#Unweighted Permanova
!compare_categories.py --method permanova -i beta_diversity/unweighted_unifrac_otu_table_filtered_min3samp.txt -m mappingfile.txt -c Genotype -o compare_categories_unweighted
cat compare_categories_unweighted/permanova_results.txt
#Weighted Permanova
!compare_categories.py --method permanova -i beta_diversity/weighted_unifrac_otu_table_filtered_min3samp.txt -m mappingfile.txt -c Genotype -o compare_categories_weighted
cat compare_categories_weighted/permanova_results.txt
#Break down by taxonomy
#-a flag indicates absolute abundance, needed for differential abundance
#Don't use normalized data, DESeq2 considers unequal sampling depth
!summarize_taxa.py -i otu_table_filtered_min3samp.biom -o summarize_taxa --suppress_classic_table_output -a
cp mappingfile.txt summarize_taxa/mappingfile.txt
cd summarize_taxa/
ls
#-c flag indicates column in mappingfile to compare
#-x and -y flags indicate which groups within column to compare
!differential_abundance.py -i otu_table_filtered_min3samp_L2.biom -a DESeq2_nbinom -m mappingfile.txt -c Genotype -x WT -y KO -o differential_abundance_L2
cat differential_abundance_L2
!differential_abundance.py -i otu_table_filtered_min3samp_L3.biom -a DESeq2_nbinom -m mappingfile.txt -c Genotype -x WT -y KO -o differential_abundance_L3
cat differential_abundance_L3
cd ../
!biom summarize-table -i otu_table_filtered_min3samp.biom
!single_rarefaction.py -i otu_table_filtered_min3samp.biom -o otu_table_filtered_min3samp_rarefied_16800 -d 16800
!alpha_diversity.py -i otu_table_filtered_min3samp_rarefied_16800 -m observed_otus,shannon,chao1,simpson_e -t 97_otus.tree -o alpha_diversity
cat alpha_diversity
!alpha_rarefaction.py -f -i otu_table_filtered_min3samp.biom -m mappingfile.txt -o alpha_rarefaction -t 97_otus.tree
#takes ~20 minutes
IFrame("/files/kellyanne/Workshop_Analysis/alpha_rarefaction/alpha_rarefaction_plots/rarefaction_plots.html",width=900, height=650)