#### These are the commands used for the example de novo isolate genome assembly
#### tutorial found here: https://astrobiomike.github.io/genomics/de_novo_assembly ####
#
# Each command sits on its own logical line (long ones are continued with a
# trailing backslash) so that a comment never swallows the command after it.


## Quality filtering ##

# running fastqc on the starting fastq files
fastqc B_cepacia_raw_R1.fastq.gz B_cepacia_raw_R2.fastq.gz -t 4

# first trimmomatic quality filtering pass
trimmomatic PE B_cepacia_raw_R1.fastq.gz B_cepacia_raw_R2.fastq.gz \
    BCep_R1_paired.fastq.gz BCep_R1_unpaired.fastq.gz \
    BCep_R2_paired.fastq.gz BCep_R2_unpaired.fastq.gz \
    LEADING:10 TRAILING:10 SLIDINGWINDOW:5:20 MINLEN:151 \
    -threads 4

# second trimmomatic quality filtering pass
# (starts again from the raw reads and writes to the same output file names as
# the first pass, so the first pass's outputs are overwritten)
trimmomatic PE B_cepacia_raw_R1.fastq.gz B_cepacia_raw_R2.fastq.gz \
    BCep_R1_paired.fastq.gz BCep_R1_unpaired.fastq.gz \
    BCep_R2_paired.fastq.gz BCep_R2_unpaired.fastq.gz \
    CROP:140 LEADING:10 TRAILING:10 SLIDINGWINDOW:5:20 MINLEN:140 \
    -threads 4

# running fastqc on the quality-filtered reads
fastqc BCep_R1_paired.fastq.gz BCep_R2_paired.fastq.gz -t 4


## Assembly ##

# error-correction step run on a server
spades.py -1 BCep_R1_paired.fastq.gz -2 BCep_R2_paired.fastq.gz \
    -o spades_error_corrected_reads -t 50 -m 500 --only-error-correction

# NOTE(review): nothing in this listing creates the BCep_R{1,2}_QCd_err_cor.fastq.gz
# files used below; presumably they are the output of the error-correction step
# above, copied/renamed into the working directory -- confirm before running.

# spades assemblies
# default
spades.py -1 BCep_R1_QCd_err_cor.fastq.gz -2 BCep_R2_QCd_err_cor.fastq.gz \
    -o spades_default_assembly -t 4 --only-assembler

# setting specific kmers and --careful mode
spades.py -1 BCep_R1_QCd_err_cor.fastq.gz -2 BCep_R2_QCd_err_cor.fastq.gz \
    -t 4 -o spades_kmers_set_careful_assembly -k 21,33,55,77 --careful --only-assembler

# megahit assemblies
# default
megahit -1 BCep_R1_QCd_err_cor.fastq.gz -2 BCep_R2_QCd_err_cor.fastq.gz \
    -o megahit_default_assembly -t 4

# changing --min-count
megahit -1 BCep_R1_QCd_err_cor.fastq.gz -2 BCep_R2_QCd_err_cor.fastq.gz \
    -o megahit_min_count_3_assembly -t 4 --min-count 3


## Comparing assemblies ##

# running quast
# (the -l label list is kept on a single line so no line break ends up inside
# the quoted string; labels are given in the same order as the assembly files)
quast -o quast_B_cep_out \
    -R reference_genome/BCep_ref.fna -G reference_genome/BCep_ref.gff \
    -l "spades_default, spades_kmers_careful, megahit_default, megahit_min_count_3" \
    spades_default_assembly/contigs.fasta \
    spades_kmers_set_careful_assembly/contigs.fasta \
    megahit_default_assembly/final.contigs.fa \
    megahit_min_count_3_assembly/final.contigs.fa \
    -t 4 -m 1000


## Exploring our assembly with anvi'o ##

# generating contigs db
anvi-gen-contigs-database -f spades_kmers_set_careful_assembly/contigs.fasta \
    -o contigs.db -n B_cepacia_assembly

# HMM searching for single-copy genes and rRNAs
anvi-run-hmms -I Campbell_et_al -c contigs.db -T 4
anvi-run-hmms -I Ribosomal_RNAs -c contigs.db -T 4

# functional annotation with DIAMOND against NCBI's COGs
anvi-setup-ncbi-cogs -T 4 # only needed the first time
anvi-run-ncbi-cogs -c contigs.db --num-threads 4

# exporting Prodigal-identified open-reading frames from anvi'o
anvi-get-sequences-for-gene-calls -c contigs.db -o gene_calls.fa

# setting up and running centrifuge for taxonomy, only needed first time
wget ftp://ftp.ccb.jhu.edu/pub/infphilo/centrifuge/data/p_compressed+h+v.tar.gz
# the downloaded archive is removed only if extraction succeeds
tar -xzvf p_compressed+h+v.tar.gz && rm -rf p_compressed+h+v.tar.gz

# running centrifuge
centrifuge -f -x p_compressed+h+v gene_calls.fa -S centrifuge_hits.tsv -p 4

# importing the taxonomy results into our anvi'o contigs database
# NOTE(review): centrifuge_report.tsv is not named in the centrifuge command
# above; presumably it is centrifuge's default report file -- confirm.
anvi-import-taxonomy-for-genes -c contigs.db \
    -i centrifuge_report.tsv centrifuge_hits.tsv -p centrifuge

# generating mapping files with bowtie2
bowtie2-build spades_kmers_set_careful_assembly/contigs.fasta \
    spades_kmers_set_careful_assembly.btindex
bowtie2 -q -x spades_kmers_set_careful_assembly.btindex \
    -1 BCep_R1_QCd_err_cor.fastq.gz -2 BCep_R2_QCd_err_cor.fastq.gz \
    -p 4 -S spades_kmers_set_careful_assembly.sam

# converting sam to bam
samtools view -bS spades_kmers_set_careful_assembly.sam > B_cep_assembly.bam

# sorting and indexing our bam file (can be done with samtools also)
anvi-init-bam B_cep_assembly.bam -o B_cep.bam

# profiling with anvi'o
anvi-profile -i B_cep.bam -c contigs.db -M 1000 -T 4 --cluster-contigs -o B_cep_profiled/

# pulling ribosomal RNAs
anvi-get-sequences-for-hmm-hits -c contigs.db --hmm-sources Ribosomal_RNAs -o rRNAs.fa

# pulling AA seqs of single-copy HMM hits
anvi-get-sequences-for-hmm-hits -c contigs.db --hmm-sources Campbell_et_al \
    --get-aa-sequences -o bacterial_SCGs.faa

# adding all contigs to a group called "DEFAULT"
anvi-script-add-default-collection -p B_cep_profiled/PROFILE.db

# summarizing
anvi-summarize -c contigs.db -p B_cep_profiled/PROFILE.db -C DEFAULT \
    -o B_cepacia_assembly_summary/

# launching anvi'o interactive interface
anvi-interactive -c contigs.db -p B_cep_profiled/PROFILE.db --title "B. cepacia assembly"