宏基因組上游分析流程
kneaddata去除宿主基因組和質(zhì)控
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/6M_WR/*/*/*_1.fq.gz); do j=`basename $i`;
echo "/home/ywwang/miniconda3/bin/kneaddata -i1 ${i} -i2 ${i%%_1.fq.gz}_2.fq.gz \
-o /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3/ -v -t 40 --remove-intermediate-output \
--trimmomatic /home/ywwang/software/Trimmomatic-0.38 \
--trimmomatic-options 'ILLUMINACLIP:/home/ywwang/software/Trimmomatic-0.38/adapters/TruSeq2-PE.fa:2:40:15 \
SLIDINGWINDOW:4:20 MINLEN:50' \
--bowtie2-options '--very-sensitive --dovetail' \
-db /home/ywwang/Publicdata/genome/mouse/mouse_GRCm39 \
" > /home/yzhang/workspace/wanglx/KM_MOUSE/pbs/white_test/${j%%_1.fastq.gz}.pbs;done
###上面這一步注意換行符,一定要有,不然跑不了
cd /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3
qsub -q batch -V -l nodes=2:ppn=4?
"/home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3/DP845001358BR_L01_195_1.fq.gz.pbs"
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/pbs/test/*.pbs); do qsub -q batch -V -l nodes=2:ppn=10 $i;done
##把上一步比對到宿主參考基因組的序列移除到contam_seq文件夾下
nohup kneaddata_read_count_table --input /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3 --output /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/kneaddata_read_counts.txt &\
mkdir /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/contam_seq/
mv /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3/*_contam*fastq /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/contam_seq
##合并reads
mkdir /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/output3/*_paired_1.fastq); do j=`basename $i`;
echo "cat? ${i} ${i%%_paired_1.fastq}_paired_2.fastq ${i%%_paired_1.fastq}_unmatched_1.fastq ${i%%_paired_1.fastq}_unmatched_2.fastq? | awk '{if(NR%4==1) print \"@\"NR; else print \$0}'> /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/${j%%_paired_1.fastq}.fastq" >/home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/${j%%_paired_1.fastq}.pbs;done
## 運(yùn)行merge文件夾下的合并reads任務(wù)
cd /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/*.pbs); do qsub -q batch -V -l nodes=4:ppn=12 $i;done
## 計(jì)算count reads
## 放置count的目錄
mkdir /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/
##生成腳本
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/*.fastq); do j=`basename $i`;
echo "/home/yxtu/miniconda3/envs/metaphlan/bin/metaphlan ${i} --nproc 5 --input_type fastq -t rel_ab_w_read_stats --bowtie2db /home/yxtu/miniconda3/envs/metaphlan/lib/python3.7/site-packages/metaphlan/metaphlan_databases/ -x mpa_v30_CHOCOPhlAn_201901 -o /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/${j%%.fastq}_counts.txt" > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/counts_${j%%.fastq}.pbs;done
## 運(yùn)行count文件夾下的計(jì)算count任務(wù)
cd /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/counts_*.pbs); do qsub -q batch -V -l nodes=2:ppn=16 $i;done
## 計(jì)算相對豐度
## 相對豐度的存放文件夾
mkdir /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/
## 舊方法:
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/*.fastq); do j=`basename $i`;
echo "metaphlan ${i} --input_type fastq -o /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/${j%%.fastq}_metaphlan.txt" > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/${j%%.fastq}_metaphlan.pbs;done
## 新方法:
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/merge/*.fastq.bowtie2out.txt); do j=`basename $i`;
echo "/home/yxtu/miniconda3/envs/metaphlan/bin/metaphlan ${i} --nproc 5 --input_type bowtie2out --bowtie2db /home/yxtu/miniconda3/envs/metaphlan/lib/python3.7/site-packages/metaphlan/metaphlan_databases/ -x mpa_v30_CHOCOPhlAn_201901 -o /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/${j%%.fastq}_metaphlan.txt" > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/${j%%.fastq}_metaphlan.pbs;done
## 運(yùn)行relat_abundance文件夾下的計(jì)算相對豐度任務(wù)
cd /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/*_metaphlan.pbs); do qsub -q batch -V -l nodes=2:ppn=16 $i;done
## 合并相對豐度
/home/yxtu/miniconda3/envs/metaphlan/bin/merge_metaphlan_tables.py /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/*metaphlan.txt >/home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt?
把微生物的種類分開,依次是種,屬、門、綱、目、科
mkdir /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/
grep -E '(s__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 't__'|sed 's/^.*s__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_species.txt
grep -E '(g__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 's__'|sed 's/^.*g__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_genus.txt
grep -E '(f__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 'g__'|sed 's/^.*f__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_family.txt
grep -E '(o__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 'f__'|sed 's/^.*o__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_order.txt
grep -E '(c__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 'o__'|sed 's/^.*c__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_class.txt
grep -E '(p__)|(clade_name)' /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/relat_abundance/merged_abundance_table.txt |grep -v 'c__'|sed 's/^.*p__//g'|awk '{$2=null;print}'|sed 's/\ \ /\ /g'|sed 's/\ /\t/g' > /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/Micro_kind/merged_abundance_table_phylum.txt
### merge原始Count文件整理,提取第1,2,5列
cd /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/*_kneaddata_counts.txt);do cut -f1,2,5 ${i} >${i%%_kneaddata_counts.txt}_modify_counts.txt;done
for i in $(ls /home/yzhang/workspace/wanglx/KM_MOUSE/Data/CRC_human/kneaddata/count/*_modify_counts.txt);do sed -e '4d' ${i} > ${i%%_modify_counts.txt}_modify2_counts.txt;done