biopython1_序列操作

目錄:
1.DNA---RNA---protein互相轉換



# -*- coding: utf-8 -*-
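'''
Author: 231469242@qq.com
Python machine learning and bioinformatics course, recorded by the uploader.
Tencent Classroom entry:
https://ke.qq.com/course/package/31252?tuin=dcbf0ba
NetEase Cloud Classroom entry:
https://study.163.com/series/1202871604.htm?share=2&shareId=400000000398149

In real biology, transcription produces the mRNA as the reverse complement
of the template strand (TCAG -> CUGA). In Biopython and in bioinformatics
generally, however, we usually work directly with the coding strand, because
the mRNA can be obtained from it simply by replacing T with U.
'''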
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.SeqUtils import GC
from Bio.Alphabet import generic_alphabet
# Create a sequence without naming an alphabet.
my_seq = Seq('AGTACACTGGT')
# Interactive session, for reference:
#   >>> my_seq
#   Seq('AGTACACTGGT', Alphabet())
#   >>> print my_seq
#   AGTACACTGGT

# Inspect the alphabet attached to the sequence.
my_seq.alphabet
#   Out: Alphabet()

# Complement of the sequence.
my_seq.complement()
#   Out: Seq('TCATGTGACCA', Alphabet())

# Reverse complement of the sequence.
my_seq.reverse_complement()
#   Out: Seq('ACCAGTGTACT', Alphabet())
# Sequence tagged with the unambiguous-DNA alphabet, used for the GC example.
my_seq = Seq(
    "GATCGATGGGCCTATATAGGATCGAAAATCGC",
    IUPAC.unambiguous_dna,
)

# GC content of the sequence (share of G and C letters).
GC(my_seq)
#   Out: 46.875

# Convert the Seq object to a plain Python string.
str(my_seq)
#   Out: 'GATCGATGGGCCTATATAGGATCGAAAATCGC'
# One protein sequence and one DNA sequence, each carrying its own alphabet.
protein_seq = Seq("EVRNAK", IUPAC.protein)
dna_seq = Seq("ACGT", IUPAC.unambiguous_dna)

# Concatenating sequences of different kinds raises an error:
#   protein_seq + dna_seq
#
# Once both sequences are switched to the generic alphabet, they can be
# concatenated. (A stray "?" line that sat here was leftover page markup and
# a syntax error; it has been removed.)
protein_seq.alphabet = generic_alphabet
dna_seq.alphabet = generic_alphabet
protein_seq + dna_seq
#   Out: Seq('EVRNAKACGT', Alphabet())
# Coding strand of the DNA.
coding_dna = Seq(
    "ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG",
    IUPAC.unambiguous_dna,
)

# Template strand: the reverse complement of the coding strand.
template_dna = coding_dna.reverse_complement()
#   template_dna
#   Seq('CTATCGGGCACCCTTTCAGCGGCCCATTACAATGGCCAT', IUPACUnambiguousDNA())

# transcribe() turns the coding strand into the corresponding mRNA.
messenger_rna = coding_dna.transcribe()
#   Seq('AUGGCCAUUGUAAUGGGCCGCUGAAAGGGUGCCCGAUAG', IUPACUnambiguousRNA())

# Seq objects can also go back from mRNA to the DNA coding strand. Again this
# is just a U -> T substitution together with a change of alphabet.
messenger_rna.back_transcribe()
#   Out: Seq('ATGGCCATTGTAATGGGCCGCTGAAAGGGTGCCCGATAG', IUPACUnambiguousDNA())

# Continuing the transcription example: translate the mRNA into its protein
# sequence. Both RNA and DNA sequences can be translated.
protein_seq1 = messenger_rna.translate()
#   Seq('MAIVMGR*KGAR*', HasStopCodon(IUPACProtein(), '*'))

# Translate straight from the DNA coding strand.
protein_seq2 = coding_dna.translate()
# Editing DNA: obtain a mutable copy of a Seq so it can be changed in place.
my_seq = Seq(
    "GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA",
    IUPAC.unambiguous_dna,
)
mutable_seq = my_seq.tomutable()
#   Out: MutableSeq('GCCATTGTAATGGGCCGCTGAAAGGGTGCCCGA', IUPACUnambiguousDNA())

# Overwrite the letter at index 5.
mutable_seq[5] = "C"
#   MutableSeq('GCCATCGTAATGGGCCGCTGAAAGGGTGCCCGA', IUPACUnambiguousDNA())

# Remove a "T" from the sequence, then reverse it; both calls act in place.
mutable_seq.remove("T")
mutable_seq.reverse()

# Show the final state (only echoed in an interactive session).
mutable_seq