Source code for rpg.sequence

# -*- coding: utf-8 -*-

########################################################################
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# This file is part of Rapid Peptide Generator (RPG) software.         #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Contains classes and functions related to sequences"""
from rpg import core

class Peptide:
    """Definition of a peptide, containing the header of its original
    sequence, an amino acid sequence, the name of the enzyme used to
    produce it and more information.

    :param header: header of the peptide
    :param sequence: sequence in amino acids
    :param enzyme_name: name of the enzyme used
    :param aa_pka: pKa values (IPC / IPC2 / Stryer); must provide the keys
        ``"Nterm"``, ``"Cterm"``, ``"D"``, ``"E"``, ``"C"``, ``"Y"``,
        ``"H"``, ``"K"`` and ``"R"``
    :param aa_mass: mass of each amino acid (average / monoisotopic),
        indexed by the one-letter code found in ``sequence``
    :param water_mass: mass of water (average / monoisotopic)
    :param nb_peptide: number of this peptide (default: 0)
    :param position: position of cleavage on the original sequence
        (default: 0)
    :type header: str
    :type sequence: str
    :type enzyme_name: str
    :type aa_pka: dict()
    :type aa_mass: dict()
    :type water_mass: float
    :type nb_peptide: int
    :type position: int

    :var size: size of the peptide
    :var mass: mass of the peptide, rounded to 5 decimals
    :var p_i: pI of the peptide, rounded to 2 decimals
    :vartype size: int
    :vartype mass: float
    :vartype p_i: float

    :raises KeyError: if ``sequence`` contains a letter missing from
        ``aa_mass`` or if ``aa_pka`` lacks one of the required keys
    """

    def __init__(self, header, sequence, enzyme_name, aa_pka, aa_mass,
                 water_mass, nb_peptide=0, position=0):
        self.header = header  # header of this peptide
        self.sequence = sequence  # peptide sequence
        self.enzyme_name = enzyme_name  # name of the enzyme used
        self.aa_pka = aa_pka  # pKa values for pI calculation
        self.aa_mass = aa_mass  # atomic mass value of all aa
        self.water_mass = water_mass  # atomic mass value of water
        self.nb_peptide = nb_peptide  # number of this peptide
        self.position = position  # position of cleavage
        self.size = len(sequence)  # size of the peptide
        # Mass of the peptide: water plus every residue. The explicit
        # left-to-right accumulation is kept on purpose so the floating
        # point result does not depend on the summation algorithm.
        tmp_mass = water_mass
        for i in sequence:
            tmp_mass += aa_mass[i]
        self.mass = round(tmp_mass, 5)  # mass of the peptide
        # Needs self.sequence and self.aa_pka, hence computed last
        self.p_i = self.get_isoelectric_point()

    # self representation for print
    def __repr__(self):
        """Return a multi-line, human readable description.

        The pKa set is reported as ``Stryer`` or ``IPC`` when ``aa_pka``
        equals the matching constant of :py:mod:`rpg.core`, and as
        ``IPC2`` otherwise.
        """
        pka = "IPC2"
        if self.aa_pka == core.AA_PKA_S:
            pka = "Stryer"
        elif self.aa_pka == core.AA_PKA_IPC:
            pka = "IPC"
        return ("Original header: " + self.header +
                "\nNo. peptide: " + str(self.nb_peptide) +
                "\nEnzyme: " + self.enzyme_name +
                "\nCleav. pos: " + str(self.position) +
                "\nPep. size: " + str(self.size) +
                "\nPep. mass: " + str(self.mass) +
                "\npKa values from: " + pka +
                "\nPep. pI: " + str(self.p_i) +
                "\nSequence: " + self.sequence + "\n")

    # Equality between two Peptides
    def __eq__(self, other):
        """Return True when ``other`` is a Peptide with equal attributes.

        Any object that is not an instance of this class compares unequal.
        """
        # The check must be on `other`: every object is an instance of
        # `object`, so testing `self` against `other.__class__` lets
        # arbitrary objects through and then fails on `other.__dict__`.
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return False

    # Create a clean output according to fmt
    def __format__(self, fmt):
        """Format the peptide on one record.

        :param fmt: ``"fasta"`` (``>`` prefix, ``_`` between fields and
            the sequence on its own line), ``"csv"`` (``,`` between
            fields); any other value uses a tabulation between fields
        :type fmt: str

        :return: the formatted peptide, without trailing newline
        :rtype: str
        """
        if fmt == "fasta":
            prefix, separator, last_separator = ">", "_", "\n"
        elif fmt == "csv":
            prefix, separator, last_separator = "", ",", ","
        else:
            prefix, separator, last_separator = "", "\t", "\t"
        # Main values to print, in output order
        fields = (self.header, str(self.nb_peptide), self.enzyme_name,
                  str(self.position), str(self.size), str(self.mass),
                  str(self.p_i))
        return prefix + separator.join(fields) + last_separator + \
            self.sequence

    def get_isoelectric_point(self):
        """Compute isoelectric point (pI) of the peptide using binary
        search on the pH range [0, 14].

        :return: computed pI, rounded to 2 decimals
        :rtype: float

        :note: This function uses the pKa values stored in ``self.aa_pka``
        """
        pka = self.aa_pka
        seq = self.sequence
        # Residue counts do not change during the search: count them once
        nb_d = seq.count('D')
        nb_e = seq.count('E')
        nb_c = seq.count('C')
        nb_y = seq.count('Y')
        nb_h = seq.count('H')
        nb_k = seq.count('K')
        nb_r = seq.count('R')

        ph_val = 7  # Neutral pH, starting point of binary search
        ph_min = 0.0  # Minimal pH
        ph_max = 14.0  # Maximal pH
        precision = 0.01
        # While we are not precise enough
        while (ph_val - ph_min > precision) or (ph_max - ph_val > precision):
            # Negative charges: C-terminus and acidic side chains
            qn1 = -1.0 / (1.0 + pow(10, (pka["Cterm"] - ph_val)))
            qn2 = -nb_d / (1.0 + pow(10, (pka["D"] - ph_val)))
            qn3 = -nb_e / (1.0 + pow(10, (pka["E"] - ph_val)))
            qn4 = -nb_c / (1.0 + pow(10, (pka["C"] - ph_val)))
            qn5 = -nb_y / (1.0 + pow(10, (pka["Y"] - ph_val)))
            # Positive charges: N-terminus and basic side chains
            qp1 = nb_h / (1.0 + pow(10, (ph_val - pka["H"])))
            qp2 = 1.0 / (1.0 + pow(10, (ph_val - pka["Nterm"])))
            qp3 = nb_k / (1.0 + pow(10, (ph_val - pka["K"])))
            qp4 = nb_r / (1.0 + pow(10, (ph_val - pka["R"])))
            # Net charge at ph_val (summation order kept stable)
            nq_final = qn1 + qn2 + qn3 + qn4 + qn5 + qp1 + qp2 + qp3 + qp4
            # Net charge is negative: the pI is at a smaller pH
            if nq_final < 0.0:
                ph_max = ph_val
                ph_val -= (ph_max - ph_min) / 2
            # Net charge is positive or null: the pI is at a bigger pH
            else:
                ph_min = ph_val
                ph_val += (ph_max - ph_min) / 2
        # We got a good enough pH value
        return round(ph_val, 2)
class Sequence:
    """Definition of an amino acid sequence to digest.

    :param header: header of the sequence
    :param sequence: sequence itself
    :type header: str
    :type sequence: str
    """

    def __init__(self, header, sequence):
        self.header = header  # header of this sequence
        self.sequence = sequence  # the sequence itself

    # self representation for print
    def __repr__(self):
        """Return the header and the sequence, one per line."""
        return "Header: " + self.header + "\nSequence: " + self.sequence + "\n"

    # Equality between two Sequences
    def __eq__(self, other):
        """Return True when ``other`` is a Sequence with equal attributes.

        Any object that is not an instance of this class compares unequal.
        """
        # The check must be on `other`: every object is an instance of
        # `object`, so testing `self` against `other.__class__` lets
        # arbitrary objects through and then fails on `other.__dict__`.
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return False
def check_sequence(seq):
    """Validate an input sequence.

    Surrounding whitespace is removed and the sequence is converted to
    uppercase before each of its letters is looked up in
    :py:const:`~rpg.core.AMINOACIDS`.

    :param seq: the sequence to check
    :type seq: str

    :return: Sequence in UPPERCASE, without surrounding whitespace
    :rtype: str

    :raises ValueError: on the first letter that is not in
        :py:const:`~rpg.core.AMINOACIDS`
    """
    cleaned = seq.strip().upper()
    # First unknown letter in reading order, or None if all are valid
    unknown = next(
        (letter for letter in cleaned if letter not in core.AMINOACIDS),
        None)
    if unknown is not None:
        raise ValueError("amino acid \"%s\" in %s not recognized."
                         % (unknown, cleaned))
    return cleaned