# Source code for rpg.RapidPeptidesGenerator

# -*- coding: utf-8 -*-
#!/usr/bin/env python3.11

# Rapid Peptide Generator (RPG) is a software dedicated to predict     #
# cleavage sites of proteases. User can create his own enzyme,         #
# following a simple grammar.                                          #
#                                                                      #
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the         #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <https://www.gnu.org/licenses/>.                         #

"""Main file of RPG software, handle input/output and launch
necessary functions
"""

# Version components, kept as strings so they can be joined directly
__version_info__ = ('2', '0', '5')
# Dotted version string built from the components above
__version__ = '.'.join(__version_info__)
# Release date, shown next to the version by the --version option
__revision_date__ = "2024-06-12"
__author__ = "Nicolas Maillet"

import argparse
import os
import sys
import uuid
from pathlib import Path
#from context import rpg
from rpg import core
from rpg import digest
from rpg import enzyme
from rpg.enzymes_definition import AVAILABLE_ENZYMES
# The user's home directory is put first on the import path so that the
# `rpg_user` module (user-defined enzymes) can be imported from there.
sys.path.insert(0, str(Path.home())) # Home path
from rpg_user import AVAILABLE_ENZYMES_USER

# NOTE(review): this assignment was missing although every function of the
# module reads ALL_ENZYMES and the docstring below documents it. The
# concatenation assumes both operands are lists -- confirm against
# rpg.enzymes_definition and rpg_user.
ALL_ENZYMES = AVAILABLE_ENZYMES + AVAILABLE_ENZYMES_USER
"""All available enzymes in RPG."""

def restricted_float(mc_val):
    """Restrict input miscleavage value to a float between 0 and 100.

    Used as an argparse ``type=`` callable for the ``-m`` option. Problems
    are reported through ``core.handle_errors`` with level 0 (presumably
    fatal -- confirm in ``rpg.core``).

    :param mc_val: value to test
    :type mc_val: str or float
    :return: the inputed value converted to float if correct
    :rtype: float
    """
    try:
        mc_val = float(mc_val)
    except (TypeError, ValueError):
        # Not convertible to a float at all
        core.handle_errors("miscleavage value should be a float between 0 "
                           "and 100.", 0, "Type ")
        return None
    # Positive range test: NaN fails every comparison, so it is rejected
    # here, whereas `mc_val < 0 or mc_val > 100` would let it through.
    if not 0 <= mc_val <= 100:
        core.handle_errors("miscleavage value should be between 0 and "
                           "100.", 0, "Value ")
    return mc_val
def restricted_int(mc_val):
    """Restrict input miscleavage value to a non-negative int.

    Used as an argparse ``type=`` callable for the ``-t`` option. Problems
    are reported through ``core.handle_errors`` with level 0 (presumably
    fatal -- confirm in ``rpg.core``).

    :param mc_val: value to test
    :type mc_val: str or int
    :return: the inputed value converted to int if correct
    :rtype: int
    """
    try:
        mc_val = int(mc_val)
    except (TypeError, ValueError):
        # Not convertible to an integer at all
        core.handle_errors("miscleavage value should be an integer.", 0,
                           "Type ")
        return None
    if mc_val < 0:
        # 0 is a legal value, so the message says "or equal"
        core.handle_errors("miscleavage value should be greater than or "
                           "equal to 0.", 0, "Value ")
    return mc_val
def restricted_enzyme_id(enz_id):
    """Restrict input enzyme id to a str corresponding to an enzyme.

    The value is accepted when it equals the id of an enzyme of
    ``ALL_ENZYMES`` or, ignoring case, its name. Otherwise the problem is
    reported through ``core.handle_errors`` with level 0.

    :param enz_id: value to test (id or name of an enzyme)
    :type enz_id: str
    :return: the inputed enzyme id
    :rtype: str
    """
    wanted = str(enz_id)
    known_ids = {str(enz.id_) for enz in ALL_ENZYMES}
    # NOTE(review): assumes each enzyme exposes its name as `.name`
    # -- confirm against rpg.enzyme.Enzyme.
    known_names = {enz.name.casefold() for enz in ALL_ENZYMES}
    if wanted not in known_ids and wanted.casefold() not in known_names:
        core.handle_errors(f"id {enz_id} does not correspond to any "
                           "enzyme. Use -l to get enzyme ids.", 0, "Input ")
    return wanted
def list_enzyme():
    """Print all available enzymes, one ``id: name`` line per enzyme."""
    for enz in ALL_ENZYMES:
        # NOTE(review): assumes each enzyme exposes its name as `.name`
        # -- confirm against rpg.enzyme.Enzyme.
        print("%i: %s" % (enz.id_, enz.name))
def create_enzymes_to_use(enzymes, miscleavage):
    """Create the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.

    Each requested enzyme is looked up in ``ALL_ENZYMES`` by id or, ignoring
    case, by name. The n-th miscleavage value is assigned to the n-th
    requested enzyme; enzymes beyond the last miscleavage value keep their
    current ``ratio_miscleavage``. Requests matching no enzyme are skipped.

    :param enzymes: enzymes ids or names chosen by user
    :param miscleavage: associated miscleavage values
    :type enzymes: list(str)
    :type miscleavage: list(float)

    :return: the matching enzymes, in the order they were requested
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)
    """
    enzymes_to_use = []
    # Nothing requested: nothing to look up
    if not enzymes:
        return enzymes_to_use

    # Too much miscleavage values: keep only the first ones
    if len(miscleavage) > len(enzymes):
        core.handle_errors("Too much miscleavage values. Last values"
                           " will be ignored.")
        miscleavage = miscleavage[:len(enzymes)]

    for position, requested in enumerate(enzymes):
        wanted = str(requested)
        for enz in ALL_ENZYMES:
            # NOTE(review): assumes each enzyme exposes its name as `.name`
            # -- confirm against rpg.enzyme.Enzyme.
            if str(enz.id_) == wanted or \
               enz.name.casefold() == wanted.casefold():
                # Only the leading enzymes have an associated value.
                # The shared Enzyme object itself is modified here.
                if position < len(miscleavage):
                    enz.ratio_miscleavage = miscleavage[position]
                enzymes_to_use.append(enz)
    return enzymes_to_use
# Not tested
def get_enzymes_to_use(mode, id_enz_selected, miscleavage):
    """Get the list of chosen :py:class:`~rpg.enzyme.Enzyme` to use.

    The enzymes given on the command line are tried first. When none of
    them is usable, the available enzymes are listed and the user is asked,
    repeatedly, for enzymes (and miscleavage values in sequential mode)
    until at least one enzyme is found or the user quits.

    :param mode: Digestion mode. Miscleavage values are only asked for when it is 'sequential'
    :param id_enz_selected: enzyme's ids or names chosen by user
    :param miscleavage: associated miscleavage values
    :type mode: str
    :type id_enz_selected: list(str)
    :type miscleavage: list(float)

    :return: the enzymes to use, as built by :py:func:`create_enzymes_to_use`
    :rtype: list(:py:class:`~rpg.enzyme.Enzyme`)
    """
    # Get the correct Enzymes inputed
    enzymes_to_use = create_enzymes_to_use(id_enz_selected, miscleavage)
    if enzymes_to_use:
        return enzymes_to_use

    # No good Enzymes inputed, let user choose
    list_enzyme()
    while not enzymes_to_use:
        id_enz_inp = input("Choose which enzyme(s) to use, separated by"
                           " space (example: 1 5 6). (q) to quit:\n")
        # Rebuilt at every attempt: keeping ids of a failed attempt would
        # shift the pairing between enzymes and miscleavage values.
        # split() without argument also drops empty tokens.
        id_enz_inputed = id_enz_inp.split()
        # Quit only on a standalone token, so that an enzyme name
        # containing the letter 'q' remains selectable.
        if any(tok.casefold() in ("q", "quit") for tok in id_enz_inputed):
            sys.exit(0)
        mc_enz_inputed = []
        if mode == "sequential":
            mc_enz_inp = input("Percentage of miscleavage per inputed"
                               " enzyme (default 0), separated by sapce"
                               " (example: 1.2 5 6):\n")
            for raw_value in mc_enz_inp.split():
                try:
                    mc_enz_inputed.append(float(raw_value))
                except ValueError:
                    # Not a float: warn and skip this value
                    core.handle_errors("'%s' should be an floating"
                                       " value. This values will be"
                                       " ignored." % raw_value)
        # Get the correct Enzyme if enzymes inputed
        enzymes_to_use = create_enzymes_to_use(id_enz_inputed,
                                               mc_enz_inputed)
    return enzymes_to_use
# Not tested
def _with_default_extension(filename, fmt):
    """Return `filename`, completed with `.fmt` when it has no extension."""
    # splitext only looks at the last path component, so a dot in a
    # directory part (e.g. "./out") is not mistaken for an extension.
    if os.path.splitext(filename)[1]:
        return filename
    return filename + "." + fmt


def main():
    """Launcher of RapidPeptidesGenerator.

    Parse the command line, then either manage user-defined enzymes
    (-a / -b), list the available enzymes (-l), or digest the input
    (-i / -s) with the selected enzymes and output the resulting peptides.
    """
    # NOTE(review): both URLs were truncated in the source this block was
    # restored from (only "https://gitlab.p" and
    # "https://rapid-peptide-genera" were left) -- confirm them.
    repo_url = "https://gitlab.pasteur.fr/nmaillet/rpg/"
    doc_url = "https://rapid-peptide-generator.readthedocs.io"

    parser = argparse.ArgumentParser(
        description="This software takes protein sequences as input (-i "
                    "option). All sequences will be cleaved according to "
                    "selected enzymes (-e option) and given miscleavage "
                    "percentage (-m option). Cleaving can be sequential or "
                    "concurrent (-d option). Resulting peptides are "
                    "outputted in a file (-o option) in fasta, csv or tsv "
                    "format (-f option). Classical enzymes are included (-l "
                    "option to print available enzymes) but it is possible "
                    "to define other enzymes (-a option). See " + repo_url +
                    " and " + doc_url + " for more informations.")
    # Exactly one of -a, -b, -i, -l, -s must be given
    group_launch = parser.add_mutually_exclusive_group(required=True)
    group_launch.add_argument("-a", "--addenzyme", action="store_true",
                              help="Create a new enzyme. See " + doc_url +
                                   " for more information")
    group_launch.add_argument("-b", "--delenzyme", metavar="str",
                              help="Delete a user-define enzyme. See " +
                                   doc_url + " for more information")
    parser.add_argument("-d", "--digest", metavar="str",
                        choices=['s', 'sequential', 'c', 'concurrent'],
                        default="s",
                        help="Digestion mode. Either 's', 'sequential', "
                             "'c' or 'concurrent' (default: s)")
    parser.add_argument("-e", "--enzymes", metavar="str", default=[],
                        nargs='+', type=restricted_enzyme_id,
                        help="Id or name of enzyme(s) to use (e.g. -e 1 "
                             "Asp-N 10 to use enzymes 1, Asp-N and 10). Use "
                             "-l first to get enzyme ids or names")
    parser.add_argument("-f", "--fmt", metavar="str",
                        choices=['fasta', 'csv', 'tsv'], default="fasta",
                        help="Output file format. Either 'fasta', 'csv', or "
                             "'tsv' (default: fasta)")
    group_launch.add_argument("-i", "--inputdata", metavar="path",
                              help="Input file, in fasta / fastq format "
                                   "(gzipped or not)")
    group_launch.add_argument("-l", "--list", action="store_true",
                              help="Display the list of available enzymes")
    group_launch.add_argument("-s", "--sequence", metavar="str",
                              help="Input a single protein sequence without "
                                   "commentary")
    # -m and -t cannot be used together
    group_mc = parser.add_mutually_exclusive_group(required=False)
    group_mc.add_argument("-m", "--miscleavage", metavar="float", default=[],
                          nargs='+', type=restricted_float,
                          help="Percentage of miscleavage, between 0 and "
                               "100, by enzyme(s). It should be in the same "
                               "order than -enzymes options (e.g. -m 15 5 "
                               "10). Only for sequential digestion (default:"
                               " 0)")
    group_mc.add_argument("-t", "--theoretical", metavar="int", default=[],
                          nargs='+', type=restricted_int,
                          help="Maximum number of miscleavages, by "
                               "enzyme(s). RPG will produce all theoretical "
                               "peptides allowing at most -t miscleavages. "
                               "It should be in the same order than -enzymes"
                               " options (e.g. -t 1 3 2). Only for "
                               "sequential digestion (default: 0)")
    parser.add_argument("-n", "--noninteractive", action='store_true',
                        help="Non-interactive mode. No standard output, "
                             "only error(s) (--quiet enable, overwrite -v). "
                             "If output filename already exists, output "
                             "file will be overwritten.")
    parser.add_argument("-x", "--cleave", type=str, metavar="str",
                        default=[], nargs='+',
                        help="Cleavage site(s) when adding enzyme in "
                             "non-interactive (e.g. -x \"(D or E,)\" or -x "
                             "\"(D,)\" \"(E,)\"")
    parser.add_argument("-z", "--exception", type=str, metavar="str",
                        default=[], nargs='+',
                        help="Exception(s) when adding enzyme in "
                             "non-interactive (e.g. -z \"(D,)(W)\" "
                             "\"(D,)(A)\"")
    parser.add_argument("-y", "--enzname", type=str, metavar="str",
                        default="",
                        help="Enzyme's name when adding enzyme in "
                             "non-interactive.")
    parser.add_argument("-p", "--pka", metavar="str",
                        choices=['ipc2', 'stryer', 'ipc'], default="ipc2",
                        help="Define pKa values. Either 'ipc2' 'stryer' or "
                             "'ipc' (default: ipc2)")
    parser.add_argument("-w", "--mass", metavar="str",
                        choices=['avg', 'mono'], default="avg",
                        help="Define atomic mass values. Either 'avg' for "
                             "average mass or 'mono' for monoisotopic mass "
                             "(default: avg)")
    group_output = parser.add_mutually_exclusive_group()
    group_output.add_argument("-o", "--outputfile", type=str, metavar="str",
                              default="",
                              help="Optional result file to output result "
                                   "peptides.")
    group_output.add_argument("-r", "--randomname", action="store_true",
                              help="Random (not used) output file name")
    parser.add_argument("-c", "--processes", type=int, metavar="int",
                        default=1,
                        help="Number of parallel processes to use (default:"
                             " 1)")
    group_verbose = parser.add_mutually_exclusive_group()
    group_verbose.add_argument("-q", "--quiet", action="store_true",
                               help="No standard output, only error(s)")
    group_verbose.add_argument("-v", "--verbose", action="count", default=0,
                               help="Increase output verbosity. -vv will "
                                    "increase more than -v")
    parser.add_argument("--version", action="version",
                        version="%(prog)s " + __version__ + " from " +
                        __revision_date__)
    args = parser.parse_args()

    # --addenzyme option
    if args.addenzyme:
        if args.noninteractive:
            # We need a name
            if not args.enzname:
                parser.error("argument -y/--enzname is mandatory")
            # We need at least a rule. This test used to check
            # args.enzname a second time and could never trigger.
            if not args.cleave:
                parser.error("argument -x/--cleave is mandatory")
            # Create the enzyme
            enzyme.user_creation_enzyme_non_interactive(ALL_ENZYMES,
                                                        args.enzname,
                                                        args.cleave,
                                                        args.exception)
        else:
            enzyme.user_creation_enzyme(ALL_ENZYMES)
        sys.exit(0)

    # --delenzyme option
    if args.delenzyme:
        enzyme.delete_enzyme(ALL_ENZYMES, args.delenzyme)
        sys.exit(0)

    # --digest option
    mode = "sequential"
    if args.digest in ("c", "concurrent"):
        mode = "concurrent"
        args.miscleavage = []  # No miscleavage on concurrent, infinite time

    # --pka option
    aa_pka = core.AA_PKA_IPC_2
    if args.pka == "stryer":
        aa_pka = core.AA_PKA_S
    elif args.pka == "ipc":
        aa_pka = core.AA_PKA_IPC

    # --mass option
    aa_mass = core.AA_MASS_AVERAGE
    water_mass = core.WATER_MASS
    if args.mass == "mono":
        aa_mass = core.AA_MASS_MONO
        water_mass = core.WATER_MASS_MONO

    # --list option
    if args.list:
        list_enzyme()
        sys.exit(0)

    # --nointeractive option implies quiet and overrides verbosity
    if args.noninteractive:
        args.quiet = 1
        args.verbose = 0

    # Be sure to have at least 1 process
    if args.processes <= 0:
        parser.error("argument -c/--processes should be greater than 0")

    # input data
    input_data = None
    input_type = None
    if args.inputdata:
        if os.path.isfile(args.inputdata):
            input_data = args.inputdata
            input_type = "file"
        else:
            core.handle_errors("file not found (%s)." % args.inputdata, 0,
                               "Input ")
    elif args.sequence:
        input_data = args.sequence
        input_type = "sequence"

    # --outputfile / --randomname options
    output_file = ""  # No output file (default)
    if args.randomname:
        # Generate random file names until one is not already used
        output_file = str(uuid.uuid4().hex) + "." + args.fmt
        while os.path.isfile(output_file):
            output_file = str(uuid.uuid4().hex) + "." + args.fmt
    elif args.outputfile:
        # Chosen file name, with the default extension if it has none
        output_file = _with_default_extension(str(args.outputfile), args.fmt)
        # In interactive mode, never overwrite silently
        if not args.noninteractive:
            while os.path.isfile(output_file):
                core.handle_errors("File '%s' already exit!" % output_file)
                if input("Overwrite it? (y/n)\n") == "y":
                    break
                output_file = _with_default_extension(
                    input("Output filename?\n"), args.fmt)

    # More mis cleavage than enzyme
    if len(args.miscleavage) > len(args.enzymes):
        core.handle_errors("Too much miscleavage values. Last values will "
                           "be ignored.")
        args.miscleavage = args.miscleavage[:len(args.enzymes)]

    # Get all enzymes inputed
    enzymes_to_use = get_enzymes_to_use(mode, args.enzymes, args.miscleavage)

    # Output options
    if args.verbose:
        print("Input: " + input_data)
        # NOTE(review): assumes each enzyme exposes its name as `.name`
        # -- confirm against rpg.enzyme.Enzyme.
        print("Enzyme(s) used: " + str([enz.name for enz in enzymes_to_use]))
        print("Mode: " + mode)
        print("miscleavage ratio: " +
              str([enz.ratio_miscleavage for enz in enzymes_to_use]))
        if output_file:
            print("Output file: " + os.path.abspath(output_file))

    # Make the actual digestion of input data
    results_digestion = digest.digest_from_input(input_data, input_type,
                                                 enzymes_to_use, mode, aa_pka,
                                                 aa_mass, water_mass,
                                                 args.processes)

    # We need to compute all theoretical peptides for at least one enzyme
    if args.theoretical:
        # More mis cleavage than enzyme: extra values are never read below
        if len(args.theoretical) > len(args.enzymes):
            core.handle_errors("Too much miscleavage values. Last values "
                               "will be ignored.")
        # Not enough mis cleavage values, complete with 0
        elif len(args.theoretical) < len(args.enzymes):
            args.theoretical += [0]*(len(args.enzymes) -
                                     len(args.theoretical))
        # Dict of enzymes/max mis cleavage, keyed by enzyme name
        max_mc_by_enzyme = {}
        for pos, enz_id in enumerate(args.enzymes):
            max_mc_by_enzyme[enzyme.get_enz_name(ALL_ENZYMES, enz_id)] = \
                args.theoretical[pos]
        # Compute the results
        digest.theoretical_peptides(results_digestion, max_mc_by_enzyme)

    # Output results
    core.output_results(output_file, results_digestion, args.fmt, args.quiet,
                        args.verbose)
### Let'z go ###
if __name__ == '__main__':
    # Run the command-line launcher only when executed as a script
    main()
    # The end
    sys.exit(0)