Source code for rpg.enzyme

# -*- coding: utf-8 -*-

########################################################################
# Author: Nicolas Maillet                                              #
# Copyright © 2018 Institut Pasteur, Paris.                            #
# See the COPYRIGHT file for details                                   #
#                                                                      #
# This file is part of Rapid Peptide Generator (RPG) software.         #
#                                                                      #
# RPG is free software: you can redistribute it and/or modify          #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or    #
# any later version.                                                   #
#                                                                      #
# RPG is distributed in the hope that it will be useful,               #
# but WITHOUT ANY WARRANTY; without even the implied warranty of       #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the        #
# GNU General Public License for more details.                         #
#                                                                      #
# You should have received a copy of the GNU General Public license    #
# along with RPG (LICENSE file).                                       #
# If not, see <http://www.gnu.org/licenses/>.                          #
########################################################################

"""Contains class and functions related to enzymes definition and use"""
import os
import re
import sys
from pathlib import Path
from rpg import core
from rpg import rule

# Path of the file holding the user-defined enzymes (in the home directory)
DEFUSERENZFILE = f"{Path.home()}/rpg_user.py"

# On first use, create the user file with the header that the enzyme
# declarations appended later rely on (imports, enzymes list, id counter)
if not os.path.isfile(DEFUSERENZFILE):
    with open(DEFUSERENZFILE, "w", encoding="Utf-8") as out_file:
        out_file.writelines((
            "from rpg import enzyme\n",
            "from rpg import rule\n",
            "from rpg import enzymes_definition\n",
            "\n",
            "AVAILABLE_ENZYMES_USER = []\n",
            "CPT_ENZ = enzymes_definition.CPT_ENZ\n",
            "\n",
            "### ENZYMES DECLARATION ###\n",
        ))

class Enzyme:
    """Definition of a cleaving enzyme containing specific rules.

    :param id_: id of the enzyme
    :param name: name of the enzyme
    :param rules: cleaving rules
    :param ratio_miscleavage: miscleavage ratio
    :type id_: int
    :type name: str
    :type rules: list(:py:class:`~rpg.rule.Rule`)
    :type ratio_miscleavage: float
    """

    def __init__(self, id_, name, rules, ratio_miscleavage=0):
        self.id_ = id_
        self.name = name
        self.ratio_miscleavage = ratio_miscleavage
        self.rules = rules

    # self representation for print
    def __repr__(self):
        return "Id: %s\nName: %s\nRatio Miscleavage: %.2f%%\nRules: %s\n" %\
            (self.id_, self.name, self.ratio_miscleavage, self.rules)

    # Equality between two Enzymes
    def __eq__(self, other):
        """Two enzymes are equal when all their attributes are equal.

        The type check is done on ``other``: checking ``self`` against
        ``other.__class__`` let any object through whose class is a base
        of Enzyme (e.g. a bare ``object()``), which then failed on
        ``other.__dict__``.
        """
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        # Let Python try the reflected comparison, it falls back to False
        return NotImplemented

    def write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE):
        """Write enzyme to user's enzyme file as a Python function.

        Nothing is written when the enzyme has no rule.

        :param enz_file: location of user file (default: ~/rpg_user.py)
        :type enz_file: str
        """
        # An enzyme without rules is not written
        if not self.rules:
            return

        parts = [
            # Comment and first line of the Enzyme
            "\n\n\n# User-defined enzyme " + self.name + "\nENZ = []\n\n",
        ]
        # All the main rules and their sub-rules
        parts.extend(a_rule.format_rule() for a_rule in self.rules)
        # The end of the Enzyme
        parts.append("ENZYME = enzyme.Enzyme(CPT_ENZ, \"" + self.name +
                     "\", ENZ, 0)\n# Add it to available enzymes\n"
                     "AVAILABLE_ENZYMES_USER.append(ENZYME)\nCPT_ENZ += 1\n")
        ret = "".join(parts)

        # Write all in the user file. The encoding is explicit because the
        # default user file is created and read back as UTF-8 elsewhere in
        # this module; relying on the locale default could make them
        # disagree on non-ASCII enzyme names.
        try:
            with open(enz_file, "a", encoding="utf-8") as output_file:
                output_file.write(ret)
        except OSError:
            core.handle_errors("'%s' can't be open in '%s' mode" %
                               (enz_file, "a"), 0, "File ")
def get_enz_name(all_enzymes, name):
    """Get the proper name of an enzyme from its name or its id.

    The first enzyme whose id equals ``name`` (when ``name`` is a decimal
    number) or whose name equals ``name`` ignoring case is selected.

    :param all_enzymes: all already existing enzymes
    :param name: name or id of the enzyme
    :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
    :type name: str

    :return: The real name of the enzyme, stripped of surrounding spaces
    :rtype: str
    """
    wanted = str(name)
    # isdecimal() only accepts what int() can parse; isdigit() also accepts
    # characters such as superscript digits, on which int() raises ValueError
    wanted_id = int(wanted) if wanted.isdecimal() else None
    wanted_name = wanted.casefold()

    enz_name = None
    for enz in all_enzymes:
        # Match either on the id or on the case-insensitive name
        if (wanted_id is not None and enz.id_ == wanted_id) or \
           enz.name.casefold() == wanted_name:
            enz_name = enz.name.strip()
            break

    # Enzyme not found (or its stripped name is empty)
    if not enz_name:
        core.handle_errors(f"Not able to find enzyme {name}.", 0)

    # Return the correct name of this enzyme
    return enz_name
def check_enzyme_name(name_new_enz, all_name_enz):
    """Validate the name of a new enzyme.

    :param name_new_enz: name of the new enzyme
    :param all_name_enz: names of already created enzymes
    :type name_new_enz: str
    :type all_name_enz: list(str)

    :return: True if name is correct
    :rtype: bool

    A name is rejected when it is already used, made only of digits,
    empty, or when it contains a space, a tab, a newline, a carriage
    return, a form feed or a vertical tab. The backslash-escaped spellings
    of those characters (a backslash followed by t, n, r, f or v) are
    rejected too. Every problem found is reported through
    ``core.handle_errors``, not only the first one.
    """
    # Forbidden pieces of text, in reporting order, each one with the
    # beginning of its error message
    forbidden = (
        (" ", "Space character found at position "),
        ("\t", "Tab character found at position "),
        ("\\t", "Tab character found at position "),
        ("\n", "Newline character found at position "),
        ("\\n", "Newline character found at position "),
        ("\r", "Carriage return (\\r) character found at position "),
        ("\\r", "Carriage return (\\r) character found at position "),
        ("\f", "Form feed (\\f) character found at position "),
        ("\\f", "Form feed (\\f) character found at position "),
        ("\v", "Vertical Tab (\\v) character found at position "),
        ("\\v", "Vertical Tab (\\v) character found at position "),
    )
    is_valid = True

    # Name already taken?
    if name_new_enz in all_name_enz:
        core.handle_errors("This name exist, please choose another name.", 2)
        is_valid = False

    # Only digits?
    if name_new_enz.isdigit():
        core.handle_errors("Enzyme name can't be only digits, please choose"
                           " another name.", 2)
        is_valid = False

    # Any forbidden character?
    for needle, message in forbidden:
        position = name_new_enz.find(needle)
        if position == -1:
            continue
        # A caret placed under the first offending character, then the
        # message with the 1-based position
        caret_line = " " * position + "^\n"
        core.handle_errors(caret_line + message + str(position + 1) +
                           ", please choose another name.", 2)
        is_valid = False

    # Empty name?
    if name_new_enz == "":
        core.handle_errors("Please choose a not empty name.", 2)
        is_valid = False

    return is_valid
# Not tested
def user_creation_enzyme(all_enzymes):
    """Text-mode form to input one or several new enzymes.

    For each enzyme, the user is asked for a name (until
    ``check_enzyme_name`` accepts it), then for cleaving rules and
    exceptions. The enzyme is then written in the user-defined enzymes
    file.

    :param all_enzymes: all already existing enzymes, their names can not
        be used again

    .. warning:: Not tested
    .. warning:: It could be a problem to immediately use the new enzyme
                 (see in-code warning)
    """
    # Names that can not be used for a new enzyme
    used_names = {enz.name for enz in all_enzymes}

    answer = "y"
    while answer == "y":
        # Ask for the name until it is a valid one
        while True:
            name_new_enz = input("Name of the new enzyme?\n")
            if check_enzyme_name(name_new_enz, used_names):
                break

        # Validated rule -> True for a cleaving rule, False for an exception
        all_rules = {}

        def_rule = "_"
        while def_rule != "":
            # Kind of rule to create, asked until we get c, e or q
            kind = ""
            while kind not in ("c", "e", "q"):
                kind = input("Create a cleaving rule (c) or an exception (e)?"
                             " (q) to quit:\n")
            # No more rules for this enzyme
            if kind == "q":
                break

            # c = True, e = False
            cut = kind == "c"
            if cut:
                prompt = "Write your cleaving rule, (q) to quit:\n"
            else:
                prompt = "Write your exception rule, (q) to quit:\n"

            # Ask for the rule until it is properly defined or abandoned
            validate_rule = ""
            while validate_rule == "":
                def_rule = input(prompt)
                # Abandon this rule
                if def_rule == "q":
                    break
                # Check if input is coherent
                validate_rule = rule.check_rule(def_rule)

            # Keep the rule only if it was validated
            if validate_rule != "":
                all_rules[validate_rule] = cut

        # Get all the rules in correct format
        correct_rules = rule.create_rules(all_rules)

        # Create the enzyme with fake id (auto-inc)
        # .. warning:: It could be a problem to immediately use the new enzyme
        new_enz = Enzyme(-1, name_new_enz, correct_rules)

        # Write in the user-defined enzymes file
        new_enz.write_enzyme_in_user_file()

        # This name is now taken
        used_names.add(new_enz.name)

        # Another one?
        answer = input("Add another enzyme? (y/n)\n")
def user_creation_enzyme_non_interactive(all_enzymes, name_new_enz, rules,
                                         exceptions=None):
    """One-line input of a new enzyme.

    The name and every rule/exception are validated; the process exits
    with status 1 (``sys.exit(1)``) on the first invalid one. Otherwise
    the enzyme is written in the user-defined enzymes file.

    :param all_enzymes: all already existing enzymes
    :param name_new_enz: name of the enzyme to create
    :param rules: rules defining the new enzyme
    :param exceptions: exceptions defining the new enzyme
    :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
    :type name_new_enz: str
    :type rules: list(str)
    :type exceptions: list(str)

    .. warning:: It could be a problem to immediately use the new enzyme
                 (see in-code warning)
    """
    # Names already used by existing enzymes
    all_name_enz = {enz.name for enz in all_enzymes}

    # Is the name of the enzyme valid?
    if not check_enzyme_name(name_new_enz, all_name_enz):
        sys.exit(1)

    # Validated rule -> True for a cleaving rule, False for an exception.
    # Cleaving rules are handled first, so an exception validated to the
    # same text as a cleaving rule replaces it.
    all_rules = {}
    for definitions, is_cleaving in ((rules, True),
                                     (exceptions or (), False)):
        for def_rule in definitions:
            # Validate this rule/exception
            validate_rule = rule.check_rule(def_rule)
            # Not valid, exit
            if validate_rule == "":
                sys.exit(1)
            all_rules[validate_rule] = is_cleaving

    # Get all the rules in correct format
    correct_rules = rule.create_rules(all_rules)

    # Create the enzyme with fake id (auto-inc)
    # .. warning:: It could be a problem to immediately use the new enzyme
    new_enz = Enzyme(-1, name_new_enz, correct_rules)

    # Write in the user-defined enzymes file
    new_enz.write_enzyme_in_user_file()
def delete_enzyme(all_enzymes, name):
    """Delete an enzyme from the user file.

    The declaration of the enzyme is searched in the user file, from its
    ``# User-defined enzyme`` comment to the three lines following its
    ``ENZYME = enzyme.Enzyme(...)`` line. It is removed only when exactly
    one declaration is found; otherwise an error is reported through
    ``core.handle_errors`` and the file is left untouched.

    :param all_enzymes: all already existing enzymes
    :param name: name or id of the enzyme to delete
    :type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
    :type name: str

    .. warning:: Partially tested, remove by ID can't be tested
    """
    # Get the whole content of user file
    with open(DEFUSERENZFILE, encoding="Utf-8") as user_file:
        user_content = user_file.read()

    # Get the name of the enzyme, protected for use in a regex
    enz_name = get_enz_name(all_enzymes, name)
    escaped_name = re.escape(enz_name)

    # Raw strings keep the regex escapes intact. The body is matched
    # lazily, so two declarations sharing a name give two matches instead
    # of one match swallowing every enzyme declared between them.
    declaration = re.compile(
        # Beginning of what to remove
        r"\n\n\n# User-defined enzyme " + escaped_name + r"\n"
        # Rules of the enzyme
        r".*?"
        # End of what to remove
        r'ENZYME = enzyme\.Enzyme\(CPT_ENZ, "' + escaped_name +
        r'", ENZ, 0\)\n'
        # The three lines written after the ENZYME line
        r".*?\n.*?\n.*?\n",
        re.DOTALL)

    # Get everything that needs to be removed
    matches = list(declaration.finditer(user_content))

    # We should have a single match
    if len(matches) == 1:
        # Cut the matched span out of the content
        start, end = matches[0].span()
        new_user_content = user_content[:start] + user_content[end:]
        # Rewrite the user file
        with open(DEFUSERENZFILE, "w", encoding="Utf-8") as user_file:
            user_file.write(new_user_content)
    # No hit or more than one hit, we can't do much automatically
    else:
        # Invite the user to manually edit the file
        core.handle_errors(f"Not able to remove enzyme {name}, please remove "
                           "manually", 0)