# -*- coding: utf-8 -*-
########################################################################
# Author: Nicolas Maillet #
# Copyright © 2018 Institut Pasteur, Paris. #
# See the COPYRIGHT file for details #
# #
# This file is part of Rapid Peptide Generator (RPG) software. #
# #
# RPG is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
# any later version. #
# #
# RPG is distributed in the hope that it will be useful, #
# but WITHOUT ANY WARRANTY; without even the implied warranty of #
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
# GNU General Public License for more details. #
# #
# You should have received a copy of the GNU General Public license #
# along with RPG (LICENSE file). #
# If not, see <http://www.gnu.org/licenses/>. #
########################################################################
"""Contains class and functions related to enzymes definition and use"""
import os
import re
import sys
from pathlib import Path
from rpg import core
from rpg import rule
DEFUSERENZFILE = str(Path.home()) + "/rpg_user.py"
# Create the enzymes_user file if it does not exist
if not os.path.isfile(DEFUSERENZFILE):
with open(DEFUSERENZFILE, "w", encoding="Utf-8") as out_file:
out_file.write("from rpg import enzyme\nfrom rpg import rule\n"\
"from rpg import enzymes_definition"\
"\n\nAVAILABLE_ENZYMES_USER = []\nCPT_ENZ = enzymes_de"\
"finition.CPT_ENZ\n\n### ENZYMES DECLARATION ###\n")
[docs]
class Enzyme:
"""Definition of an cleaving enzyme containing specific rules.
:param id_: id of the enzyme
:param name: name of the enzyme
:param rules: cleaving rules
:param ratio_miscleavage: miscleavage ratio
:type id_: int
:type name: str
:type rules: list(:py:class:`~rpg.rule.Rule`)
:type ratio_miscleavage: float
"""
def __init__(self, id_, name, rules, ratio_miscleavage=0):
self.id_ = id_
self.name = name
self.ratio_miscleavage = ratio_miscleavage
self.rules = rules
# self representation for print
def __repr__(self):
return "Id: %s\nName: %s\nRatio Miscleavage: %.2f%%\nRules: %s\n" %\
(self.id_, self.name, self.ratio_miscleavage, self.rules)
# Equality between two Enzymes
def __eq__(self, other):
if isinstance(self, other.__class__):
return self.__dict__ == other.__dict__
return False
[docs]
def write_enzyme_in_user_file(self, enz_file=DEFUSERENZFILE):
"""Write enzyme to user's enzyme file as a Python function.
:param enz_file: location of user file (default: ~/rpg_user.py)
:type enz_file: str
"""
if self.rules != []:
# Comment and first line of the Enzyme
ret = "\n\n\n# User-defined enzyme " + self.name + "\nENZ = []\n\n"
# Write all the main rules and their su-rules
for i in self.rules:
ret += i.format_rule()
# Write the end of the Enzyme
ret += "ENZYME = enzyme.Enzyme(CPT_ENZ, \"" + self.name + "\", "\
"ENZ, 0)\n# Add it to available enzymes\nAVAILABLE_ENZYMES"\
"_USER.append(ENZYME)\nCPT_ENZ += 1\n"
# Write all in the user file
try:
with open(enz_file, "a") as output_file:
output_file.write(ret)
except IOError:
core.handle_errors("'%s' can't be open in '%s' mode" %
(enz_file, "a"), 0, "File ")
[docs]
def get_enz_name(all_enzymes, name):
""" Get the proper name of an enzyme
:param all_enzymes: all already existing enzymes
:param name: name or id of the enzyme
:type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
:type name: str
:return: The real name of an enzyme
:rtype: str
"""
enz_name = None
# Get the name
for i in all_enzymes:
# Get the name of this enzyme
if (str(name).isdigit() and i.id_ == int(name)) or \
i.name.casefold() == str(name).casefold():
enz_name = i.name.strip()
break
# Enzyme not found
if not enz_name:
core.handle_errors(f"Not able to find enzyme {name}.", 0)
# Return the correct name of this enzyme
return enz_name
[docs]
def check_enzyme_name(name_new_enz, all_name_enz):
"""Validate the name of a new enzyme.
:param name_new_enz: name of the new enzyme
:param all_name_enz: names of already created enzymes
:type name_new_enz: str
:type all_name_enz: list(str)
:return: True if name is correct
:rtype: bool
Enzyme name should not contains whitespace character (' ', \\\\t,
\\\\n, \\\\r, \\\\f, \\\\v), be empty, a digit or be already used.
"""
ret = True
# If the enzyme name is already taken
if name_new_enz in all_name_enz:
core.handle_errors("This name exist, please choose another name.", 2)
ret = False
# Does it contain only digit character?
if name_new_enz.isdigit():
core.handle_errors("Enzyme name can't be only digits, please choose"\
" another name.", 2)
ret = False
# Does it contain ' ' character?
res = re.search(" ", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Space character found at position " +
str(res.start() + 1) +
", please choose another name.", 2)
ret = False
# Does it contain \t character?
res = re.search("\t", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Tab character found at position " +
str(res.start() + 1) +
", please choose another name.", 2)
ret = False
res = name_new_enz.find("\\t")
if res != -1:
to_print = ""
for _ in range(res):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Tab character found at position " +
str(res + 1) + ", please choose another name.", 2)
ret = False
# Does it contain \n character?
res = re.search("\n", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Newline character found at position " +
str(res.start() + 1) +
", please choose another name.", 2)
ret = False
res = name_new_enz.find("\\n")
if res != -1:
to_print = ""
for _ in range(res):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Newline character found at position " +
str(res + 1) + ", please choose another name.", 2)
ret = False
# Does it contain \r character?
res = re.search("\r", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Carriage return (\\r) character found "
"at position " + str(res.start() + 1) +
", please choose another name.", 2)
ret = False
res = name_new_enz.find("\\r")
if res != -1:
to_print = ""
for _ in range(res):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Carriage return (\\r) character found "
"at position " + str(res + 1) +
", please choose another name.", 2)
ret = False
# Does it contain \f character?
res = re.search("\f", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Form feed (\\f) character found at "
"position " + str(res.start() + 1) +
", please choose another name.", 2)
ret = False
res = name_new_enz.find("\\f")
if res != -1:
to_print = ""
for _ in range(res):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Form feed (\\f) character found at "
"position " + str(res + 1) +
", please choose another name.", 2)
ret = False
# Does it contain \v character?
res = re.search("\v", name_new_enz)
if res:
to_print = ""
for _ in range(res.start()):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Vertical Tab (\\v) character found at "
"position " + str(res.start() + 1) +
", please choose another name.", 2)
ret = False
res = name_new_enz.find("\\v")
if res != -1:
to_print = ""
for _ in range(res):
to_print += " "
to_print += "^\n"
core.handle_errors(to_print + "Vertical Tab (\\v) character found at "
"position " + str(res + 1) +
", please choose another name.", 2)
ret = False
# Not empty
if name_new_enz == "":
core.handle_errors("Please choose a not empty name.", 2)
ret = False
return ret
# Not tested
[docs]
def user_creation_enzyme(all_enzymes):
"""Text-mod form to input a new enzyme.
.. warning:: Not tested
.. warning:: It could be a problem to immediately use the new enzyme (see in-code warning)
"""
add_enzyme = "y"
# All enzymes name
all_name_enz = set()
# Get all used names
for enz in all_enzymes:
all_name_enz.add(enz.name)
# Adding enzyme
while add_enzyme == "y":
# Name of the enzyme
name_new_enz = input("Name of the new enzyme?\n")
while not check_enzyme_name(name_new_enz, all_name_enz):
# Name of the enzyme
name_new_enz = input("Name of the new enzyme?\n")
# All the rules entered by user
all_rules = {}
# Input of user for creating rules
def_rule = "_"
while def_rule != "":
# Type of rule?
cutmp = ""
# Ensure we got a correct value i.e. c, e or q
while (cutmp != "c") and (cutmp != "e") and (cutmp != "q"):
cutmp = input("Create a cleaving rule (c) or an exception (e)?"
" (q) to quit:\n")
# Exit
if cutmp == "q":
break
# Set the cut to what the user ask: e = False
cut = False
# c = True
if cutmp == "c":
cut = True
# The rule is valid?
validate_rule = ""
# Until the rules is not properly defined:
while validate_rule == "":
# Cleaving rule
if cut:
def_rule = input("Write your cleaving rule,"
" (q) to quit:\n")
# Exception rule
else:
def_rule = input("Write your exception rule,"
" (q) to quit:\n")
# Quit?
if def_rule == "q":
break
# Check if input is coherent
validate_rule = rule.check_rule(def_rule)
# Add this rule
if validate_rule != "":
all_rules[validate_rule] = cut
# Get all the rules in correct format
correct_rules = rule.create_rules(all_rules)
# Create the enzyme with fake id (auto-inc)
# .. warning:: It could be a problem to immediately use the new enzyme
new_enz = Enzyme(-1, name_new_enz, correct_rules)
# Write in the user-defined enzymes file
new_enz.write_enzyme_in_user_file()
# Add it to known names
all_name_enz.add(new_enz.name)
# End of this new enzyme
add_enzyme = input("Add another enzyme? (y/n)\n")
[docs]
def user_creation_enzyme_non_interactive(all_enzymes, name_new_enz, rules, exceptions=None):
"""One-line input a new enzyme.
:param all_enzymes: all already existing enzymes
:param name: name of the enzyme to create
:param rules: rules defining the new enzyme
:param exceptions: exceptions defining the new enzyme
:type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
:type name: str
:type rules: list(str)
:type exceptions: list(str)
.. warning:: It could be a problem to immediately use the new enzyme (see in-code warning)
"""
# All enzymes name
all_name_enz = set()
# Get all used names
for enz in all_enzymes:
all_name_enz.add(enz.name)
# Is the name of the enzyme valid?
if not check_enzyme_name(name_new_enz, all_name_enz):
sys.exit(1)
# All the rules entered by user
all_rules = {}
# For all cleavage rules
for def_rule in rules:
# Validate this rule
validate_rule = rule.check_rule(def_rule)
# This rule is not valid, exit
if validate_rule == "":
sys.exit(1)
# Add this rule
all_rules[validate_rule] = True
# For all exceptions
if exceptions:
for def_rule in exceptions:
# Validate this exception
validate_rule = rule.check_rule(def_rule)
# This exception is not valid, exit
if validate_rule == "":
sys.exit(1)
# Add this exception
all_rules[validate_rule] = False
# Get all the rules in correct format
correct_rules = rule.create_rules(all_rules)
# Create the enzyme with fake id (auto-inc)
# .. warning:: It could be a problem to immediately use the new enzyme
new_enz = Enzyme(-1, name_new_enz, correct_rules)
# Write in the user-defined enzymes file
new_enz.write_enzyme_in_user_file()
# Add it to known names
all_name_enz.add(new_enz.name)
[docs]
def delete_enzyme(all_enzymes, name):
""" Delete an enzyme from user file
:param all_enzymes: all already existing enzymes
:param name: name or id of the enzyme to delete
:type all_enzymes: list(:py:class:`~rpg.enzyme.Enzyme`)
:type name: str
.. warning:: Partially tested, remove by ID can't be tested
"""
user_content = ""
# Get the whole content of user file
with open(DEFUSERENZFILE, encoding="Utf-8") as user_file:
for line in user_file:
user_content += line
# Get the name of the enzyme
enz_name = get_enz_name(all_enzymes, name)
# Beginning of what to remove
beg_to_detect = f"# User-defined enzyme {re.escape(enz_name)}\n"
# End of what to remove
end_to_detect = f"ENZYME = enzyme\.Enzyme\(CPT_ENZ, \"{re.escape(enz_name)}\", ENZ, 0\)\n"
# Get everything that needs to be removed
res = re.findall(fr"\n\n\n{beg_to_detect}.*{end_to_detect}.*?\n.*?\n.*?\n", user_content, re.DOTALL)
# We should have a single match
if len(res) == 1:
# Remove the content with escape to protect [ and more
new_user_content = re.sub(re.escape(res[0]), r"", user_content, re.DOTALL)
# Rewrite the user file
with open(DEFUSERENZFILE, "w", encoding="Utf-8") as user_file:
user_file.write(new_user_content)
# More than one hit, we can't do much automatically
else:
# Exit and invite the user to manually edit the file
core.handle_errors(f"Not able to remove enzyme {name}, please remove manually", 0)