Source code for modules.gemtractor.network.network

# This file is part of the GEMtractor
# Copyright (C) 2019 Martin Scharm <https://binfalse.de>
# 
# The GEMtractor is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# The GEMtractor is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging
import re

from .gene import Gene
from .genecomplex import GeneComplex
from .reaction import Reaction
from .species import Species
from ..utils import Utils

from libsbml import LIBSBML_OPERATION_SUCCESS, SBMLDocument, SBMLWriter


class Network:
    """
    a class representing a network

    The entities of the network are kept in dictionaries keyed by their
    identifier: ``species``, ``reactions``, ``genes`` and ``gene_complexes``.
    The flags ``have_reaction_net`` and ``have_gene_net`` start out as False.
    """

    def __init__(self):
        """
        create an empty network without species, reactions or enzymes
        """
        self.__logger = logging.getLogger(__name__)

        # entity stores, each one is an independent dictionary
        self.species, self.reactions = {}, {}
        self.genes, self.gene_complexes = {}, {}

        # neither of the derived networks is available yet
        self.have_reaction_net = self.have_gene_net = False

        # matches an opening rdf:Description tag whose rdf:about points to a
        # local '#...' reference (single or double quoted, case-insensitive)
        self.__annotation_about_pattern = re.compile(
            r"<rdf:Description rdf:about=['\"]#[^'\"]+['\"]>",
            flags=re.IGNORECASE,
        )
[docs] def add_species (self, identifier, name): """ adds a species to the network if there is not yet a species with that identifier, it will create a new species and add it to the internal structure eventually, the species with the corresponding identifier is returned (may be an already existing species, though) :param identifier: the id of the species :param name: the name of the species :type identifier: str :type name: str :return: the species with id 'identifier' :rtype: :class:`.species.Species` """ if identifier not in self.species: self.species[identifier] = Species (identifier, name) return self.species[identifier]
[docs] def add_reaction (self, identifier, name): """ adds a reaction to the network if there is not yet a reaction with that identifier, it will create a new reaction and add it to the internal structure eventually, the reaction with the corresponding identifier is returned (may be an already existing reaction, though) :param identifier: the id of the reaction :param name: the name of the reaction :type identifier: str :type name: str :return: the reaction with id 'identifier' :rtype: :class:`.reaction.Reaction` """ if identifier not in self.reactions: self.reactions[identifier] = Reaction (identifier, name) return self.reactions[identifier]
[docs] def add_gene (self, gene): """ adds a gene/gene-product/enzyme to the network if there is not yet a gene with that identifier, it will create a new gene and add it to the internal structure eventually, the gene with the corresponding identifier is returned (may be an already existing gene, though) :param identifier: the id of the gene :param name: the name of the gene :type identifier: str :type name: str :return: the gene with id 'identifier' :rtype: :class:`.gene.Gene` """ if gene.identifier not in self.genes: self.genes[gene.identifier] = Gene (gene.identifier) return self.genes[gene.identifier]
[docs] def add_genes (self, reaction, gene_complexes): """ adds multiple genes to this network and to a reaction iterates all genes and complexes from 'gene_complexes' in 'gene_complexes' and - creates new genes if necessary (using :func:`add_gene`) - creates new gene complexes and their genes if necessary (using :func:`add_gene`) - links the gene and the reaction :param reaction: the reaction that is catalyzed by gene_complexes :param gene_complexes: list of (mixed) genes and gene complexes that catalyze the reaction :type reaction: :class:`.reaction.Reaction` :type gene_complexes: list of :class:`.gene.Gene` and :class:`.genecomplex.GeneComplex` """ for gc in gene_complexes: if type (gc) is Gene: g = self.add_gene (gc) reaction.genes.append (g.identifier) g.reactions.append (reaction.identifier) elif type (gc) is GeneComplex: if len (gc.genes) == 1: g = self.add_gene (next(iter(gc.genes))) reaction.genes.append (g.identifier) g.reactions.append (reaction.identifier) else: gcomplex = GeneComplex () for g in gc.genes: gcomplex.add_gene (self.add_gene (g)) gcomplex.calc_id () reaction.genec.append (gcomplex.identifier) gcomplex.reactions.append (reaction.identifier) self.gene_complexes[gcomplex.identifier] = (gcomplex) else: raise RuntimeError ("unexpected gene type: " + type (gc))
[docs] def serialize (self): """ serialize to a JSON-dumpable object the object will contain the following information: - species: array of :class:`.species.Species` in the network - reactions: array of :class:`.reaction.Reaction` in the network - enzs: array of :class:`.gene.Gene` in the network - enzc: array of :class:`.genecomplex.GeneComplex` in the network :return: JSON-dumpable object :rtype: dict """ self.__logger.debug ("serialising the network") json = { "species": [], "reactions": [], "enzs": [], "enzc": [], } species_mapper = {} reaction_mapper = {} gene_mapper = {} gene_complex_mapper = {} for identifier, species in self.species.items (): self.__logger.debug ("serialising species " + identifier) s_ser = species.serialize () species_mapper[identifier] = len (json["species"]) json["species"].append (s_ser) for identifier, gene in self.genes.items (): self.__logger.debug ("serialising gene " + identifier) g_ser = gene.serialize () gene_mapper[identifier] = len (json["enzs"]) json["enzs"].append (g_ser) for identifier, gene_complex in self.gene_complexes.items (): self.__logger.debug ("serialising gene complex " + identifier) g_ser = gene_complex.serialize (gene_mapper) gene_complex_mapper[identifier] = len (json["enzc"]) json["enzc"].append (g_ser) # add gene-genecomplex information for g in gene_complex.genes: json["enzs"][gene_mapper[g.identifier]]["cplx"].append (gene_complex_mapper[identifier]) for identifier, reaction in self.reactions.items (): self.__logger.debug ("serialising reaction " + identifier) # json["reactions"][reaction.num] = reaction.serialize () r_ser = reaction.serialize (species_mapper, gene_mapper, gene_complex_mapper) reaction_mapper[identifier] = len (json["reactions"]) json["reactions"].append (r_ser) # further reduce return size: replace reaction ids in species occurrences for s in json["species"]: o = [] for occ in s["occ"]: o.append (reaction_mapper[occ]) s["occ"] = o # further reduce return size: replace reaction ids in gene 
occurrences for g in json["enzs"]: o = [] for occ in g["reactions"]: o.append (reaction_mapper[occ]) g["reactions"] = o for g in json["enzc"]: o = [] for occ in g["reactions"]: o.append (reaction_mapper[occ]) g["reactions"] = o return json
[docs] def calc_reaction_net (self): """ Calculate the reaction-centric network Let's say you have this network: - A -r-> B (reaction r turns A into B) - B -s-> C (reaction s turns B into C) then the reaction-centric network will be: - r -> s (r links to s) """ self.__logger.info ("calc reaction net") num = 0 for identifier, reaction in self.reactions.items (): num += 1 if num % 100 == 0: self.__logger.info ("calc reaction net " + str (num)) self.__logger.debug ("calc reaction net " + reaction.identifier) for species in reaction.consumed: self.species[species]._consumption["r"].add (identifier) if reaction.reversible: self.species[species]._production["r"].add (identifier) for species in reaction.produced: self.species[species]._production["r"].add (identifier) if reaction.reversible: self.species[species]._consumption["r"].add (identifier) for identifier, species in self.species.items (): for consumption in species._consumption["r"]: reaction = self.reactions[consumption] for production in species._production["r"]: reaction.links.add (self.reactions[production]) self.have_reaction_net = True
[docs] def calc_genenet (self): """ Calculate the enzyme-centric network Let's say you have this network: - A -r-> B (reaction r turns A into B) - B -s-> C (reaction s turns B into C) and let's assume the reactions are catalyzed by the following enzymes: - r: V or W - s: X and (Y or Z) then the enzyme-centric network will be: - V -> X+Y (V links to X+Y) - V -> X+Z (V links to X+Z) - W -> X+Y (W links to X+Y) - W -> X+Z (W links to X+Z) """ self.__logger.info ("calc gene net") num = 0 for identifier, reaction in self.reactions.items (): num += 1 if num % 100 == 0: self.__logger.info ("calc gene associations for reaction " + str (num)) self.__logger.debug ("calc gene associations for reaction " + reaction.identifier) for gene in reaction.genes: self.__logger.debug ("processing gene " + gene) for species in reaction.consumed: s = self.species[species] s._consumption["g"].add (gene) if reaction.reversible: s._production["g"].add (gene) for species in reaction.produced: s = self.species[species] s._production["g"].add (gene) if reaction.reversible: s._consumption["g"].add (gene) for gene in reaction.genec: self.__logger.debug ("processing gene complex " + gene) for species in reaction.consumed: s = self.species[species] s._consumption["gc"].add (gene) if reaction.reversible: s._production["gc"].add (gene) for species in reaction.produced: s = self.species[species] s._production["gc"].add (gene) if reaction.reversible: s._consumption["gc"].add (gene) self.__logger.info ("got gene associations") for identifier, species in self.species.items (): for consumption in species._consumption["g"]: for production in species._production["g"]: self.genes[production].links["g"].add (self.genes[consumption]) for production in species._production["gc"]: self.gene_complexes[production].links["g"].add (self.genes[consumption]) for consumption in species._consumption["gc"]: for production in species._production["g"]: self.genes[production].links["gc"].add 
(self.gene_complexes[consumption]) for production in species._production["gc"]: self.gene_complexes[production].links["gc"].add (self.gene_complexes[consumption]) # ~ self.genenet[production]["links"].add (consumption) self.__logger.info ("got gene net") self.have_gene_net = True
[docs] def export_mn_dot (self, file_path): """ export the metabolite-reaction network in DOT format :param file_path: where to store the exported format? :type file_path: str """ nodemap = {} with open(file_path, 'w') as f: f.write ("digraph GEMtractor {\n") #TODO comment incl time and version? for identifier, species in self.species.items (): nodemap[identifier] = 's' + identifier f.write ("\t" + nodemap[identifier] + " [label=\""+identifier+"\"];\n") for identifier, reaction in self.reactions.items (): rid = 'r' + identifier f.write ("\t" + rid + " [label=\""+identifier+"\" shape=box];\n") for s in reaction.consumed: f.write ("\t" + nodemap[s] + " -> " + rid + ";\n") for s in reaction.produced: f.write ("\t" + rid + " -> " + nodemap[s] + ";\n") f.write ("}\n")
[docs] def export_rn_dot (self, file_path): """ export the reaction-centric network in DOT format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_reaction_net: self.calc_reaction_net () with open(file_path, 'w') as f: f.write ("digraph GEMtractor {\n") for identifier, reaction in self.reactions.items (): f.write ("\t" + identifier + " [label=\""+reaction.name+"\"];\n") for identifier, reaction in self.reactions.items (): for r in reaction.links: f.write ("\t" + identifier + " -> " + r.identifier + ";\n") f.write ("}\n")
[docs] def export_en_dot (self, file_path): """ export the enzyme-centric network in DOT format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_gene_net: self.calc_genenet () nodemap = {} with open(file_path, 'w') as f: f.write ("digraph GEMtractor {\n") #TODO comment incl time and version? num = 0 for gene in self.genes: num = num + 1 nodemap[gene] = 'g' + str(num) f.write ("\t" + nodemap[gene] + " [label=\""+gene+"\"];\n") for gene in self.gene_complexes: num = num + 1 nodemap[gene] = 'gc' + str(num) f.write ("\t" + nodemap[gene] + " [label=\""+gene+"\"];\n") for gene in self.genes: for associated in self.genes[gene].links["g"]: f.write ("\t" + nodemap[gene] + " -> " + nodemap[associated.identifier] + ";\n") for associated in self.genes[gene].links["gc"]: f.write ("\t" + nodemap[gene] + " -> " + nodemap[associated.identifier] + ";\n") for gene in self.gene_complexes: for associated in self.gene_complexes[gene].links["g"]: f.write ("\t" + nodemap[gene] + " -> " + nodemap[associated.identifier] + ";\n") for associated in self.gene_complexes[gene].links["gc"]: f.write ("\t" + nodemap[gene] + " -> " + nodemap[associated.identifier] + ";\n") f.write ("}\n")
[docs] def export_mn_gml (self, file_path): """ export the metabolite-reaction network in GML format :param file_path: where to store the exported format? :type file_path: str """ nodemap = {} with open(file_path, 'w') as f: f.write (Network.create_gml_prefix ()) #TODO comment incl time and version? num = 0 for identifier, species in self.species.items (): num += 1 nodemap[identifier] = str (num) f.write (Network.create_gml_node (nodemap[identifier], "species", "ellipse", identifier)) for identifier, reaction in self.reactions.items (): num += 1 rid = str (num) f.write (Network.create_gml_node (rid, "reaction", "rectangle", identifier)) for s in reaction.consumed: f.write (Network.create_gml_edge (nodemap[s], rid)) for s in reaction.produced: f.write (Network.create_gml_edge (rid, nodemap[s])) f.write ("]\n")
[docs] def export_rn_gml (self, file_path): """ export the reaction-centric network in GMl format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_reaction_net: self.calc_reaction_net () with open(file_path, 'w') as f: f.write (Network.create_gml_prefix ()) nodemap = {} num = 0 for identifier, reaction in self.reactions.items (): num += 1 nodemap[identifier] = str (num) f.write (Network.create_gml_node (nodemap[identifier], "reaction", "ellipse", reaction.name)) for identifier, reaction in self.reactions.items (): for r in reaction.links: f.write (Network.create_gml_edge (nodemap[identifier], nodemap[r.identifier])) f.write ("]\n")
[docs] def export_en_gml (self, file_path): """ export the enzyme-centric network in GML format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_gene_net: self.calc_genenet () nodemap = {} with open(file_path, 'w') as f: f.write (Network.create_gml_prefix ()) #TODO comment incl time and version? num = 0 for gene in self.genes: num += 1 nodemap[gene] = str (num) f.write (Network.create_gml_node (nodemap[gene], "enzyme", "ellipse", gene)) for gene in self.gene_complexes: num += 1 nodemap[gene] = str (num) f.write (Network.create_gml_node (nodemap[gene], "enzyme_complex", "ellipse", gene)) for gene in self.genes: for associated in self.genes[gene].links["g"]: f.write (Network.create_gml_edge (nodemap[gene], nodemap[associated.identifier])) for associated in self.genes[gene].links["gc"]: f.write (Network.create_gml_edge (nodemap[gene], nodemap[associated.identifier])) for gene in self.gene_complexes: for associated in self.gene_complexes[gene].links["g"]: f.write (Network.create_gml_edge (nodemap[gene], nodemap[associated.identifier])) for associated in self.gene_complexes[gene].links["gc"]: f.write (Network.create_gml_edge (nodemap[gene], nodemap[associated.identifier])) f.write ("]\n")
[docs] @staticmethod def create_gml_prefix (): """ create the prefix for a GML file :return: the preamble string for GML files :rtype: str """ n = "graph [\n" #TODO time and version? n = n + "\tcomment \"generated using the GEMtractor\"\n" n = n + "\tdirected 1\n" return n
[docs] @staticmethod def create_gml_node (nid, ntype, nshape, nlabel): """ create a GML node :param nid: the identifier of the node in the network :param ntype: the node's type :param nshape: the node's shape :param nlabel: the node's label :type nid: str :type ntype: str :type nshape: str :type nlabel: str :return: the GML representation of the network node :rtype: str """ n = "\tnode [\n" n = n + "\t\tid " + nid + "\n" n = n + "\t\tlabel \""+nlabel+"\"\n" n = n + "\t]\n" return n
[docs] @staticmethod def create_gml_edge (source, target): """ create a GML edge :param source: the identifier of the source node in the network :param target: the identifier of the target node in the network :type source: str :type target: str :return: the GML representation of the edge :rtype: str """ n = "\tedge [\n" n = n + "\t\tsource "+source+"\n" n = n + "\t\ttarget "+target+"\n" n = n + "\t]\n" return n
[docs] def export_mn_graphml (self, file_path): """ export the metabolite-reaction network in GraphML format :param file_path: where to store the exported format? :type file_path: str """ nodemap = {} with open(file_path, 'w') as f: f.write (Network.create_graphml_prefix ()) #TODO comment incl time and version? for identifier, species in self.species.items (): nodemap[identifier] = 's' + identifier f.write (Network.create_graphml_node (nodemap[identifier], "species", "ellipse", identifier)) num = 0 for identifier, reaction in self.reactions.items (): rid = 'r' + identifier f.write (Network.create_graphml_node (rid, "reaction", "rectangle", identifier)) for s in reaction.consumed: num = num + 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + nodemap[s] + "\" target=\"" + rid + "\"/>\n") for s in reaction.produced: num = num + 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + rid + "\" target=\"" + nodemap[s] + "\"/>\n") f.write ("\t</graph>\n</graphml>\n")
[docs] def export_rn_graphml (self, file_path): """ export the reaction-centric network in GraphML format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_reaction_net: self.calc_reaction_net () with open(file_path, 'w') as f: f.write (Network.create_graphml_prefix ()) for identifier, reaction in self.reactions.items (): f.write (Network.create_graphml_node (identifier, "reaction", "ellipse", reaction.name)) num = 0 for identifier, reaction in self.reactions.items (): for r in reaction.links: num = num + 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + identifier + "\" target=\"" + r.identifier + "\"/>\n") f.write ("\t</graph>\n</graphml>\n")
[docs] def export_en_graphml (self, file_path): """ export the enzyme-centric network in GraphML format :param file_path: where to store the exported format? :type file_path: str """ if not self.have_gene_net: self.calc_genenet () nodemap = {} with open(file_path, 'w') as f: f.write (Network.create_graphml_prefix ()) #TODO comment incl time and version? num = 0 for gene in self.genes: num += 1 nodemap[gene] = 'g' + str (num) f.write (Network.create_graphml_node (nodemap[gene], "enzyme", "ellipse", gene)) for gene in self.gene_complexes: num += 1 nodemap[gene] = 'gc' + str (num) f.write (Network.create_graphml_node (nodemap[gene], "enzyme_complex", "ellipse", gene)) num = 0 for gene in self.genes: for associated in self.genes[gene].links["g"]: num += 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + nodemap[gene] + "\" target=\"" + nodemap[associated.identifier] + "\"/>\n") for associated in self.genes[gene].links["gc"]: num += 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + nodemap[gene] + "\" target=\"" + nodemap[associated.identifier] + "\"/>\n") for gene in self.gene_complexes: for associated in self.gene_complexes[gene].links["g"]: num += 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + nodemap[gene] + "\" target=\"" + nodemap[associated.identifier] + "\"/>\n") for associated in self.gene_complexes[gene].links["gc"]: num += 1 f.write ("\t\t<edge id=\"e" + str(num) + "\" source=\"" + nodemap[gene] + "\" target=\"" + nodemap[associated.identifier] + "\"/>\n") f.write ("\t</graph>\n</graphml>\n")
[docs] @staticmethod def create_graphml_prefix (): """ create the prefix for a GraphML file :return: the preamble string for GraphML files :rtype: str """ #TODO time and version? n = "<graphml xmlns=\"http://graphml.graphdrawing.org/xmlns\"\n" n = n + "\txmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n" n = n + "\txsi:schemaLocation=\"http://graphml.graphdrawing.org/xmlns/1.0/graphml.xsd\"\n" n = n + "\txmlns:y=\"http://www.yworks.com/xml/graphml\">\n\n" n = n + "\t<key for=\"node\" id=\"layout\" yfiles.type=\"nodegraphics\"/>\n" n = n + "\t<key for=\"node\" id=\"type\" attr.type=\"string\"><default>species</default></key>\n" n = n + "\t<graph id=\"GEMtractor\" edgedefault=\"directed\">\n" return n
[docs] @staticmethod def create_graphml_node (nid, ntype, nshape, nlabel): """ create a GraphML node :param nid: the identifier of the node in the network :param ntype: the node's type :param nshape: the node's shape :param nlabel: the node's label :type nid: str :type ntype: str :type nshape: str :type nlabel: str :return: the GraphML representation of the network node :rtype: str """ n = "\t\t<node id=\"" + nid + "\">\n" n = n + "\t\t\t<data key=\"type\">"+ntype+"</data>\n" n = n + "\t\t\t<data key=\"layout\">\n" n = n + "\t\t\t\t<y:ShapeNode>\n" n = n + "\t\t\t\t\t<y:Shape type=\""+nshape+"\"/>\n" n = n + "\t\t\t\t\t<y:NodeLabel>"+nlabel+"</y:NodeLabel>\n" n = n + "\t\t\t\t</y:ShapeNode>\n" n = n + "\t\t\t</data>\n" n = n + "\t\t</node>\n" return n
[docs] def export_rn_sbml (self, file_path, gemtractor, model_id, model_name = None, filter_species = None, filter_reactions = None, filter_genes = None, filter_gene_complexes = None, remove_reaction_enzymes_removed = True, remove_ghost_species = False, discard_fake_enzymes = False, remove_reaction_missing_species = False, removing_enzyme_removes_complex = True): """ export the reaction-centric network in SBML format will attach the trimming-settings as SBML note writes the document using the `libsbml:SBMLWriter <http://sbml.org/Special/Software/libSBML/docs/python-api/class_s_b_m_l_writer.html>` -- returns the result of `libsbml:SBMLWriter.writeSBML <http://sbml.org/Special/Software/libSBML/docs/python-api/class_s_b_m_l_writer.html#a02d1998aee7656d7b9c3ac69d62bb66f>`_ :param file_path: where to store the exported format? :param model_id: the model's identifier, will be postfixed with a greeting from us :param model_name: the model's name, will be prefixed with a greeting from us :param filter_species: species identifiers to get rid of :param filter_reactions: reaction identifiers to get rid of :param filter_genes: enzyme identifiers to get rid of :param filter_gene_complexes: enzyme-complex identifiers to get rid of, every list-item should be of format: 'A + B + gene42' :param remove_reaction_enzymes_removed: should we remove a reaction if all it's genes were removed? :param remove_ghost_species: should species be removed, that do not participate in any reaction anymore - even though they might be required in other entities? :param discard_fake_enzymes: should fake enzymes (implicitly assumes enzymes, if no enzymes are annotated to a reaction) be removed? :param remove_reaction_missing_species: remove a reaction if one of the participating genes was removed? :param removing_enzyme_removes_complex: if an enzyme is removed, should also all enzyme complexes be removed in which it participates? 
:type file_path: str :type model_id: str :type model_name: str :type filter_species: list of str :type filter_reactions: list of str :type filter_genes: list of str :type filter_gene_complexes: list of str :type remove_reaction_enzymes_removed: bool :type remove_ghost_species: bool :type discard_fake_enzymes: bool :type remove_reaction_missing_species: bool :type removing_enzyme_removes_complex: bool :return: true on success, otherwise false :rtype: bool """ if not self.have_reaction_net: self.calc_reaction_net () sbml = SBMLDocument () model = sbml.createModel () #TODO dc modified? if model is None: self.__logger.error ("could not create model...") return False model.setId (model_id + "_GEMtracted_ReactionNetwork") if model_name is None: model_name = model_id model.setName ("GEMtracted ReactionNetwork of " + model_name) Utils.add_model_note (model, filter_species, filter_reactions, filter_genes, filter_gene_complexes, remove_reaction_enzymes_removed, remove_ghost_species, discard_fake_enzymes, remove_reaction_missing_species, removing_enzyme_removes_complex) compartment = model.createCompartment() compartment.setId('compartment') compartment.setConstant(True) nodemap = {} for identifier, reaction in self.reactions.items (): nodemap[identifier] = self.__create_sbml_reaction_species (model, identifier, reaction.name, compartment, gemtractor) num = 0 for identifier, reaction in self.reactions.items (): for r in reaction.links: num += 1 Network.create_sbml_reaction (model, 'r' + str (num), nodemap[identifier], nodemap[r.identifier]) return SBMLWriter().writeSBML (sbml, file_path)
def __create_sbml_reaction_species (self, model, identifier, name, compartment, gemtractor): """ create an SBML reaction species for the Reaction-centric network :param model: the SBML model :param identifier: the identifier of the reaction :param name: the reaction's name :param compartment: the compartment in which the reaction will appear :param gemtractor: the GEMtractor :type model: `libsbml:Model <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_model.html>`_ :type identifier: str :type name: str :type compartment: `libsbml:Compartment <sbml.org/Software/libSBML/docs/python-api/classlibsbml_1_1_compartment.html>`_ :type gemtractor: :class:`.gemtractor.GEMtractor` :return: the SBML species representing a reaction :rtype: `libsbml:Species <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_species.html>`_ """ g = model.createSpecies () g.setId (identifier) g.setMetaId (identifier) g.setName (name) g.setCompartment(compartment.getId()) g.setHasOnlySubstanceUnits(False) g.setBoundaryCondition(False) g.setConstant(False) annotations = gemtractor.get_reaction_annotations (identifier) if annotations is not None: annotations = self.__annotation_about_pattern.sub('<rdf:Description rdf:about="#'+identifier+'">', annotations) if g.setAnnotation (annotations) != LIBSBML_OPERATION_SUCCESS: self.__logger.error ("unable to add annotation to reaction " + identifier) self.__logger.debug ("annotation was: " + annotations) return g
[docs] def export_en_sbml (self, file_path, gemtractor, model_id, model_name = None, filter_species = None, filter_reactions = None, filter_genes = None, filter_gene_complexes = None, remove_reaction_enzymes_removed = True, remove_ghost_species = False, discard_fake_enzymes = False, remove_reaction_missing_species = False, removing_enzyme_removes_complex = True): """ export the enzyme-centric network in SBML format will attach the trimming-settings as SBML note writes the document using the `libsbml:SBMLWriter <http://sbml.org/Special/Software/libSBML/docs/python-api/class_s_b_m_l_writer.html>` -- returns the result of `libsbml:SBMLWriter.writeSBML <http://sbml.org/Special/Software/libSBML/docs/python-api/class_s_b_m_l_writer.html#a02d1998aee7656d7b9c3ac69d62bb66f>`_ :param file_path: where to store the exported format? :param model_id: the model's identifier, will be postfixed with a greeting from us :param model_name: the model's name, will be prefixed with a greeting from us :param filter_species: species identifiers to get rid of :param filter_reactions: reaction identifiers to get rid of :param filter_genes: enzyme identifiers to get rid of :param filter_gene_complexes: enzyme-complex identifiers to get rid of, every list-item should be of format: 'A + B + gene42' :param remove_reaction_enzymes_removed: should we remove a reaction if all it's genes were removed? :param remove_ghost_species: should species be removed, that do not participate in any reaction anymore - even though they might be required in other entities? :param discard_fake_enzymes: should fake enzymes (implicitly assumes enzymes, if no enzymes are annotated to a reaction) be removed? :param remove_reaction_missing_species: remove a reaction if one of the participating genes was removed? :param removing_enzyme_removes_complex: if an enzyme is removed, should also all enzyme complexes be removed in which it participates? 
:type file_path: str :type model_id: str :type model_name: str :type filter_species: list of str :type filter_reactions: list of str :type filter_genes: list of str :type filter_gene_complexes: list of str :type remove_reaction_enzymes_removed: bool :type remove_ghost_species: bool :type discard_fake_enzymes: bool :type remove_reaction_missing_species: bool :type removing_enzyme_removes_complex: bool :return: true on success, otherwise false :rtype: bool """ if not self.have_gene_net: self.calc_genenet () sbml = SBMLDocument () model = sbml.createModel () #TODO dc modified? if model is None: self.__logger.error ("could not create model...") return False model.setId (model_id + "_GEMtracted_EnzymeNetwork") if model_name is None: model_name = model_id model.setName ("GEMtracted EnzymeNetwork of " + model_name) # print ("adding note to en sbml") Utils.add_model_note (model, filter_species, filter_reactions, filter_genes, filter_gene_complexes, remove_reaction_enzymes_removed, remove_ghost_species, discard_fake_enzymes, remove_reaction_missing_species, removing_enzyme_removes_complex) nodemap = {} compartment = model.createCompartment() compartment.setId('compartment') compartment.setConstant(True) num = 0 for gene in self.genes: num += 1 nodemap[gene] = self.__create_sbml_gene (model, 'g' + str (num), gene, compartment, gemtractor) # TODO: add other information if available for gene in self.gene_complexes: num += 1 nodemap[gene] = self.__create_sbml_gene_complex (model, 'gc' + str (num), gene, compartment, gemtractor, self.gene_complexes[gene].genes, nodemap) # TODO: add other information if available num = 0 for gene in self.genes: for associated in self.genes[gene].links["g"]: num += 1 Network.create_sbml_reaction (model, 'r' + str (num), nodemap[gene], nodemap[associated.identifier]) for associated in self.genes[gene].links["gc"]: num += 1 Network.create_sbml_reaction (model, 'r' + str (num), nodemap[gene], nodemap[associated.identifier]) for gene in 
self.gene_complexes: for associated in self.gene_complexes[gene].links["g"]: num += 1 Network.create_sbml_reaction (model, 'r' + str (num), nodemap[gene], nodemap[associated.identifier]) for associated in self.gene_complexes[gene].links["gc"]: num += 1 Network.create_sbml_reaction (model, 'r' + str (num), nodemap[gene], nodemap[associated.identifier]) return SBMLWriter().writeSBML (sbml, file_path)
def __create_sbml_gene_complex(self, model, identifier, name, compartment, gemtractor, genes, nodemap):
    """
    create an SBML gene-complex species for the Enzyme-centric network

    the species gets one ``bqbiol:hasPart`` entry per member gene, each
    referencing the meta id of the SBML species that ``nodemap`` holds for
    that gene; if the complex has no genes, no annotation is attached and a
    warning is logged instead

    :param model: the SBML model
    :param identifier: the identifier of the gene complex (also used as meta id)
    :param name: the gene complex' name
    :param compartment: the compartment in which the gene complex will appear
    :param gemtractor: the GEMtractor (not used by this method)
    :param genes: the genes, which are part of this complex
    :param nodemap: mapper to map gene identifiers to corresponding SBML species

    :type model: `libsbml:Model <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_model.html>`_
    :type identifier: str
    :type name: str
    :type compartment: `libsbml:Compartment <sbml.org/Software/libSBML/docs/python-api/classlibsbml_1_1_compartment.html>`_
    :type gemtractor: :class:`.gemtractor.GEMtractor`
    :type genes: list of :class:`.gene.Gene`
    :type nodemap: dict

    :return: the SBML species representing a gene complex
    :rtype: `libsbml:Species <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_species.html>`_
    """
    g = model.createSpecies()
    g.setId(identifier)
    g.setMetaId(identifier)
    g.setName(name)
    g.setCompartment(compartment.getId())
    g.setHasOnlySubstanceUnits(False)
    g.setBoundaryCondition(False)
    g.setConstant(False)

    # one rdf:li per member gene, pointing at the gene's own SBML species
    parts = "".join(
        '<rdf:li rdf:resource="#' + nodemap[gene.identifier].getMetaId() + '" />'
        for gene in genes
    )
    if not parts:
        # Logger.warn is a deprecated alias; Logger.warning is the supported spelling
        self.__logger.warning("gene complex has no genes: " + identifier)
        return g

    annotations = (
        ' <annotation>'
        ' <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"'
        ' xmlns:bqbiol="http://biomodels.net/biology-qualifiers/">'
        ' <rdf:Description rdf:about="#' + identifier + '">'
        ' <bqbiol:hasPart>'
        ' <rdf:Bag>' + parts + '</rdf:Bag>'
        ' </bqbiol:hasPart>'
        ' </rdf:Description>'
        ' </rdf:RDF>'
        ' </annotation>'
    )
    if g.setAnnotation(annotations) != LIBSBML_OPERATION_SUCCESS:
        self.__logger.error("unable to add annotation to gene " + identifier)
        self.__logger.debug("annotation was: " + annotations)
    return g

def __create_sbml_gene(self, model, identifier, name, compartment, gemtractor):
    """
    create an SBML gene species for the Enzyme-centric network

    if the GEMtractor provides annotations for the gene, their
    ``rdf:Description rdf:about`` target is rewritten to reference the new
    species' meta id before they are attached

    :param model: the SBML model
    :param identifier: the identifier of the gene (also used as meta id)
    :param name: the gene's name
    :param compartment: the compartment in which the gene will appear
    :param gemtractor: the GEMtractor, asked for the gene product annotations of ``name``

    :type model: `libsbml:Model <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_model.html>`_
    :type identifier: str
    :type name: str
    :type compartment: `libsbml:Compartment <sbml.org/Software/libSBML/docs/python-api/classlibsbml_1_1_compartment.html>`_
    :type gemtractor: :class:`.gemtractor.GEMtractor`

    :return: the SBML species representing a gene
    :rtype: `libsbml:Species <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_species.html>`_
    """
    g = model.createSpecies()
    g.setId(identifier)
    g.setMetaId(identifier)
    g.setName(name)
    g.setCompartment(compartment.getId())
    g.setHasOnlySubstanceUnits(False)
    g.setBoundaryCondition(False)
    g.setConstant(False)

    annotations = gemtractor.get_gene_product_annotations(name)
    if annotations is not None:
        # the annotation was written for another element; re-target it at this species
        annotations = self.__annotation_about_pattern.sub(
            '<rdf:Description rdf:about="#' + identifier + '">', annotations)
        if g.setAnnotation(annotations) != LIBSBML_OPERATION_SUCCESS:
            self.__logger.error("unable to add annotation to gene " + identifier)
            self.__logger.debug("annotation was: " + annotations)
    return g
@staticmethod
def create_sbml_reaction(model, identifier, reactant, product):
    """
    create an SBML reaction for the Enzyme-centric or Reaction-centric network

    the new reaction is irreversible, not fast, and links exactly one
    reactant to exactly one product

    :param model: the SBML model in which the reaction is created
    :param identifier: the identifier of the reaction
    :param reactant: the reactant consumed by the reaction
    :param product: the product produced by the reaction

    :type model: `libsbml:Model <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_model.html>`_
    :type identifier: str
    :type reactant: `libsbml:Species <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_species.html>`_
    :type product: `libsbml:Species <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_species.html>`_

    :return: the SBML reaction
    :rtype: `libsbml:Reaction <http://sbml.org/Special/Software/libSBML/docs/python-api/classlibsbml_1_1_reaction.html>`_
    """
    reaction = model.createReaction()

    # basic attributes
    reaction.setId(identifier)
    reaction.setFast(False)
    reaction.setReversible(False)

    # the single edge: reactant -> product
    reaction.addReactant(reactant)
    reaction.addProduct(product)

    return reaction
def export_mn_csv(self, file_path):
    """
    export the metabolite-reaction network in CSV format

    writes a ``"source","target"`` header followed by one row per edge:
    ``s<species>`` to ``r<reaction>`` for every consumed species and
    ``r<reaction>`` to ``s<species>`` for every produced species; all fields
    are quoted and embedded double quotes are escaped by doubling them

    :param file_path: where to store the exported format?
    :type file_path: str
    """
    import csv

    with open(file_path, 'w') as f:
        # '\n' as terminator keeps the output identical to the former
        # hand-written lines; the text layer still applies the platform's
        # newline translation as before
        writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerow(("source", "target"))
        for identifier, reaction in self.reactions.items():
            rid = 'r' + identifier
            writer.writerows(('s' + s, rid) for s in reaction.consumed)
            writer.writerows((rid, 's' + s) for s in reaction.produced)
def export_rn_csv(self, file_path):
    """
    export the reaction-centric network in CSV format

    calculates the reaction network first if that has not happened yet, then
    writes a ``"source","target"`` header followed by one row per link of
    every reaction; all fields are quoted and embedded double quotes are
    escaped by doubling them

    :param file_path: where to store the exported format?
    :type file_path: str
    """
    import csv

    if not self.have_reaction_net:
        self.calc_reaction_net()

    with open(file_path, 'w') as f:
        # '\n' as terminator keeps the output identical to the former
        # hand-written lines; the text layer still applies the platform's
        # newline translation as before
        writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerow(("source", "target"))
        for identifier, reaction in self.reactions.items():
            writer.writerows((identifier, r.identifier) for r in reaction.links)
def export_en_csv(self, file_path):
    """
    export the enzyme-centric network in CSV format

    calculates the gene network first if that has not happened yet, then
    writes a ``"source","target"`` header followed by one row per link;
    genes are written before gene complexes, and for every node its links to
    genes (``"g"``) come before its links to gene complexes (``"gc"``); all
    fields are quoted and embedded double quotes are escaped by doubling them

    :param file_path: where to store the exported format?
    :type file_path: str
    """
    import csv

    if not self.have_gene_net:
        self.calc_genenet()

    with open(file_path, 'w') as f:
        # '\n' as terminator keeps the output identical to the former
        # hand-written lines; the text layer still applies the platform's
        # newline translation as before
        writer = csv.writer(f, quoting=csv.QUOTE_ALL, lineterminator='\n')
        writer.writerow(("source", "target"))
        # genes and gene complexes expose the same links structure, so one
        # nested loop covers what used to be four copy-pasted loops
        for nodes in (self.genes, self.gene_complexes):
            for identifier, node in nodes.items():
                for kind in ("g", "gc"):
                    writer.writerows(
                        (identifier, associated.identifier)
                        for associated in node.links[kind]
                    )