from operator import or_
from functools import reduce, lru_cache
from typing import Any, Dict, List, Optional, Set, Tuple
from pydantic import BaseModel, Field
from backports.cached_property import cached_property
from pyhpo.config import MODIFIER_IDS
from pyhpo.similarity import SimScore
from pyhpo.annotations import GeneSingleton
from pyhpo.annotations import OmimDisease, DecipherDisease, OrphaDisease
from pyhpo.parser.generics import id_from_string
class InformationContent(BaseModel):
    """
    InformationContent contains automatically calculated IC based on
    direct/indirect associations with genes, omim, orpha and decipher.

    IC instances are created automatically and accessed through
    :class:`pyhpo.term.HPOTerm` instances.

    Users can also register and calculate custom IC scores via
    :func:`pyhpo.term.InformationContent.set_custom`.
    """
    gene: float = 0.0  # Gene based IC
    omim: float = 0.0  # OMIM based IC
    orpha: float = 0.0  # OrphaNet based IC
    decipher: float = 0.0  # Decipher based IC
    # User defined scores, keyed by the name passed to ``set_custom``
    custom: Dict[str, float] = Field(default_factory=dict)

    def __getitem__(self, key: str) -> float:
        """
        Return the information content of the kind named ``key``.

        The IC is frequently accessed dynamically, e.g. when the kind of
        IC (omim / gene) is only known at runtime because it is part of
        a query. Due to this, a dynamic access method is provided.

        Numeric attributes of the instance are checked first, custom
        scores registered via :func:`set_custom` second.

        .. code-block:: python

            ic_kind = 'omim'
            term.information_content[ic_kind]

        Parameters
        ----------
        key: str
            Name of the information-content kind

        Returns
        -------
        float
            The information content for ``key``

        Raises
        ------
        AttributeError
            If ``key`` is neither a numeric attribute nor a custom score
        """
        try:
            return float(getattr(self, key))
        except (AttributeError, TypeError, ValueError) as err:
            # ``key`` is either unknown or names something that is not a
            # number (e.g. ``custom`` itself or a method of the model).
            # Neither may shadow a custom score of the same name.
            if key in self.custom:
                return self.custom[key]
            raise AttributeError(
                f'No information content available for kind {key!r}'
            ) from err

    def set_custom(self, key: str, value: float) -> None:
        """
        Set the IC of a custom score

        Parameters
        ----------
        key: str
            The name of the information-content metric
        value: float
            The actual information content

        **Example:** ::

            for term in Ontology:
                # For some reason, you want to base the information content
                # on the depths of the Term in the ontology
                term.information_content.set_custom(
                    'depth',
                    term.shortest_path_to_root()
                )

            # and now calculate similarity of two sets
            my_similarity = term_set_1.similarity(term_set_2, kind='depth')
        """
        self.custom[key] = value
class HPOTerm(BaseModel):
    """
    A single term of the HPO.

    An HPOTerm instance can be built solely by itself,
    without knowledge of the actual Ontology. The relational
    attributes (``parents``, ``children``, annotations, information
    content) start out empty and are meant to be filled once all
    HPO terms are present in the Ontology.
    """
    # Always present and mandatory
    id: str
    """
    The HPO identifier
    """

    name: str
    """
    The name of the HPO term
    """

    # Mandatory, calculated during initialization
    index: int
    """
    The integer representation of the HPO identifier
    """

    # Precalculated in ``__init__`` from ``index`` and ``name``
    _hash: int

    # Mandatory for HPOTerm, but not always present in input
    comment: str = ''
    definition: str = ''
    _is_a: List[str] = []
    synonym: List[str] = []
    xref: List[str] = []
    alt_id: List[str] = []

    # Special logic for some obsolete terms
    is_obsolete: bool = False
    replaced_by: Optional[str] = None
    consider: List[str] = []

    # Computed once all HPO Terms are present in the Ontology
    parents: Set['HPOTerm'] = set()
    children: Set['HPOTerm'] = set()
    genes: Set[GeneSingleton] = set()
    omim_diseases: Set[OmimDisease] = set()
    omim_excluded_diseases: Set[OmimDisease] = set()
    orpha_diseases: Set[OrphaDisease] = set()
    orpha_excluded_diseases: Set[OrphaDisease] = set()
    decipher_diseases: Set[DecipherDisease] = set()
    decipher_excluded_diseases: Set[DecipherDisease] = set()

    information_content: InformationContent = InformationContent()
    """
    The :class:`.InformationContent` of the HPO term.
    Multiple kinds of IC are automatically calculated,
    others can be manually calculated.
    """

    def __init__(self, **kwargs) -> None:  # type: ignore
        """
        Create the term from keyword arguments.

        ``index`` is always derived from ``id`` (a value passed by the
        caller is overwritten). The optional ``is_a`` argument is stored
        as the raw list of parent references.

        Raises
        ------
        KeyError
            If no ``id`` is passed
        """
        kwargs['index'] = id_from_string(kwargs['id'])
        super().__init__(**kwargs)
        # Private attributes can only be assigned after the model
        # itself is initialized
        self._hash = hash((self.index, self.name))
        self._is_a = kwargs.get('is_a', [])

    @cached_property
    def all_parents(self) -> Set['HPOTerm']:
        """
        All direct and indirect parents of the term.

        The set is built from :attr:`hierarchy` and cached. It does
        not contain the term itself.
        """
        ancestors = reduce(
            or_,
            (set(path) for path in self.hierarchy)
        )
        # every path starts with the term itself
        ancestors.remove(self)
        return ancestors

    @cached_property
    def hierarchy(self) -> Tuple[Tuple['HPOTerm', ...], ...]:
        """
        Calculates all paths from current term to Root term
        and returns each path as a Tuple of HPOTerms

        .. note::

            This function is expensive. To ensure better performance, the
            result is cached and all subsequent calls utilize the cache. Don't
            call ``hierarchy`` before the Ontology is fully built with all
            items.

        Returns
        -------
        tuple of tuple of :class:`.HPOTerm` s
            Tuple of paths. Each path is another tuple made up of HPOTerms,
            starting with the term itself
        """
        if not self.parents:
            # A term without parents is the end of every path
            return ((self,),)

        return tuple(
            (self,) + path
            for parent in self.parents
            for path in parent.hierarchy
        )

    @cached_property
    def is_modifier(self) -> bool:
        """
        ``True`` if the term itself or any of its direct or indirect
        parents is listed in ``MODIFIER_IDS``.
        """
        return int(self) in MODIFIER_IDS or bool(
            MODIFIER_IDS & {int(x) for x in self.all_parents}
        )

    def parent_ids(self) -> List[int]:
        """
        Returns the integer ids of the ``is_a`` references
        the term was created with
        """
        return [
            id_from_string(item) for item in self._is_a
        ]

    def parent_of(self, other: 'HPOTerm') -> bool:
        """
        Checks if ``self`` is a direct or indirect parent of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect parent of another HPOTerms

        Raises
        ------
        RuntimeError
            If ``self`` and ``other`` are the same term
        """
        return other.child_of(self)

    def child_of(self, other: 'HPOTerm') -> bool:
        """
        Checks if ``self`` is a direct or indirect child of ``other``.

        Parameters
        ----------
        other: :class:`.HPOTerm`
            HPOTerm to check for lineage dependency

        Returns
        -------
        bool
            Is the HPOTerm a direct or indirect child of another HPOTerms

        Raises
        ------
        RuntimeError
            If ``self`` and ``other`` are the same term
        """
        if self == other:
            raise RuntimeError('An HPO term cannot be parent/child of itself')
        return other in self.all_parents

    def common_ancestors(self, other: 'HPOTerm') -> Set['HPOTerm']:
        """
        Identifies all common ancestors
        of two HPO terms

        Parameters
        ----------
        other: :class:`.HPOTerm`
            Target HPO term for path finding

        Returns
        -------
        set
            Set of common ancestor HPOTerms
        """
        # Return the intersection of all ancestors of self and other.
        # Consider the following edge cases:
        # - self is in other.all_parents
        # - other is in self.all_parents
        # To account for these edge cases, each term is
        # added to its own set of ancestors first
        self_ancestors: Set['HPOTerm'] = self.all_parents | {self}
        other_ancestors: Set['HPOTerm'] = other.all_parents | {other}
        return self_ancestors & other_ancestors

    def longest_path_to_root(self) -> int:
        """
        Calculates the longest path to root

        Returns
        -------
        int
            Maximum number of steps until the root HPOTerm
            (``0`` for a term without parents)
        """
        return max(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_root(self) -> int:
        """
        Calculates the shortest path to root

        Returns
        -------
        int
            Minimum number of steps until the root HPOTerm
            (``0`` for a term without parents)
        """
        return min(len(path) - 1 for path in self.hierarchy)

    def shortest_path_to_parent(
        self,
        other: 'HPOTerm'
    ) -> Tuple[int, Tuple['HPOTerm', ...]]:
        """
        Calculates the shortest path to another HPO Term

        Parameters
        ----------
        other: HPOTerm
            parent HPOTerm instance (or the term itself)

        Returns
        -------
        int
            Minimum number of steps until the specified HPOTerm
        tuple
            Tuple of all HPOTerm instances on the path,
            starting with ``self`` and ending with ``other``

        Raises
        ------
        RuntimeError
            If ``other`` is neither a parent of the term nor the term
            itself, or if no path to ``other`` can be determined
        """
        if other not in self.all_parents and self != other:
            raise RuntimeError(
                f'{other.id} is not a parent of {self.id}'
            )

        candidates: List[Tuple[int, Tuple['HPOTerm', ...]]] = []
        for path in self.hierarchy:
            try:
                steps = path.index(other)
            except ValueError:
                # this path to root does not pass through ``other``
                continue
            candidates.append((steps, path[:steps + 1]))

        if not candidates:
            raise RuntimeError(
                f'Unable to determine path to parent term {other.name}'
            )
        # ``min`` returns the first of several equally short paths
        return min(candidates, key=lambda x: x[0])

    def longest_path_to_bottom(self, level: int = 0) -> int:
        """
        Calculates how far the most distant child is apart

        Parameters
        ----------
        level: int
            Offset level to indicate for calculation
            Default: 0

        Returns
        -------
        int:
            Number of steps to most distant child
        """
        if not self.children:
            return level
        return max(
            child.longest_path_to_bottom(level + 1)
            for child in self.children
        )

    def path_to_other(
        self,
        other: 'HPOTerm'
    ) -> Tuple[int, Tuple['HPOTerm', ...], int, int]:
        """
        Identifies the shortest connection between
        two HPO terms

        Parameters
        ----------
        other: HPOTerm
            Target HPO term for path finding

        Returns
        -------
        int
            Length of path
        tuple
            Tuple of HPOTerms in the path
        int
            Number of steps from term-1 to the common parent
        int
            Number of steps from term-2 to the common parent

        Raises
        ------
        IndexError
            If the two terms do not share any ancestor
        """
        common = self.common_ancestors(other)

        paths = []
        for term in common:
            path1 = self.shortest_path_to_parent(term)
            path2 = other.shortest_path_to_parent(term)

            # Walk up from self to the common term, then down to other.
            # The common term is dropped from the reversed second path
            # so that it is not listed twice.
            total_path = path1[1] + tuple(reversed(path2[1]))[1:]
            paths.append((
                int(path1[0] + path2[0]),
                total_path,
                int(path1[0]),
                int(path2[0])
            ))
        return sorted(paths, key=lambda x: x[0])[0]

    def count_parents(self) -> int:
        """
        Calculates total number of ancestral HPO Terms

        .. note::

            The count is accumulated recursively over ``parents``. An
            ancestor that is reachable through more than one path is
            counted once per path, so the result can be larger than
            ``len(term.all_parents)``.

        Returns
        -------
        int
            The number of all ancestral HPO Terms
        """
        # NOTE(review): confirm whether callers rely on the per-path
        # count before replacing this with ``len(self.all_parents)``
        return sum(
            parent.count_parents() + 1
            for parent in self.parents
        )

    def similarity_score(
        self,
        other: 'HPOTerm',
        kind: Optional[str] = None,
        method: Optional[str] = None
    ) -> float:
        """
        Calculate the similarity between this and another HPO-Term
        It uses :class:`pyhpo.similarity.base._Similarity` underneath

        Parameters
        ----------
        other: `HPOTerm`
            Other HPO term to compare similarity to

        kind: str, default ``None``
            Which kind of information content should be calculated.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **omim**
            * **orpha**
            * **decipher**
            * **gene**

        method: string, default ``None``
            The method to use to calculate the similarity.
            Default option is defined in `pyhpo.similarity.base._Similarity`

            Available options:

            * **resnik** - Resnik P, Proceedings of the 14th IJCAI, (1995)
            * **lin** - Lin D, Proceedings of the 15th ICML, (1998)
            * **jc** - Jiang J, Conrath D, ROCLING X, (1997)
              Implementation according to R source code
            * **jc2** - Jiang J, Conrath D, ROCLING X, (1997)
              Implementation according to paper from R ``hposim`` library
              Deng Y, et. al., PLoS One, (2015)
            * **rel** - Relevance measure - Schlicker A, et.al.,
              BMC Bioinformatics, (2006)
            * **ic** - Information coefficient - Li B, et. al., arXiv, (2010)
            * **graphic** - Graph based Information coefficient -
              Deng Y, et. al., PLoS One, (2015)
            * **dist** - Distance between terms
            * Additional methods can be registered separately (
              see :class:`pyhpo.similarity.base._Similarity`)

        Returns
        -------
        float
            The similarity score as calculated by ``SimScore``
        """
        return SimScore(self, other, kind, method)

    # NOTE: the cache is shared by all instances and holds references to
    # the terms used as arguments for as long as the entry is cached
    @lru_cache(maxsize=128)
    def cached_similarity_score(
        self,
        other: 'HPOTerm',
        kind: str = '',
        method: str = ''
    ) -> float:
        """
        This is a LRU-cached alias of
        :func:`pyhpo.term.HPOTerm.similarity_score`
        """
        return self.similarity_score(other, kind, method)

    def toJSON(
        self,
        verbose: bool = False
    ) -> dict:
        """
        Creates a JSON-like object of the HPOTerm

        Parameters
        ----------
        verbose: bool, default ``False``
            Include extra properties

        Returns
        -------
        dict
            A dictionary with the main properties of the HPOTerm

        **Example:** ::

            >>> terms[2].toJSON()
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height'
            }

            >>> terms[2].toJSON(verbose=True)
            {
                'int': 2,
                'id': 'HP:0000002',
                'name': 'Abnormality of body height',
                'definition': '"Deviation from the norm of height [...]',
                'comment': '',
                'synonym': ['Abnormality of body height'],
                'xref': ['UMLS:C4025901'],
                'is_a': ['HP:0001507 ! Growth abnormality'],
                'ic': {
                    'gene': ...,
                    'omim': ...,
                    'orpha': ...,
                    'decipher': ...,
                    'custom': {}
                }
            }
        """
        res: Dict[str, Any] = {
            'int': int(self),
            'id': self.id,
            'name': self.name
        }

        if verbose:
            res['definition'] = self.definition
            res['comment'] = self.comment
            res['synonym'] = self.synonym
            res['xref'] = self.xref
            res['is_a'] = self._is_a
            res['ic'] = self.information_content.dict()

        return res

    def to_obo(self) -> str:
        """
        Not implemented, always raises ``NotImplementedError``
        """
        raise NotImplementedError('Method is missing')

    def __hash__(self) -> int:
        """
        The hash is precalculated during initialization
        """
        return self._hash

    def __int__(self) -> int:
        """
        The integer representation of the HPO identifier
        """
        return self.index

    def __eq__(self, t2: Any) -> bool:
        """
        Two HPOTerms are equal if their hashes
        (based on ``index`` and ``name``) are equal.
        Any object that is not an HPOTerm is never equal.
        """
        # The type must be checked first: hashing an unhashable object
        # (e.g. a list) would raise a TypeError instead of returning False
        return isinstance(t2, HPOTerm) and hash(self) == hash(t2)

    def __lt__(self, other: Any) -> bool:
        """
        HPOTerms are ordered by their integer representation
        """
        return int(self) < int(other)

    def __str__(self) -> str:
        return f'{self.id} | {self.name}'

    def __repr__(self) -> str:
        return (
            f"HPOTerm(id='{self.id}', name='{self.name}', "
            f"is_a={self._is_a})"
        )

    class Config:
        # GeneSingleton and the disease classes are used as field types
        arbitrary_types_allowed = True
        # ``_hash`` and ``_is_a`` are private attributes, not model fields
        underscore_attrs_are_private = True
        # leave the ``cached_property`` descriptors as they are
        keep_untouched = (cached_property, )
        allow_mutation = True