from typing import Any, ClassVar, Dict, Set, Union
from pydantic import BaseModel
class Annotation(BaseModel):
    """
    Base model for an entity that carries a numeric ``id``, a ``name``
    and a set of associated HPO terms.

    Instances are hashable: the hash is computed once in ``__init__``
    from ``(id, name)`` and stored in the private ``_hash`` attribute.

    Attributes
    ----------
    id: int
        Numeric identifier
    name: str
        Name of the annotation
    hpo: set
        Associated HPO terms (annotated as ``Set[int]``),
        empty by default
    """
    id: int
    name: str
    # NOTE(review): relies on pydantic copying field defaults per instance
    # so that the set is not shared between instances - confirm.
    hpo: Set[int] = set()
    _hash: int
    # Names of the attributes that ``toJSON`` always includes
    _json_keys = {'id', 'name'}

    def __init__(self, **kwargs: Union[int, str]) -> None:
        super().__init__(**kwargs)
        # Computed once; later changes to ``id`` or ``name`` do not
        # update the stored hash.
        self._hash = hash((self.id, self.name))

    def toJSON(self, verbose: bool = False) -> dict:
        """
        Backwards compatibility method

        BaseModel include ``.json`` method

        Parameters
        ----------
        verbose: bool, default: ``False``
            Also include the associated HPO terms

        Returns
        -------
        dict
            A dict with one entry per name in ``_json_keys``
            (subclasses may define additional keys)

            * **id** - The ID
            * **name** - The name
            * **hpo** - (If ``verbose == True``):
              the content of the ``hpo`` attribute
        """
        res = {key: getattr(self, key) for key in self._json_keys}
        if verbose:
            res['hpo'] = self.hpo
        return res

    def __eq__(self, other: Any) -> bool:
        """
        Compare with an ``int`` (matched against ``id``), a ``str``
        (matched against ``name``) or any other object (matched by hash).

        Unhashable objects never compare equal.
        """
        if isinstance(other, int):
            return self.id == other
        if isinstance(other, str):
            return self.name == other
        try:
            return hash(self) == hash(other)
        except TypeError:
            # ``other`` is unhashable (e.g. a list or a dict)
            return False

    def __hash__(self) -> int:
        return self._hash

    def __str__(self) -> str:
        return self.name

    class Config:
        allow_mutation = True
        underscore_attrs_are_private = True
class GeneSingleton(Annotation):
    """
    An instance of ``GeneSingleton`` represents a single gene.

    .. note::

        ``GeneSingleton`` should never be initiated directly,
        but only via :class:`.GeneDict`
        to ensure that every gene is only created once.

    Attributes
    ----------
    id: int
        HGNC gene ID

    name: str
        HGNC gene symbol

    symbol: str
        HGNC gene symbol (alias of :attr:`.GeneSingleton.name`)

    hpo: set of :class:`pyhpo.term.HPOTerm`
        all HPOTerms associated to the gene

    Parameters
    ----------
    id: int
        HGNC gene ID

    name: str
        HGNC gene symbol
    """
    # ``symbol`` is exported by ``toJSON`` in addition to ``id`` and ``name``
    _json_keys = {'id', 'name', 'symbol'}

    @property
    def symbol(self) -> str:
        """Alias of ``name``."""
        return self.name
class GeneDict(dict):
    """
    An associative dict of all genes

    Ensures that every gene is a single GeneSingleton instance
    and no duplicate instances are generated during parsing of the
    Gen-Pheno-HPO associations.

    This class is initialized once and genes are created by calling
    the instance of GeneDict to ensure that the same gene exists only once.

    For example ::

        Gene = GeneDict()
        gba = Gene(hgncid=1, symbol='GBA')
        ezh2 = Gene(hgncid=2, symbol='EZH2')
        gba_2 = Gene(hgncid=1, symbol='GBA')

        gba is ezh2
        >> False

        gba is gba_2
        >> True

    Parameters
    ----------
    hgncid: int
        The HGNC ID

    symbol: str
        The gene symbol

    Returns
    -------
    :class:`.GeneSingleton`
    """
    def __init__(self) -> None:
        super().__init__()
        # Secondary indexes so that genes can be found by ID or by symbol
        self._indicies: Dict[int, GeneSingleton] = {}
        self._names: Dict[str, GeneSingleton] = {}

    def __call__(
        self,
        hgncid: int,
        symbol: str
    ) -> GeneSingleton:
        """
        Return the gene for ``symbol`` or ``hgncid``, creating and
        registering it if neither is known yet.

        The symbol is looked up first, then the ID; an existing gene
        is returned as soon as one of the two matches.
        """
        try:
            return self._names[symbol]
        except KeyError:
            pass

        try:
            return self._indicies[hgncid]
        except KeyError:
            pass

        gene = GeneSingleton(id=hgncid, name=symbol)  # type: ignore
        self[gene] = gene
        self._indicies[hgncid] = gene
        self._names[symbol] = gene
        return gene

    def clear(self) -> None:
        """
        Removes all Genes to start with blank state.

        There are almost zero use-cases to ever call this method in
        client-side code. If you use code, that modifies the ``GeneDict``
        class a lot, you might use this. Under normal circumstances,
        this should not be needed.

        Currently, the primary use case is to clean up the state for
        unittests.
        """
        self._indicies.clear()
        self._names.clear()
        dict.clear(self)

    def get(
        self,
        query: Union[int, str],
        default: Any = None
    ) -> GeneSingleton:
        """
        Allows client to query for a gene by both ID and symbol.
        This method is useful for client that do not want to add new
        genes

        Parameters
        ----------
        query: int or str
            The (most likely user supplied) query.
            Can be either the HGNC-ID or the gene symbol

        default: Any, default: ``None``
            Accepted for signature compatibility with ``dict.get``.
            The value is not used: a failed lookup raises ``KeyError``

        Returns
        -------
        GeneSingleton
            If a gene is found, it is returned. Otherwise an Error is raised

        Raises
        ------
        KeyError
            No gene matches ``query``, neither by ID nor by symbol
        """
        try:
            return self._indicies[int(query)]
        except (ValueError, KeyError):
            # Not numeric or no gene with that ID: fall back to the symbol
            pass

        try:
            return self._names[str(query)]
        except KeyError:
            raise KeyError('No gene found for query')
class DiseaseSingleton(Annotation):
    """
    This class represents a single disease.

    .. note::

        ``DiseaseSingleton`` should never be initiated directly,
        but only via the appropriate disease dictionary, e.g.
        :class:`.OmimDict` (:class:`.DiseaseDict`)
        to ensure that every disease is only created once.

    Attributes
    ----------
    id: int
        Disease ID

    name: str
        disease name

    hpo: set of :class:`pyhpo.term.HPOTerm`
        all HPOTerms associated to the disease

    negative_hpo: set of :class:`pyhpo.term.HPOTerm`
        HPOTerms not associated to the disease

    diseasetype: str
        Label of the disease source, ``'Undefined'`` in this base class

    Parameters
    ----------
    id: int
        Disease ID

    name: str
        Disease name
    """
    # Overridden by the source-specific subclasses
    diseasetype = 'Undefined'
    negative_hpo: Set[int] = set()
class OmimDisease(DiseaseSingleton):
    """A :class:`.DiseaseSingleton` whose ``diseasetype`` is ``'Omim'``."""
    diseasetype = 'Omim'
class OrphaDisease(DiseaseSingleton):
    """A :class:`.DiseaseSingleton` whose ``diseasetype`` is ``'Orpha'``."""
    diseasetype = 'Orpha'
class DecipherDisease(DiseaseSingleton):
    """A :class:`.DiseaseSingleton` whose ``diseasetype`` is ``'Decipher'``."""
    diseasetype = 'Decipher'
class DiseaseDict(dict):
    """
    An associative dict of all diseases of one disease type

    Ensures that every disease is a single instance of ``disease_class``
    and no duplicate instances are generated during parsing of the
    Gen-Pheno-HPO associations.

    This class is initialized once and diseases are created by calling
    the instance of ``DiseaseDict`` to ensure that the same disease
    exists only once.

    ``DiseaseDict`` itself leaves ``disease_class`` unset; use a subclass
    that defines it, e.g. :class:`.OmimDict`.

    For example ::

        Disease = OmimDict()
        gaucher = Disease(diseaseid=1, name='Gaucher')
        fabry = Disease(diseaseid=2, name='Fabry')
        gaucher_2 = Disease(diseaseid=1, name='Gaucher')

        gaucher is fabry
        >> False

        gaucher is gaucher_2
        >> True

    Parameters
    ----------
    diseaseid: int
        The Disease ID

    name: str
        The disease name

    Returns
    -------
    :class:`.DiseaseSingleton`
    """
    # Class used to create new diseases; must be set by subclasses
    disease_class: ClassVar = None

    def __init__(self) -> None:
        super().__init__()
        # Secondary index so that diseases can be found by their ID
        self._indicies: Dict[int, DiseaseSingleton] = {}

    def __call__(
        self,
        diseaseid: int,
        name: str
    ) -> DiseaseSingleton:
        """
        Return the disease registered for ``diseaseid``, creating and
        registering it if the ID is not known yet.

        ``name`` is only used when a new disease is created.
        """
        # Guards against calling the base class, which has no disease_class
        assert self.disease_class

        try:
            return self._indicies[diseaseid]
        except KeyError:
            pass

        disease: DiseaseSingleton = self.disease_class(id=diseaseid, name=name)
        self[disease] = disease
        self._indicies[diseaseid] = disease
        return disease

    def clear(self) -> None:
        """
        Removes all Diseases to start with blank state.

        There are almost zero use-cases to ever call this method in
        client-side code. If you use code, that modifies the ``DiseaseDict``
        class a lot, you might use this. Under normal circumstances,
        this should not be needed.

        Currently, the primary use case is to clean up the state for
        unittests.
        """
        self._indicies.clear()
        dict.clear(self)

    def get(
        self,
        query: Union[int, str],
        default: Any = None
    ) -> DiseaseSingleton:
        """
        Allows client to query for a disease by ID.
        This method is useful for client that do not want to add new
        diseases

        Parameters
        ----------
        query: int or str
            The (most likely user supplied) query for Disease ID.

        default: Any, default: ``None``
            Accepted for signature compatibility with ``dict.get``.
            The value is not used: a failed lookup raises an error

        Returns
        -------
        DiseaseSingleton
            If a disease is found, it is returned. Otherwise an Error is raised

        Raises
        ------
        ValueError
            ``query`` cannot be converted to an integer ID
        KeyError
            No disease is registered for the ID
        """
        try:
            idx = int(query)
        except ValueError:
            raise ValueError('Invalid Disease ID supplied')

        try:
            return self._indicies[idx]
        except KeyError:
            raise KeyError('No disease found for query')
class OmimDict(DiseaseDict):
    """A :class:`.DiseaseDict` that creates :class:`.OmimDisease` instances."""
    disease_class = OmimDisease
class OrphaDict(DiseaseDict):
    """A :class:`.DiseaseDict` that creates :class:`.OrphaDisease` instances."""
    disease_class = OrphaDisease
class DecipherDict(DiseaseDict):
    """A :class:`.DiseaseDict` that creates :class:`.DecipherDisease` instances."""
    disease_class = DecipherDisease
# Module-level registries, one per annotation type, created at import time.
# Calling a registry returns the existing entry or creates a new one.
Omim = OmimDict()
Orpha = OrphaDict()
Decipher = DecipherDict()
Gene = GeneDict()