Source code for aoptk.normalization.pubchem_api

from __future__ import annotations
from typing import TYPE_CHECKING
from urllib.parse import quote
import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from aoptk.normalization.normalize_chemical import NormalizeChemical

if TYPE_CHECKING:
    from aoptk.chemical import Chemical


class PubChemAPI(NormalizeChemical):
    """Use PubChem API to normalize chemical names.

    All requests go through one ``requests.Session`` whose HTTPS adapter
    retries rate-limited (429) and server-error (5xx) responses.
    """

    # Timeout, in seconds, passed to every request made through the session.
    timeout = 10

    # Common prefix of the PUG REST "compound by name" endpoints used below.
    _base_url = "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name"

    def __init__(self):
        self._session = requests.Session()
        retry_strategy = Retry(
            total=5,
            backoff_factor=3,
            status_forcelist=[429, 500, 502, 503, 504],
            allowed_methods=["GET", "POST"],
        )
        adapter = HTTPAdapter(max_retries=retry_strategy)
        # Only HTTPS is mounted; every URL built by this class is HTTPS.
        self._session.mount("https://", adapter)

    def normalize_chemical(self, chemical: Chemical) -> Chemical:
        """Use the PubChem API to normalize a chemical name.

        This method may modify the given ``chemical`` instance in-place:

        * ``heading`` is set to the lower-cased PubChem title, or to
          ``chemical.name`` itself when the title lookup fails (see
          ``_find_title_in_pubchem``). It is left untouched when the
          resulting value is empty.
        * ``synonyms`` is replaced by the PubChem synonyms when at least
          one synonym is found; otherwise the existing synonyms are kept.

        Args:
            chemical: The chemical to normalize.

        Returns:
            The same ``chemical`` instance that was passed in.
        """
        if title_name := self._find_title_in_pubchem(chemical.name):
            chemical.heading = title_name
        if synonyms := self._find_synonyms_in_pubchem(chemical.name):
            chemical.synonyms.clear()
            chemical.synonyms.update(synonyms)
        return chemical

    def _build_url(self, chemical_name: str, operation: str) -> str:
        """Build the PUG REST URL for ``operation`` on ``chemical_name``.

        The name is percent-encoded with no safe characters so that ``/``,
        ``?``, ``#`` and ``%`` inside a chemical name cannot alter the path
        or query of the request.
        """
        return f"{self._base_url}/{quote(chemical_name, safe='')}/{operation}"

    def _find_title_in_pubchem(self, chemical_name: str) -> str | None:
        """Find the title chemical name from PubChem.

        Args:
            chemical_name: Name to look up.

        Returns:
            The stripped, lower-cased response text on success. When the
            response is not OK, ``chemical_name`` is returned unchanged as a
            fallback.
        """
        search_url = self._build_url(chemical_name, "property/Title/TXT")
        response = self._session.get(search_url, timeout=self.timeout)
        if not response.ok:
            return chemical_name
        return response.text.strip().lower()

    def _find_synonyms_in_pubchem(self, chemical_name: str) -> set[str]:
        """Find synonyms for a chemical name from PubChem.

        Args:
            chemical_name: Name to look up.

        Returns:
            The lower-cased synonyms, one per non-blank response line. An
            empty set is returned when the response is not OK, so callers
            testing the result for truthiness keep their existing synonyms.
        """
        search_url = self._build_url(chemical_name, "synonyms/TXT")
        response = self._session.get(search_url, timeout=self.timeout)
        if not response.ok:
            # Must be empty (falsy): a set holding "" would be truthy and
            # make the caller overwrite real synonyms with an empty string.
            return set()
        return {
            synonym
            for line in response.text.lower().splitlines()
            if (synonym := line.strip())
        }