Skip to content

onconova.terminology.special

CTCAETermsDigestor(verbose=True)

Bases: TerminologyDigestor

Digestor for CTCAE MedDRA terms.

Attributes:

Name Type Description
LABEL str

Identifier for the digestor.

FILENAME str

Name of the file containing the CTCAE MedDRA terms.

Source code in onconova/terminology/digestors.py
def __init__(self, verbose: bool = True) -> None:
    """
    Initialize the TerminologyDigestor.

    Resolves the location of ``self.FILENAME`` under ``self.PATH``. If that
    lookup raises ``FileNotFoundError``, the SNOMED CT release archive pointed
    to by the ``ONCONOVA_SNOMED_ZIPFILE_PATH`` environment variable is
    extracted into ``self.PATH``; its description and relationship snapshot
    files are moved to ``snomedct.tsv`` and ``snomedct_relations.tsv`` and the
    remaining extracted content is deleted. The location is then resolved
    again.

    Args:
        verbose (bool, optional): Whether to print progress messages. Defaults to True.

    Raises:
        SystemExit: If the file is missing and ``ONCONOVA_SNOMED_ZIPFILE_PATH``
            is unset or does not point to an existing file.
    """
    try:
        self.file_location = get_file_location(self.PATH, self.FILENAME)
    except FileNotFoundError:
        # Unzip into DATA_DIR
        zip_file_path = os.environ.get("ONCONOVA_SNOMED_ZIPFILE_PATH", "")
        if not zip_file_path or not os.path.isfile(zip_file_path):
            print(
                "ERROR FILE NOT FOUND:\nPlease download the SNOMEDCT_International_*.zip file from (requires a login and license):\nand specify the location of the zip file with the ONCONOVA_SNOMED_ZIPFILE_PATH variable.\n"
            )
            sys.exit(1)
        with zipfile.ZipFile(zip_file_path) as zip_ref:
            zip_ref.extractall(self.PATH)

        # Move files into TEMP_DIR
        print("• Unpacking SNOMED CT files...")
        # The staging directory lives inside self.PATH (already written to by
        # extractall). Deriving it from os.path.basename(zip_file_path) would
        # create a directory named after the zip file relative to the current
        # working directory, which collides with the zip itself when run from
        # its folder and is never fully removed by the clean-up below.
        temp_dir = os.path.join(self.PATH, ".snomed")
        snomed_dirs = glob.glob(os.path.join(self.PATH, "SnomedCT_*"))
        try:
            os.makedirs(temp_dir, exist_ok=True)
            for snomed_dir in snomed_dirs:
                for item in os.listdir(snomed_dir):
                    src = os.path.join(snomed_dir, item)
                    dst = os.path.join(temp_dir, item)
                    shutil.move(src, dst)

            # Move description and relationship files
            desc_src_pattern = os.path.join(
                temp_dir,
                "Snapshot",
                "Terminology",
                "sct2_Description_Snapshot-en_INT_*",
            )
            desc_files = glob.glob(desc_src_pattern)
            if desc_files:
                shutil.move(desc_files[0], os.path.join(self.PATH, "snomedct.tsv"))

            rel_src_pattern = os.path.join(
                temp_dir, "Snapshot", "Terminology", "sct2_Relationship_Snapshot_INT_*"
            )
            rel_files = glob.glob(rel_src_pattern)
            if rel_files:
                shutil.move(
                    rel_files[0], os.path.join(self.PATH, "snomedct_relations.tsv")
                )
        finally:
            # Remove TEMP_DIR and extracted SnomedCT_* directories, even when
            # one of the moves above failed, so no partial extraction lingers.
            print("• Clean-up unnecessary files...")
            shutil.rmtree(temp_dir, ignore_errors=True)
            for snomed_dir in snomed_dirs:
                shutil.rmtree(snomed_dir, ignore_errors=True)
    # Resolve again: either confirms the first lookup or picks up the files
    # that were just unpacked (a still-missing file propagates the error).
    self.file_location = get_file_location(self.PATH, self.FILENAME)
    self.verbose = verbose

CANONICAL_URL class-attribute instance-attribute

FILENAME class-attribute instance-attribute

LABEL class-attribute instance-attribute

DrugCodedConcept

Bases: CodedConcept

Represents a coded concept for a drug, extending the CodedConcept class.

Attributes:

Name Type Description
therapy_category str | None

The category of therapy associated with the drug. Can be None if not specified.

therapy_category class-attribute instance-attribute

NCITAntineoplasticAgentsSubsetDigestor(verbose=True)

Bases: TerminologyDigestor

Digestor for the NCIT Antineoplastic Agents subset terminology.

Source code in onconova/terminology/digestors.py
def __init__(self, verbose: bool = True) -> None:
    """
    Initialize the TerminologyDigestor.

    Resolves the location of ``self.FILENAME`` under ``self.PATH``. If that
    lookup raises ``FileNotFoundError``, the SNOMED CT release archive pointed
    to by the ``ONCONOVA_SNOMED_ZIPFILE_PATH`` environment variable is
    extracted into ``self.PATH``; its description and relationship snapshot
    files are moved to ``snomedct.tsv`` and ``snomedct_relations.tsv`` and the
    remaining extracted content is deleted. The location is then resolved
    again.

    Args:
        verbose (bool, optional): Whether to print progress messages. Defaults to True.

    Raises:
        SystemExit: If the file is missing and ``ONCONOVA_SNOMED_ZIPFILE_PATH``
            is unset or does not point to an existing file.
    """
    try:
        self.file_location = get_file_location(self.PATH, self.FILENAME)
    except FileNotFoundError:
        # Unzip into DATA_DIR
        zip_file_path = os.environ.get("ONCONOVA_SNOMED_ZIPFILE_PATH", "")
        if not zip_file_path or not os.path.isfile(zip_file_path):
            print(
                "ERROR FILE NOT FOUND:\nPlease download the SNOMEDCT_International_*.zip file from (requires a login and license):\nand specify the location of the zip file with the ONCONOVA_SNOMED_ZIPFILE_PATH variable.\n"
            )
            sys.exit(1)
        with zipfile.ZipFile(zip_file_path) as zip_ref:
            zip_ref.extractall(self.PATH)

        # Move files into TEMP_DIR
        print("• Unpacking SNOMED CT files...")
        # The staging directory lives inside self.PATH (already written to by
        # extractall). Deriving it from os.path.basename(zip_file_path) would
        # create a directory named after the zip file relative to the current
        # working directory, which collides with the zip itself when run from
        # its folder and is never fully removed by the clean-up below.
        temp_dir = os.path.join(self.PATH, ".snomed")
        snomed_dirs = glob.glob(os.path.join(self.PATH, "SnomedCT_*"))
        try:
            os.makedirs(temp_dir, exist_ok=True)
            for snomed_dir in snomed_dirs:
                for item in os.listdir(snomed_dir):
                    src = os.path.join(snomed_dir, item)
                    dst = os.path.join(temp_dir, item)
                    shutil.move(src, dst)

            # Move description and relationship files
            desc_src_pattern = os.path.join(
                temp_dir,
                "Snapshot",
                "Terminology",
                "sct2_Description_Snapshot-en_INT_*",
            )
            desc_files = glob.glob(desc_src_pattern)
            if desc_files:
                shutil.move(desc_files[0], os.path.join(self.PATH, "snomedct.tsv"))

            rel_src_pattern = os.path.join(
                temp_dir, "Snapshot", "Terminology", "sct2_Relationship_Snapshot_INT_*"
            )
            rel_files = glob.glob(rel_src_pattern)
            if rel_files:
                shutil.move(
                    rel_files[0], os.path.join(self.PATH, "snomedct_relations.tsv")
                )
        finally:
            # Remove TEMP_DIR and extracted SnomedCT_* directories, even when
            # one of the moves above failed, so no partial extraction lingers.
            print("• Clean-up unnecessary files...")
            shutil.rmtree(temp_dir, ignore_errors=True)
            for snomed_dir in snomed_dirs:
                shutil.rmtree(snomed_dir, ignore_errors=True)
    # Resolve again: either confirms the first lookup or picks up the files
    # that were just unpacked (a still-missing file propagates the error).
    self.file_location = get_file_location(self.PATH, self.FILENAME)
    self.verbose = verbose

FILENAME class-attribute instance-attribute

LABEL class-attribute instance-attribute

digest(*args, **kwargs)

Source code in onconova/terminology/special.py
def digest(self, *args, **kwargs) -> dict[str, str]:
    """
    Run the digestion and return the collected concepts.

    Resets ``self.designations`` and ``self.concepts``, delegates the actual
    parsing to ``self._digest_concepts()``, and then attaches every collected
    designation list to the concept stored under the same code as its
    ``synonyms`` attribute.

    Args:
        *args: Accepted for signature compatibility; not used here.
        **kwargs: Accepted for signature compatibility; not used here.

    Returns:
        The ``self.concepts`` mapping, keyed by concept code.
    """
    # Start from a clean slate on every call
    self.designations = defaultdict(list)
    self.concepts = {}
    self._digest_concepts()
    # Read the mapping only after digestion, in case it was rebound meanwhile
    digested = self.concepts
    for concept_code, names in self.designations.items():
        digested[concept_code].synonyms = names
    return digested

add_gene_exons()

Populates the GeneExon table with exon information for each gene.

This function retrieves exon data from the EnsemblExonsDigestor, iterates through each gene symbol, and updates the database by creating or retrieving GeneExon objects for each exon associated with a gene. The function uses tqdm to display progress and sets exon attributes such as rank, coding DNA region, and coding genomic region.

Source code in onconova/terminology/special.py
def add_gene_exons():
    """
    Create the missing GeneExon rows for every gene known to the database.

    The exon map produced by ``EnsemblExonsDigestor().digest()`` is walked
    gene symbol by gene symbol (with a tqdm progress bar). Symbols for which
    no ``Gene`` with a matching ``display`` exists are skipped. For each exon
    of a matched gene, a ``GeneExon`` is fetched or created by ``(gene, rank)``;
    the coding DNA and coding genomic regions are passed as ``defaults``.
    """
    exons_by_symbol = EnsemblExonsDigestor().digest()
    progress = tqdm(
        exons_by_symbol, total=len(exons_by_symbol), desc="• Updating gene exons"
    )
    for symbol in progress:
        gene = Gene.objects.filter(display=symbol).first()
        if not gene:
            # Exons of genes absent from the database are ignored
            continue
        for exon in exons_by_symbol[symbol]:
            cdna_region = (exon.coding_dna_start, exon.coding_dna_end)
            genomic_region = (exon.coding_genomic_start, exon.coding_genomic_end)
            GeneExon.objects.get_or_create(
                gene=gene,
                rank=exon.rank,
                defaults={
                    "coding_dna_region": cdna_region,
                    "coding_genomic_region": genomic_region,
                },
            )

expand_antineoplastic_agent_concepts()

Expands and classifies antineoplastic agent concepts using NCIT codes.

This function loads or creates a cache of NCIT descendant codes, fetches and classifies antineoplastic agents into therapy categories (e.g., immunotherapy, hormone therapy, chemotherapy, etc.) by traversing the NCIT ontology tree, and returns a list of DrugCodedConcept objects with assigned therapy categories.

Returns:

Type Description
List[DrugCodedConcept]

A list of DrugCodedConcept objects representing antineoplastic agents, each annotated with its therapy category.

Source code in onconova/terminology/special.py
def expand_antineoplastic_agent_concepts() -> List[DrugCodedConcept]:
    """
    Expands and classifies antineoplastic agent concepts using NCIT codes.

    This function loads or creates a JSON cache of NCIT descendant codes, resolves the
    descendants of a fixed set of NCIT root codes per therapy category (fetching them
    from the NCIT EVS REST API when not cached), and assigns each concept of the NCIT
    antineoplastic agents subset the first category whose descendants contain it
    (``UNCLASSIFIED`` otherwise). The ancestors of every agent, up to but excluding
    ``C1909`` (Pharmacologic Substance), are included as well, as returned by the
    codesystem.

    Returns:
        List[DrugCodedConcept]: The classified antineoplastic agents, followed in
        insertion order by any ancestor concepts that are not agents themselves.
    """
    current_path = os.path.dirname(__file__)
    cache_file = (
        f"{current_path}/external-sources/ncit_antineoplastic_descendants.cache.json"
    )
    if not os.path.exists(cache_file):
        # Create an empty cache file if it does not exist
        with open(cache_file, "w", encoding="utf-8") as f:
            json.dump({}, f)
    with open(cache_file, "r", encoding="utf-8") as f:
        cache = json.load(f)

    def _get_NCIT_descendant_codes(codes):
        """Return the concatenated descendant codes of ``codes``, caching API results."""
        descendant_codes = []
        for code in codes:
            if code in cache:
                descendants = cache[code]
            else:
                # Fetch the descendants from the NCIT API
                print(f"• Fetching descendants for {code} from NCIT API...")
                descendants = [
                    concept["code"]
                    for concept in request_http_get(
                        f"https://api-evsrest.nci.nih.gov/api/v1/concept/ncit/{code}/descendants?fromRecord=0&pageSize=50000&maxLevel=10000"
                    )
                ]
                cache[code] = descendants
                # Persist right away so an interrupted run keeps what it fetched
                with open(cache_file, "w", encoding="utf-8") as f:
                    json.dump(cache, f, indent=2)
            descendant_codes.extend(descendants)
        return descendant_codes

    # Imported here rather than at module level, as in the original code
    from onconova.terminology.services import download_codesystem

    concepts = {}
    # Prepare the NCIT codesystem and its tree structure
    ncit_codesystem = download_codesystem(NCITDigestor.CANONICAL_URL)
    # Digest the NCTPOT maps
    ncit_antineoplastic_drugs = list(
        NCITAntineoplasticAgentsSubsetDigestor().digest().values()
    )
    # Add the concepts from the NCIT Antineoplastic agents tree
    print("• Updating antineoplastic agent classifications...")

    therapy_categories = AntineoplasticAgent.TherapyCategory
    # Insertion order matters: the first category containing a code wins.
    # Codes are held in frozensets so each membership test is O(1) rather
    # than a linear scan over the descendant lists.
    categories = {
        therapy_categories.IMMUNOTHERAPY: frozenset(
            _get_NCIT_descendant_codes(["C308", "C20401"])
        ),
        therapy_categories.HORMONE_THERAPY: frozenset(
            _get_NCIT_descendant_codes(["C147908", "C29701"])
        ),
        therapy_categories.METABOLIC_THERAPY: frozenset(
            _get_NCIT_descendant_codes(["C177430"])
        ),
        therapy_categories.ANTIMETASTATIC_THERAPY: frozenset(
            _get_NCIT_descendant_codes(["C2196"])
        ),
        therapy_categories.TARGETED_THERAPY: frozenset(
            _get_NCIT_descendant_codes(
                ["C163758", "C1742", "C471", "C2189", "C177298", "C129839"]
            )
        ),
        therapy_categories.CHEMOTHERAPY: frozenset(
            _get_NCIT_descendant_codes(["C186664"])
        ),
        therapy_categories.RADIOPHARMACEUTICAL_THERAPY: frozenset(
            _get_NCIT_descendant_codes(["C1446"])
        ),
    }

    # Add other NCTPOT concepts not in the NCT Antineoplastic agents tree

    print("• Processing antineoplastic agents metadata...")
    for ncit_code in ncit_antineoplastic_drugs:
        concept = ncit_codesystem.get(ncit_code)
        if not concept:
            continue
        drug = DrugCodedConcept(**concept.model_dump())
        drug.therapy_category = next(
            (
                category
                for category, category_codes in categories.items()
                if concept.code in category_codes
            ),
            therapy_categories.UNCLASSIFIED,
        )
        concepts[concept.code] = drug

        ancestor = ncit_codesystem.get(concept.parent or "")
        while ancestor and ancestor.code != "C1909":  # Pharmacologic Substance
            # Never replace an entry that is already present: an ancestor may
            # itself be an agent that was classified in an earlier iteration,
            # and overwriting it would silently drop its therapy category.
            concepts.setdefault(ancestor.code, ancestor)
            ancestor = ncit_codesystem.get(ancestor.parent or "")

    return list(concepts.values())

expand_ctcae_terms()

Expands and returns a list of CTCAE (Common Terminology Criteria for Adverse Events) coded concepts.

Uses the CTCAETermsDigestor to process and retrieve all available CTCAE terms.

Returns:

Type Description
List[CodedConcept]

A list of coded concepts representing CTCAE terms.

Source code in onconova/terminology/special.py
def expand_ctcae_terms() -> List[CodedConcept]:
    """
    Collect every CTCAE (Common Terminology Criteria for Adverse Events) coded concept.

    A fresh CTCAETermsDigestor is instantiated and digested; the values of the
    resulting mapping are returned as a list.

    Returns:
        List[CodedConcept]: The coded concepts produced by the CTCAE digestor.
    """
    digested_terms = CTCAETermsDigestor().digest()
    return [*digested_terms.values()]
runner