Skip to content

onconova.core.anonymization.base

AVERAGE_MONTH module-attribute

MAX_DATE_SHIFT module-attribute

REDACTED_STRING module-attribute

AnonymizationConfig

fields instance-attribute

functions class-attribute instance-attribute

key instance-attribute

AnonymizationMixin

Mixin class for automatic anonymization of specified model fields after validation.

Attributes:

Name Type Description
anonymized bool

Flag indicating whether anonymization should be applied.

__anonymization_fields__ Tuple[str, ...]

Class-level tuple listing the field names to anonymize.

__anonymization_key__ Optional[str]

Optional key used in the anonymization process.

__anonymization_functions__ Dict[str, Callable]

Optional mapping of field names to custom anonymization functions.

Methods:

Name Description
anonymize_data

Pydantic model validator that anonymizes the configured fields if 'anonymized' is True.

anonymize_value

Instance method for anonymizing a single value, can be overridden by subclasses.

__post_anonymization_hook__

Hook method for subclasses to implement post-anonymization actions.

__anonymization_fields__ class-attribute

__anonymization_functions__ class-attribute

__anonymization_key__ class-attribute

anonymized class-attribute instance-attribute

__post_anonymization_hook__()

Hook method for subclasses to implement additional actions after anonymization. Default implementation does nothing.

Source code in onconova/core/anonymization/base.py
def __post_anonymization_hook__(self):
    """
    Extension point for work that must happen after anonymization.

    Subclasses may override this method; the base implementation
    deliberately performs no action and returns ``None``.
    """
    return None

anonymize_data()

Source code in onconova/core/anonymization/base.py
@model_validator(mode="after")
def anonymize_data(self) -> Any:
    """
    Anonymize the configured fields of an already validated instance.

    Nothing is changed unless ``self.anonymized`` is truthy. Otherwise each
    field named in ``__anonymization_fields__`` that holds a truthy value is
    replaced by its anonymized counterpart, after which
    ``__post_anonymization_hook__`` is called.

    Returns:
        Any: The instance itself (``self``).
    """
    # Anonymization is opt-in; otherwise hand back the validated state as-is
    if self.anonymized:
        for field_name in self.__anonymization_fields__:
            current = getattr(self, field_name)
            # Falsy values (None, empty string, 0, ...) are left untouched
            if current:
                # A field-specific anonymizer wins over the generic per-instance one
                transform = self.__anonymization_functions__.get(
                    field_name, self.anonymize_value
                )
                setattr(self, field_name, transform(current))

        # Let subclasses react once every configured field has been processed
        self.__post_anonymization_hook__()

    return self

anonymize_value(value)

Hook for per-instance anonymization logic.

Source code in onconova/core/anonymization/base.py
def anonymize_value(self, value):
    """
    Anonymize a single value on behalf of this instance.

    Subclasses can override this to customize per-instance behavior. The
    default forwards to the ``anonymize_value`` function, using the class's
    ``__anonymization_key__`` or an empty string when that key is falsy.
    """
    key = self.__anonymization_key__ or ""
    return anonymize_value(value, key)

anonymize_age(age)

Anonymize an age value by mapping it to its corresponding age bin.

Parameters:

Name Type Description Default

age

Age

The age value to be anonymized.

required

Returns:

Name Type Description
AgeBin AgeBin

The anonymized age bin.

Source code in onconova/core/anonymization/base.py
def anonymize_age(age: Age) -> AgeBin:
    """Map an age onto the age bin that contains it.

    Args:
        age (Age): The age to anonymize. A value that already is an
            ``AgeBin`` is returned unchanged.

    Returns:
        AgeBin: The bin whose inclusive bounds contain the age.

    Raises:
        ValueError: If the age does not fall inside any of the bins.
    """
    # Values that are already binned need no further work
    if isinstance(age, AgeBin):
        return age

    # Inclusive (lower, upper) bounds for every bin, in ascending order
    bin_bounds = (
        (AgeBin.SUB_20, 0, 19),
        (AgeBin.AGE_20_24, 20, 24),
        (AgeBin.AGE_25_29, 25, 29),
        (AgeBin.AGE_30_34, 30, 34),
        (AgeBin.AGE_35_39, 35, 39),
        (AgeBin.AGE_40_44, 40, 44),
        (AgeBin.AGE_45_49, 45, 49),
        (AgeBin.AGE_50_54, 50, 54),
        (AgeBin.AGE_55_59, 55, 59),
        (AgeBin.AGE_60_64, 60, 64),
        (AgeBin.AGE_65_69, 65, 69),
        (AgeBin.AGE_70_74, 70, 74),
        (AgeBin.AGE_75_79, 75, 79),
        (AgeBin.AGE_80_84, 80, 84),
        (AgeBin.AGE_85_89, 85, 89),
        (AgeBin.OVER_90, 90, 150),
    )
    for candidate, lower, upper in bin_bounds:
        if age >= lower and age <= upper:
            return candidate

    # No bin matched (e.g. a negative age or one above the last upper bound)
    raise ValueError(f"Age {age} is out of valid range")

anonymize_by_redacting_string(original_value)

Anonymizes a string by returning a redacted string.

Parameters:

Name Type Description Default

original_value

str

The value to be anonymized.

required

Returns:

Name Type Description
str str

The redacted string value.

Source code in onconova/core/anonymization/base.py
def anonymize_by_redacting_string(original_value: Any) -> str:
    """
    Replace a value with the module's redaction placeholder.

    Args:
        original_value (Any): The value to be anonymized. Its content is
            never inspected.

    Returns:
        str: The ``REDACTED_STRING`` placeholder.
    """
    placeholder = REDACTED_STRING
    return placeholder

anonymize_clinically_relevant_date(original_date, case_id)

Anonymizes a date by shifting it by a random amount between -6 and 6 months.

Parameters:

Name Type Description Default

original_date

Union[date, datetime, str]

The date to be anonymized.

required

case_id

str

The case ID used to generate the random timeshift.

required

Returns:

Type Description
date | datetime

The anonymized (time-shifted) date.

Source code in onconova/core/anonymization/base.py
def anonymize_clinically_relevant_date(
    original_date: date | datetime | str,
    case_id: str,
) -> date | datetime:
    """
    Anonymizes a date by shifting it by a case-specific number of days.

    The shift is obtained from ``hash_to_range`` using the case ID and the
    configured ``ANONYMIZATION_SECRET_KEY``, bounded by ``-MAX_DATE_SHIFT``
    and ``MAX_DATE_SHIFT`` (described as +-6 months). Presumably the hash is
    deterministic, so all dates of one case move by the same offset.

    Args:
        original_date (Union[date, datetime, str]): The date to be anonymized.
            Strings must be in ``YYYY-MM-DD`` format; surrounding whitespace
            is ignored.
        case_id (str): The case ID used to generate the timeshift.

    Returns:
        Union[date, datetime]: The anonymized date. String input yields a ``date``.

    Raises:
        ValueError: If a string is given that is not in ``YYYY-MM-DD`` format.
    """
    if isinstance(original_date, str):
        try:
            # Tolerate surrounding whitespace, e.g. bounds split out of a range string
            original_date = datetime.strptime(
                original_date.strip(), "%Y-%m-%d"
            ).date()
        except ValueError as err:
            # Keep the parsing failure attached as the cause for easier debugging
            raise ValueError(f"Unrecognized date format: {original_date}") from err
    # Compute timeshift of +-6 months based on a hash of the case ID
    timeshift = hash_to_range(
        case_id,
        secret=settings.ANONYMIZATION_SECRET_KEY,
        low=-MAX_DATE_SHIFT,
        high=MAX_DATE_SHIFT,
    )
    # A signed day offset covers both the forward and the backward shift
    return original_date + timedelta(days=timeshift)

anonymize_personal_date(original_date)

Anonymize a date by returning only the year.

Parameters:

Name Type Description Default

original_date

Union[datetime, date, str]

The date to be anonymized.

required

Returns:

Name Type Description
date date

January 1st of the original date's year (month and day are both set to 1).

Source code in onconova/core/anonymization/base.py
def anonymize_personal_date(original_date: datetime | date | str) -> date:
    """
    Anonymize a date by returning only the year.

    Args:
        original_date (Union[datetime, date, str]): The date to be anonymized.

    Returns:
        date: The year of the anonymized date with month and day set to 1.
    """
    if isinstance(original_date, (datetime, date)):
        return datetime(original_date.year, 1, 1).date()
    elif isinstance(original_date, str):
        try:
            parsed_date = datetime.fromisoformat(original_date)
            return datetime(parsed_date.year, 1, 1).date()
        except ValueError:
            try:
                parsed_date = datetime.strptime(original_date, "%Y-%m-%d")
                return datetime(parsed_date.year, 1, 1).date()
            except ValueError:
                raise ValueError(f"Unrecognized date format: {original_date}")
    else:
        raise TypeError(f"Unsupported type: {type(original_date)}")

anonymize_value(value, case_id)

Anonymize a value by replacing it with a suitable placeholder.

This function anonymizes date/time fields by introducing a hash-based time-shift, anonymizes string fields by replacing them with a placeholder, and anonymizes age fields by binning the age.

Parameters:

Name Type Description Default

value

Any

The value to be anonymized.

required

case_id

str

The case ID to be used for hash-based anonymization.

required

Returns:

Name Type Description
Any Any

The anonymized value.

Source code in onconova/core/anonymization/base.py
def anonymize_value(value: Any, case_id: str) -> Any:
    """
    Anonymize a value according to its type.

    Dates, datetimes and ``YYYY-MM-DD`` strings are time-shifted based on the
    case ID; ``Period`` objects and period strings have each bound shifted
    the same way; any other string is replaced by a placeholder; ages are
    binned.

    Args:
        value (Any): The value to be anonymized.
        case_id (str): The case ID to be used for hash-based anonymization.

    Returns:
        Any: The anonymized value. A period string is returned as
            ``"<start> to <end>"``.

    Raises:
        NotImplementedError: If the type of the value is not supported.
    """
    # Imported at call time as in the original — presumably to avoid a
    # circular import between the schemas and this module (TODO confirm)
    from onconova.core.schemas import Period

    def shift_bound(bound):
        """Shift one period bound; a missing (falsy) bound stays ``None``."""
        return anonymize_clinically_relevant_date(bound, case_id) if bound else None

    # Anonymize date/time fields by introducing a hash-based time-shift
    if isinstance(value, (datetime, date)) or (
        isinstance(value, str) and is_datetime(value, "%Y-%m-%d")
    ):
        return anonymize_clinically_relevant_date(value, case_id)

    # Anonymize period objects by shifting both of their bounds
    if isinstance(value, Period):
        return Period(start=shift_bound(value.start), end=shift_bound(value.end))

    # Anonymize period strings such as "[2020-01-01,2020-02-01)" bound by bound
    if isinstance(value, str) and is_period(value, "%Y-%m-%d"):
        # Strip the range brackets, then any whitespace around each bound
        start_text, end_text = (
            part.strip() for part in value.strip("()[]").split(",")
        )
        # An empty bound (open-ended period) becomes None instead of being
        # passed to the date parser, which would reject it
        shifted = Period(start=shift_bound(start_text), end=shift_bound(end_text))
        return f"{shifted.start} to {shifted.end}"

    # Anonymize remaining string fields by replacing them with a placeholder
    if isinstance(value, str):
        return anonymize_by_redacting_string(value)

    # Anonymize age fields by binning the age
    if isinstance(value, Age):
        return anonymize_age(value)

    # Otherwise raise an error
    raise NotImplementedError(f"Could not anonymize value of type {type(value)}")
runner