medspacy.postprocess.postprocessing_functions

This module contains some simple functions that can be used as action or condition functions for postprocessing rules.

`ent_contains(ent, target, regex=True)`

Check if an entity contains a target string or pattern. Case-insensitive.

Parameters:

Name	Type	Description	Default
`ent`	`Span`	The span to check.	required
`target`	`Union[str, Iterable[str]]`	A string or a collection of strings that will be searched inside `ent`.	required
`regex`	`bool`	If the `target` specified is a regex pattern. Default is True.	`True`

Returns:

Type	Description
`bool`	Whether the target is contained in the ent.

Source code in medspacy/postprocess/postprocessing_functions.py

def ent_contains(
    ent: Span, target: Union[str, Iterable[str]], regex: bool = True
) -> bool:
    """
    Check whether an entity itself contains a target string or pattern.

    This is a thin wrapper: the search is delegated to `span_contains` with `ent` as the span to search, so the
    matching semantics (including case handling) are those of `span_contains`.

    Args:
        ent: The span to search in.
        target: A string or a collection of strings that will be searched inside `ent`.
        regex: If the `target` specified is a regex pattern. Default is True.

    Returns:
        Whether the target is contained in the ent.
    """
    found = span_contains(ent, target, regex)
    return found

`is_family(span)`

Returns whether a span is marked as family.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required

Returns:

Type	Description
`bool`	Whether the specified span has span._.is_family set to True.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_family(span: Span) -> bool:
    """
    Report the family flag stored on a span.

    Args:
        span: The span whose custom extension attributes are read.

    Returns:
        The value of `span._.is_family`.
    """
    extensions = span._
    return extensions.is_family

`is_followed_by(ent, target, window=1)`

Checks if an entity is followed by a target word within a certain window. If any phrases in target are more than one token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

Parameters:

Name	Type	Description	Default
`ent`	`Span`	The span to check.	required
`target`	`Union[str, Iterable[str]]`	A string or a collection of strings that will be searched for in the text following `ent`.	required
`window`	`int`	The number of tokens to search for `target` following `ent`. Default is 1.	`1`

Returns:

Type	Description
`bool`	Whether the entity specified is followed by a target.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_followed_by(
    ent: Span, target: Union[str, Iterable[str]], window: int = 1
) -> bool:
    """
    Check whether any target phrase appears in the tokens right after an entity.

    The `window` tokens following `ent` are lowercased and joined with single spaces; each target is lowercased and
    looked up as a substring of that text. A multi-token phrase can therefore only match when `window` is at least
    as large as the number of tokens in the phrase.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched for in the text following `ent`.
        window: The number of tokens to search for `target` following `ent`. Default is 1.

    Returns:
        Whether the entity specified is followed by a target.
    """
    window_tokens = ent.doc[ent.end : ent.end + window]
    lowered_context = " ".join(tok.text.lower() for tok in window_tokens)
    # A lone string is treated as a one-element collection so both cases share one code path.
    candidates = [target] if isinstance(target, str) else target
    return any(candidate.lower() in lowered_context for candidate in candidates)

`is_historical(span)`

Returns whether a span is marked as historical.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required

Returns:

Type	Description
`bool`	Whether the specified span has span._.is_historical set to True.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_historical(span: Span) -> bool:
    """
    Report the historical flag stored on a span.

    Args:
        span: The span whose custom extension attributes are read.

    Returns:
        The value of `span._.is_historical`.
    """
    extensions = span._
    return extensions.is_historical

`is_hypothetical(span)`

Returns whether a span is marked as hypothetical.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required

Returns:

Type	Description
`bool`	Whether the specified span has span._.is_hypothetical set to True.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_hypothetical(span: Span) -> bool:
    """
    Report the hypothetical flag stored on a span.

    Args:
        span: The span whose custom extension attributes are read.

    Returns:
        The value of `span._.is_hypothetical`.
    """
    extensions = span._
    return extensions.is_hypothetical

`is_modified_by_category(span, category)`

Returns whether a span is modified by a ConTextModifier of that type.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required
`category`	`str`	The category to check whether a ConTextModifier of that type modifies the span.	required

Returns:

Type	Description
`bool`	Whether the specified span has the specified modifier type.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_modified_by_category(span: Span, category: str) -> bool:
    """
    Check whether any modifier attached to a span has the given category.

    Categories are compared after upper-casing both sides, so the comparison ignores case.

    Args:
        span: The span to check; its modifiers are read from `span._.modifiers`.
        category: The category to look for among the span's modifiers.

    Returns:
        Whether the specified span has the specified modifier type.
    """
    return any(
        modifier.category.upper() == category.upper()
        for modifier in span._.modifiers
    )

`is_modified_by_text(span, target, regex=True)`

Returns whether a span is modified by a ConTextModifier with the specified text.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required
`target`	`Union[str, Iterable[str]]`	The text (a string or a collection of strings) to look for in the ConTextModifiers that modify the span.	required
`regex`	`bool`	If the `target` specified is a regex pattern. Default is True.	`True`

Returns:

Type	Description
`bool`	Whether the specified span is modified by a ConTextModifier containing the specified text.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_modified_by_text(
    span: Span, target: Union[str, Iterable[str]], regex: bool = True
) -> bool:
    """
    Check whether any modifier attached to a span contains the specified text.

    Each modifier in `span._.modifiers` is tested by passing its own span (`modifier.span`) to `span_contains`;
    the first match ends the search.

    Args:
        span: The span to check.
        target: A string or a collection of strings to look for in the text of the span's modifiers.
        regex: If the `target` specified is a regex pattern. Default is True.

    Returns:
        Whether at least one modifier of the span contains the target.
    """
    return any(
        span_contains(modifier.span, target, regex)
        for modifier in span._.modifiers
    )

`is_negated(span)`

Returns whether a span is marked as negated.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required

Returns:

Type	Description
`bool`	Whether the specified span has span._.is_negated set to True.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_negated(span: Span) -> bool:
    """
    Report the negation flag stored on a span.

    Args:
        span: The span whose custom extension attributes are read.

    Returns:
        The value of `span._.is_negated`.
    """
    extensions = span._
    return extensions.is_negated

`is_preceded_by(ent, target, window=1)`

Checks if an entity is preceded by a target word within a certain window. If any phrases in target are more than one token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

Parameters:

Name	Type	Description	Default
`ent`	`Span`	The span to check.	required
`target`	`Union[str, Iterable[str]]`	A string or a collection of strings that will be searched for in the text preceding `ent`.	required
`window`	`int`	The number of tokens to search for `target` preceding `ent`. Default is 1.	`1`

Returns:

Type	Description
`bool`	Whether the entity specified is preceded by a target.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_preceded_by(
    ent: Span, target: Union[str, Iterable[str]], window: int = 1
) -> bool:
    """
    Checks if an entity is preceded by a target word within a certain window. If any phrases in target are more than one
    token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

    The `window` tokens before `ent` are lowercased and joined with single spaces; each target is lowercased and
    looked up as a substring of that text. When fewer than `window` tokens precede the entity, the search covers
    everything from the start of the document up to the entity.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched for in the text preceding `ent`.
        window: The number of tokens to search for `target` preceding `ent`. Default is 1.

    Returns:
        Whether the entity specified is preceded by a target.
    """
    # Clamp at 0: a negative slice start would be interpreted relative to the END of the doc, which makes the
    # window empty (or wrong) for entities closer to the document start than `window` tokens.
    window_start = max(ent.start - window, 0)
    preceding_span = ent.doc[window_start : ent.start]
    preceding_string = " ".join(token.text.lower() for token in preceding_span)
    # A lone string is treated as a one-element collection so both cases share one code path.
    candidates = [target] if isinstance(target, str) else target
    return any(candidate.lower() in preceding_string for candidate in candidates)

`is_uncertain(span)`

Returns whether a span is marked as uncertain.

Parameters:

Name	Type	Description	Default
`span`	`Span`	The span to check.	required

Returns:

Type	Description
`bool`	Whether the specified span has span._.is_uncertain set to True.

Source code in medspacy/postprocess/postprocessing_functions.py

def is_uncertain(span: Span) -> bool:
    """
    Report the uncertainty flag stored on a span.

    Args:
        span: The span whose custom extension attributes are read.

    Returns:
        The value of `span._.is_uncertain`.
    """
    extensions = span._
    return extensions.is_uncertain

`remove_ent(ent, i, input_type='ents', span_group_name='medspacy_spans')`

Remove the entity at position `i` from `doc.ents` or, when `input_type` is "group", from the span group named `span_group_name`.

Parameters:

Name	Type	Description	Default
`ent`	`Span`	The entity to remove.	required
`i`	`int`	The index of `ent` in its source list.	required
`input_type`	`Literal['ents', 'group']`	The source of the entity, either "ents" or "group".	`'ents'`
`span_group_name`	`str`	If `input_type` is "group", the name of the span group.	`'medspacy_spans'`

Source code in medspacy/postprocess/postprocessing_functions.py

def remove_ent(
    ent: Span,
    i: int,
    input_type: Literal["ents", "group"] = "ents",
    span_group_name: str = "medspacy_spans",
):
    """
    Drop the entity at position `i` from its source collection on the parent doc.

    For "ents" the doc's `ents` are reassigned without element `i`; for "group" the named span group is copied to
    a list and reassigned without element `i`. Any other `input_type` leaves the doc untouched.

    Args:
        ent: The entity to remove. Only its `doc` is used; the element removed is selected by `i`.
        i: The index of `ent` in its source list.
        input_type: The source of the entity, either "ents" or "group".
        span_group_name: If `input_type` is "group", the name of the span group.
    """
    parent_doc = ent.doc
    if input_type == "ents":
        head, tail = parent_doc.ents[:i], parent_doc.ents[i + 1 :]
        parent_doc.ents = head + tail
        return
    if input_type == "group":
        members = list(parent_doc.spans[span_group_name])
        parent_doc.spans[span_group_name] = members[:i] + members[i + 1 :]

`sentence_contains(ent, target, regex=True)`

Check if an entity occurs in the same sentence as another span of text.

Parameters:

Name	Type	Description	Default
`ent`	`Span`	The span to check.	required
`target`	`Union[str, Iterable[str]]`	A string or a collection of strings that will be searched for in the text of the sentence containing `ent`.	required
`regex`		If the `target` specified is a regex pattern. Default is True.	`True`

Source code in medspacy/postprocess/postprocessing_functions.py

def sentence_contains(ent: Span, target: Union[str, Iterable[str]], regex=True) -> bool:
    """
    Check whether the sentence around an entity contains a target.

    The sentence is taken from `ent.sent` and the search itself is delegated to `span_contains`.

    Args:
        ent: The span whose containing sentence is searched.
        target: A string or a collection of strings that will be searched for in the text of the sentence containing
            `ent`.
        regex: If the `target` specified is a regex pattern. Default is True.

    Returns:
        The result of `span_contains` for the sentence, i.e. whether the target occurs in it.
    """
    sentence = ent.sent
    return span_contains(sentence, target, regex)

`set_family(ent, i, value=True)`

Set the value of ent._.is_family to value.

Source code in medspacy/postprocess/postprocessing_functions.py

def set_family(ent, i, value=True):
    """
    Set the value of ent._.is_family to value.

    Args:
        ent: The entity whose `is_family` extension attribute is set.
        i: The index of `ent` in its source list. Not used by this function.
        value: The value to assign. Default is True.
    """
    # Write the family flag itself (not `is_hypothetical`), matching the function's name and documentation.
    ent._.is_family = value

`set_historical(ent, i, value=True)`

Set the value of ent._.is_historical to value.

Source code in medspacy/postprocess/postprocessing_functions.py

def set_historical(ent, i, value=True):
    """
    Assign `value` to `ent._.is_historical`.

    Args:
        ent: The entity whose `is_historical` extension attribute is set.
        i: The index of `ent` in its source list. Not used by this function.
        value: The value to assign. Default is True.
    """
    extensions = ent._
    extensions.is_historical = value

`set_hypothetical(ent, i, value=True)`

Set the value of ent._.is_hypothetical to value.

Source code in medspacy/postprocess/postprocessing_functions.py

def set_hypothetical(ent, i, value=True):
    """
    Assign `value` to `ent._.is_hypothetical`.

    Args:
        ent: The entity whose `is_hypothetical` extension attribute is set.
        i: The index of `ent` in its source list. Not used by this function.
        value: The value to assign. Default is True.
    """
    extensions = ent._
    extensions.is_hypothetical = value

`set_label(ent, i, input_type='ents', span_group_name='medspacy_spans', **kwargs)`

Creates a copy of the entity with a new label.

WARNING: This is not fully safe, as spaCy does not allow modifying the label of a span. Instead, this creates a new copy and attempts to copy existing attributes, but this is not totally reliable.

Parameters:

Name	Type	Description	Default
`ent`		The entity to MODIFY.	required
`i`		The index of `ent` in its source list.	required
`input_type`	`Literal['ents', 'group']`	The source of the entity, either "ents" or "group".	`'ents'`
`span_group_name`	`str`	If `input_type` is "group", the name of the span group.	`'medspacy_spans'`

Source code in medspacy/postprocess/postprocessing_functions.py

def set_label(
    ent,
    i,
    input_type: Literal["ents", "group"] = "ents",
    span_group_name: str = "medspacy_spans",
    **kwargs
):
    """
    Creates a copy of the entity with a new label and puts it in place of the original.

    WARNING: This is not fully safe, as spaCy does not allow modifying the label of a span. Instead, this creates a new
    copy and attempts to copy existing attributes, but this is not totally reliable.

    Args:
        ent: The entity to MODIFY.
        i: The index of `ent` in its source list.
        input_type: The source of the entity, either "ents" or "group". Any value other than "ents" is handled as
            "group".
        span_group_name: If `input_type` is "group", the name of the span group.
        **kwargs: Must contain `label`, the label given to the new span.

    Raises:
        KeyError: If `label` is not passed as a keyword argument.
    """
    from spacy.tokens import Span

    doc = ent.doc
    new_ent = Span(doc, ent.start, ent.end, label=kwargs["label"])
    # Copy any additional attributes
    # NOTE: This may not be complete and should be used with caution
    # NOTE(review): `values[0]` looks like the registered default of each extension rather than the value currently
    # held by `ent` - confirm that writing it onto the new span is the intended "copy".
    for attr, values in ent._.__dict__["_extensions"].items():
        setattr(new_ent._, attr, values[0])
    if input_type == "ents":
        if len(doc.ents) == 1:
            doc.ents = (new_ent,)
        else:
            doc.ents = doc.ents[:i] + (new_ent,) + doc.ents[i + 1 :]
    else:
        # Work on a plain list copy of the group (as `remove_ent` does) so that length checks, slicing and
        # concatenation are ordinary list operations rather than operations on the span group object.
        group = list(doc.spans[span_group_name])
        if len(group) == 1:
            doc.spans[span_group_name] = [new_ent]
        else:
            doc.spans[span_group_name] = group[:i] + [new_ent] + group[i + 1 :]

`set_negated(ent, i, value=True)`

Set the value of ent._.is_negated to value.

Source code in medspacy/postprocess/postprocessing_functions.py

def set_negated(ent, i, value=True):
    """
    Assign `value` to `ent._.is_negated`.

    Args:
        ent: The entity whose `is_negated` extension attribute is set.
        i: The index of `ent` in its source list. Not used by this function.
        value: The value to assign. Default is True.
    """
    extensions = ent._
    extensions.is_negated = value

`set_uncertain(ent, i, value=True)`

Set the value of ent._.is_uncertain to value.

Source code in medspacy/postprocess/postprocessing_functions.py

def set_uncertain(ent, i, value=True):
    """
    Assign `value` to `ent._.is_uncertain`.

    Args:
        ent: The entity whose `is_uncertain` extension attribute is set.
        i: The index of `ent` in its source list. Not used by this function.
        value: The value to assign. Default is True.
    """
    extensions = ent._
    extensions.is_uncertain = value