Skip to content

medspacy.postprocess.postprocessing_functions

This module contains some simple functions that can be used as action or condition functions for postprocessing rules.

ent_contains(ent, target, regex=True)

Check if an entity occurs in the same sentence as another span of text. Case-insensitive.

Parameters:

Name Type Description Default
ent Span

The span to check.

required
target Union[str, Iterable[str]]

A string or a collection of strings that will be searched inside ent.

required
regex bool

If the target specified is a regex pattern. Default is True.

True

Returns:

Type Description
bool

Whether the target is contained in the ent.

Source code in medspacy/postprocess/postprocessing_functions.py
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
def ent_contains(
    ent: Span, target: Union[str, Iterable[str]], regex: bool = True
) -> bool:
    """
    Check if an entity occurs in the same sentence as another span of text. Case-insensitive.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched inside `ent`.
        regex: If the `target` specified is a regex pattern. Default is True.

    Returns:
        Whether the target is contained in the ent.
    """
    return span_contains(ent, target, regex)

is_family(span)

Returns whether a span is marked as family.

Parameters:

Name Type Description Default
span Span

The span to check.

required

Returns:

Type Description
bool

Whether the specified span has span._.is_family set to True.

Source code in medspacy/postprocess/postprocessing_functions.py
63
64
65
66
67
68
69
70
71
72
73
def is_family(span: Span) -> bool:
    """
    Returns whether a span is marked as family.

    Args:
        span: The span to check.

    Returns:
        Whether the specified span has span._.is_family set to True.
    """
    return span._.is_family

is_followed_by(ent, target, window=1)

Checks if an entity is followed by a target word within a certain window. If any phrases in target are more than one token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

Parameters:

Name Type Description Default
ent Span

The span to check.

required
target Union[str, Iterable[str]]

A string or a collection of strings that will be searched for in the text following ent.

required
window int

The number of tokens to search for target following ent. Default is 1.

1

Returns:

Type Description
bool

Whether the entity specified is followed by a target.

Source code in medspacy/postprocess/postprocessing_functions.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
def is_followed_by(
    ent: Span, target: Union[str, Iterable[str]], window: int = 1
) -> bool:
    """
    Checks if an entity is followed by a target word within a certain window. If any phrases in target are more than one
    token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched for in the text following `ent`.
        window: The number of tokens to search for `target` following `ent`. Default is 1.

    Returns:
        Whether the entity specified is followed by a target.
    """
    following_span = ent.doc[ent.end : ent.end + window]
    following_string = " ".join([token.text.lower() for token in following_span])
    if isinstance(target, str):
        return target.lower() in following_string
    for string in target:
        if string.lower() in following_string:
            return True
    return False

is_historical(span)

Returns whether a span is marked as historical.

Parameters:

Name Type Description Default
span Span

The span to check.

required

Returns:

Type Description
bool

Whether the specified span has span._.is_historical set to True.

Source code in medspacy/postprocess/postprocessing_functions.py
37
38
39
40
41
42
43
44
45
46
47
def is_historical(span: Span) -> bool:
    """
    Returns whether a span is marked as historical.

    Args:
        span: The span to check.

    Returns:
        Whether the specified span has span._.is_historical set to True.
    """
    return span._.is_historical

is_hypothetical(span)

Returns whether a span is marked as hypothetical.

Parameters:

Name Type Description Default
span Span

The span to check.

required

Returns:

Type Description
bool

Whether the specified span has span._.is_hypothetical set to True.

Source code in medspacy/postprocess/postprocessing_functions.py
50
51
52
53
54
55
56
57
58
59
60
def is_hypothetical(span: Span) -> bool:
    """
    Returns whether a span is marked as hypothetical.

    Args:
        span: The span to check.

    Returns:
        Whether the specified span has span._.is_hypothetical set to True.
    """
    return span._.is_hypothetical

is_modified_by_category(span, category)

Returns whether a span is modified by a ConTextModifier of that type.

Parameters:

Name Type Description Default
span Span

The span to check.

required
category str

The category to check whether a ConTextModifier of that type modifies the span.

required

Returns:

Type Description
bool

Whether the specified span has the specified modifier type.

Source code in medspacy/postprocess/postprocessing_functions.py
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def is_modified_by_category(span: Span, category: str) -> bool:
    """
    Returns whether a span is modified by a ConTextModifier of that type.

    Args:
        span: The span to check.
        category: The category to check whether a ConTextModifier of that type modifies the span.

    Returns:
        Whether the specified span has the specified modifier type.
    """
    for modifier in span._.modifiers:
        if modifier.category.upper() == category.upper():
            return True
    return False

is_modified_by_text(span, target, regex=True)

Returns whether a span is modified by a ConTextModifier with the specified text.

Parameters:

Name Type Description Default
span Span

The span to check.

required
target Union[str, Iterable[str]]

The category to check whether a ConTextModifier with this text modifies the span.

required
regex bool

If the target specified is a regex pattern. Default is True.

True

Returns:

Type Description
bool

Whether the specified span has the specified modifier type.

Source code in medspacy/postprocess/postprocessing_functions.py
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def is_modified_by_text(
    span: Span, target: Union[str, Iterable[str]], regex: bool = True
) -> bool:
    """
    Returns whether a span is modified by a ConTextModifier with the specified text.

    Args:
        span: The span to check.
        target: The category to check whether a ConTextModifier with this text modifies the span.
        regex: If the `target` specified is a regex pattern. Default is True.

    Returns:
        Whether the specified span has the specified modifier type.
    """
    for modifier in span._.modifiers:
        if span_contains(modifier.span, target, regex):
            return True
    return False

is_negated(span)

Returns whether a span is marked as negated.

Parameters:

Name Type Description Default
span Span

The span to check.

required

Returns:

Type Description
bool

Whether the specified span has span._.is_negated set to True.

Source code in medspacy/postprocess/postprocessing_functions.py
11
12
13
14
15
16
17
18
19
20
21
def is_negated(span: Span) -> bool:
    """
    Returns whether a span is marked as negated.

    Args:
        span: The span to check.

    Returns:
        Whether the specified span has span._.is_negated set to True.
    """
    return span._.is_negated

is_preceded_by(ent, target, window=1)

Checks if an entity is preceded by a target word within a certain window. If any phrases in target are more than one token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

Parameters:

Name Type Description Default
ent Span

The span to check.

required
target Union[str, Iterable[str]]

A string or a collection of strings that will be searched for in the text preceding ent.

required
window int

The number of tokens to search for target preceding ent. Default is 1.

1

Returns:

Type Description
bool

Whether the entity specified is preceded by a target.

Source code in medspacy/postprocess/postprocessing_functions.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def is_preceded_by(
    ent: Span, target: Union[str, Iterable[str]], window: int = 1
) -> bool:
    """
    Checks if an entity is preceded by a target word within a certain window. If any phrases in target are more than one
    token long, this may not capture it if window is smaller than the number of tokens. Case-insensitive.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched for in the text preceding `ent`.
        window: The number of tokens to search for `target` preceding `ent`. Default is 1.

    Returns:
        Whether the entity specified is preceded by a target.
    """
    preceding_span = ent.doc[ent.start - window : ent.start]
    preceding_string = " ".join([token.text.lower() for token in preceding_span])
    if isinstance(target, str):
        return target.lower() in preceding_string
    for string in target:
        if string.lower() in preceding_string:
            return True
    return False

is_uncertain(span)

Returns whether a span is marked as uncertain.

Parameters:

Name Type Description Default
span Span

The span to check.

required

Returns:

Type Description
bool

Whether the specified span has span._.is_uncertain set to True.

Source code in medspacy/postprocess/postprocessing_functions.py
24
25
26
27
28
29
30
31
32
33
34
def is_uncertain(span: Span) -> bool:
    """
    Returns whether a span is marked as uncertain.

    Args:
        span: The span to check.

    Returns:
        Whether the specified span has span._.is_uncertain set to True.
    """
    return span._.is_uncertain

remove_ent(ent, i, input_type='ents', span_group_name='medspacy_spans')

Remove an entity at position [i] from doc.ents.

Parameters:

Name Type Description Default
ent Span

The entity to remove.

required
i int

The index of ent in its source list.

required
input_type Literal['ents', 'group']

The source of the entity, either "ents" or "group".

'ents'
span_group_name str

If input_type is "group", the name of the span group.

'medspacy_spans'
Source code in medspacy/postprocess/postprocessing_functions.py
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
def remove_ent(
    ent: Span,
    i: int,
    input_type: Literal["ents", "group"] = "ents",
    span_group_name: str = "medspacy_spans",
):
    """
    Remove an entity at position [i] from doc.ents.

    Args:
        ent: The entity to remove.
        i: The index of `ent` in its source list.
        input_type: The source of the entity, either "ents" or "group".
        span_group_name: If `input_type` is "group", the name of the span group.
    """
    doc = ent.doc
    if input_type == "ents":
        doc.ents = doc.ents[:i] + doc.ents[i + 1 :]
    elif input_type == "group":
        t = list(doc.spans[span_group_name])
        doc.spans[span_group_name] = t[:i] + t[i + 1 :]

sentence_contains(ent, target, regex=True)

Check if an entity occurs in the same sentence as another span of text.

Parameters:

Name Type Description Default
ent Span

The span to check.

required
target Union[str, Iterable[str]]

A string or a collection of strings that will be searched for in the text of the sentence containing ent.

required
regex

If the target specified is a regex pattern. Default is True.

True
Source code in medspacy/postprocess/postprocessing_functions.py
180
181
182
183
184
185
186
187
188
189
190
def sentence_contains(ent: Span, target: Union[str, Iterable[str]], regex=True) -> bool:
    """
    Check if an entity occurs in the same sentence as another span of text.

    Args:
        ent: The span to check.
        target: A string or a collection of strings that will be searched for in the text of the sentence containing
            `ent`.
        regex: If the `target` specified is a regex pattern. Default is True.
    """
    return span_contains(ent.sent, target, regex)

set_family(ent, i, value=True)

Set the value of ent._.is_family to value.

Source code in medspacy/postprocess/postprocessing_functions.py
279
280
281
def set_family(ent, i, value=True):
    "Set the value of ent._.is_family to value."
    ent._.is_hypothetical = value

set_historical(ent, i, value=True)

Set the value of ent._.is_historical to value.

Source code in medspacy/postprocess/postprocessing_functions.py
269
270
271
def set_historical(ent, i, value=True):
    """Set the value of ent._.is_historical to value."""
    ent._.is_historical = value

set_hypothetical(ent, i, value=True)

Set the value of ent._.is_hypothetical to value.

Source code in medspacy/postprocess/postprocessing_functions.py
274
275
276
def set_hypothetical(ent, i, value=True):
    """Set the value of ent._.is_hypothetical to value."""
    ent._.is_hypothetical = value

set_label(ent, i, input_type='ents', span_group_name='medspacy_spans', **kwargs)

Creates a copy of the entity with a new label.

WARNING: This is not fully safe, as spaCy does not allow modifying the label of a span. Instead, this creates a new copy and attempts to copy existing attributes, but this is not totally reliable.

Parameters:

Name Type Description Default
ent

The entity to MODIFY.

required
i

The index of ent in its source list.

required
input_type Literal['ents', 'group']

The source of the entity, either "ents" or "group".

'ents'
span_group_name str

If input_type is "group", the name of the span group.

'medspacy_spans'
Source code in medspacy/postprocess/postprocessing_functions.py
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
def set_label(
    ent,
    i,
    input_type: Literal["ents", "group"] = "ents",
    span_group_name: str = "medspacy_spans",
    **kwargs
):
    """
    Creates a copy of the entity with a new label.

    WARNING: This is not fully safe, as spaCy does not allow modifying the label of a span. Instead, this creates a new
    copy and attempts to copy existing attributes, but this is not totally reliable.

    Args:
        ent: The entity to MODIFY.
        i: The index of `ent` in its source list.
        input_type: The source of the entity, either "ents" or "group".
        span_group_name: If `input_type` is "group", the name of the span group.
    """
    from spacy.tokens import Span

    new_ent = Span(ent.doc, ent.start, ent.end, label=kwargs["label"])
    # Copy any additional attributes
    # NOTE: This may not be complete and should be used with caution
    for (attr, values) in ent._.__dict__["_extensions"].items():
        setattr(new_ent._, attr, values[0])
    if input_type == "ents":
        if len(ent.doc.ents) == 1:
            ent.doc.ents = (new_ent,)
        else:
            ent.doc.ents = ent.doc.ents[:i] + (new_ent,) + ent.doc.ents[i + 1 :]
    else:
        if len(ent.doc.spans[span_group_name] == 1):
            ent.doc.spans[span_group_name] = (new_ent,)
        else:
            ent.doc.spans[span_group_name] = (
                ent.doc.spans[span_group_name][:i]
                + (new_ent,)
                + ent.doc.spans[span_group_name][i + 1 :]
            )

set_negated(ent, i, value=True)

Set the value of ent._.is_negated to value.

Source code in medspacy/postprocess/postprocessing_functions.py
259
260
261
def set_negated(ent, i, value=True):
    """Set the value of ent._.is_negated to value."""
    ent._.is_negated = value

set_uncertain(ent, i, value=True)

Set the value of ent._.is_uncertain to value.

Source code in medspacy/postprocess/postprocessing_functions.py
264
265
266
def set_uncertain(ent, i, value=True):
    """Set the value of ent._.is_uncertain to value."""
    ent._.is_uncertain = value