Skip to content

medspacy.postprocess

PostprocessingPattern

PostprocessingPatterns are callable functions and equality values wrapped together that will create triggers in the later Postprocessor as part of PostprocessingRules.

Source code in medspacy/postprocess/postprocessing_pattern.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
class PostprocessingPattern:
    """
    Pairs a callable condition with the value that condition is expected to produce.

    Calling the pattern on an entity runs the condition and reports whether its result equals the expected value.
    PostprocessingRules group such patterns to decide when an action should be triggered by the Postprocessor.
    """

    def __init__(self, condition: Callable, success_value: Any = True, **kwargs):
        """
        Store a single condition to check against an entity.

        Args:
            condition: Function invoked as condition(ent, **kwargs). The pattern passes when the value it returns
                equals success_value.
            success_value: The value condition(ent) must return for the pattern to pass. It has to support the
                comparison condition(ent) == success_value.
            kwargs: Optional keyword arguments forwarded to condition on every call.
        """
        self.kwargs = kwargs
        self.success_value = success_value
        self.condition = condition

    def __call__(self, ent: Span) -> bool:
        """
        Evaluate the pattern on the given span.

        Args:
            ent: the span to process.

        Returns:
            The result of comparing `condition(ent, **kwargs)` with `success_value` using `==`.
        """
        # Unpacking an empty mapping passes no extra arguments, so one call form covers both the
        # "with kwargs" and "without kwargs" cases.
        extra = self.kwargs or {}
        return self.condition(ent, **extra) == self.success_value

__call__(ent)

Call the PostprocessingPattern on the span specified.

Parameters:

Name Type Description Default
ent Span

the span to process.

required

Returns:

Type Description
bool

Whether calling condition on the entity specified is success_value.

Source code in medspacy/postprocess/postprocessing_pattern.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def __call__(self, ent: Span) -> bool:
    """
    Evaluate the pattern on the given span.

    Args:
        ent: the span to process.

    Returns:
        The result of comparing `condition(ent, **kwargs)` with `success_value` using `==`.
    """
    # An empty (or otherwise falsy) kwargs value contributes no extra arguments to the call.
    extra_args = self.kwargs if self.kwargs else {}
    outcome = self.condition(ent, **extra_args)
    return outcome == self.success_value

__init__(condition, success_value=True, **kwargs)

A PostprocessingPattern defines a single condition to check against an entity.

Parameters:

Name Type Description Default
condition Callable

A function to call on an entity. If the result of the function call equals success_value, then the pattern passes.

required
success_value Any

The value which should be returned by condition(ent) in order for the pattern to pass. Must have == defined for condition(ent) == success_value.

True
kwargs

Optional keyword arguments to call with condition(ent, **kwargs).

{}
Source code in medspacy/postprocess/postprocessing_pattern.py
12
13
14
15
16
17
18
19
20
21
22
23
24
25
def __init__(self, condition: Callable, success_value: Any = True, **kwargs):
    """
    Store a single condition to check against an entity.

    Args:
        condition: Function invoked on an entity. The pattern passes when the value it returns equals
            success_value.
        success_value: The value condition(ent) must return for the pattern to pass. It has to support the
            comparison condition(ent) == success_value.
        kwargs: Optional keyword arguments forwarded as condition(ent, **kwargs).
    """
    # The three assignments are independent of one another.
    self.kwargs = kwargs
    self.success_value = success_value
    self.condition = condition

PostprocessingRule

Source code in medspacy/postprocess/postprocessing_rule.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
class PostprocessingRule:
    """
    A group of patterns plus an action that is executed on an entity when every pattern passes.
    """

    def __init__(
        self,
        patterns: Iterable[PostprocessingPattern],
        action: Callable,
        name: str = None,
        description: str = None,
        span_group_name: str = "medspacy_spans",
        **kwargs,
    ):
        """
        A PostprocessingRule checks conditions of a spaCy Span entity and executes some action if all patterns pass.

        Args:
            patterns: A list of PostprocessingPatterns, each of which checks a condition of an entity. An element
                may also be a tuple of patterns, in which case at least one member of the tuple has to pass.
            action: A function to call with the entity as an argument. It is called positionally as
                action(ent, i, input_span_type, span_group_name, **kwargs), where:
                    ent: The spacy span
                    i: The index of ent
                    input_span_type: "ents" or "group". Describes where to look for spans.
                    span_group_name: The name of the span group used when `input_span_type` is "group".
                    kwargs: Any additional keyword arguments for action.
            name: Optional name of the rule.
            description: Optional description of the rule.
            span_group_name: The span group name handed to `action`.
            kwargs: Optional keyword arguments to send to `action`.
        """
        self.patterns = patterns
        self.action = action
        self.name = name
        self.description = description
        # Left unset here; Postprocessor.add() fills it in with the Postprocessor's own setting.
        self.input_span_type = None
        self.span_group_name = span_group_name
        self.kwargs = kwargs

    def __call__(self, ent, i, debug=False):
        """
        Check every pattern in self.patterns against the entity and run the action if all of them pass.

        Args:
            ent: The span to check.
            i: The index of ent, forwarded to the action.
            debug: If True, print a message when every pattern has passed.

        Returns:
            False as soon as any pattern does not pass (the action is not executed);
            True after all patterns passed and self.action has been executed.
        """
        for pattern in self.patterns:
            if isinstance(pattern, tuple):
                # A tuple is an OR-group: at least one member has to pass.
                if not any(subpattern(ent) for subpattern in pattern):
                    return False
            # Otherwise just check a single value. Truthiness is used in both branches so that falsy
            # non-bool results (None, 0, numpy.False_) are treated as failures consistently.
            elif not pattern(ent):
                return False

        # Every pattern passed - do the action
        if debug:
            print("Passed:", self, "on ent:", ent, ent.sent)

        try:
            # Unpacking an empty mapping passes no extra keyword arguments.
            self.action(
                ent, i, self.input_span_type, self.span_group_name, **(self.kwargs or {})
            )
        except TypeError:
            # NOTE(review): this also intercepts TypeErrors raised inside the action body itself,
            # not only signature mismatches - confirm _raise_action_error reports those sensibly.
            _raise_action_error(
                self.action,
                (ent, i, self.input_span_type, self.span_group_name, self.kwargs),
            )
        return True

    def __repr__(self):
        return f"PostprocessingRule: {self.name} - {self.description}"

__call__(ent, i, debug=False)

Iterate through all the patterns in self.patterns. If any pattern does not pass (i.e., does not return True), then returns False. If they all pass, execute self.action and return True.

Source code in medspacy/postprocess/postprocessing_rule.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
def __call__(self, ent, i, debug=False):
    """
    Check every pattern in self.patterns against the entity and run the action if all of them pass.

    Args:
        ent: The span to check.
        i: The index of ent, forwarded to the action.
        debug: If True, print a message when every pattern has passed.

    Returns:
        False as soon as any pattern does not pass (the action is not executed);
        True after all patterns passed and self.action has been executed.
    """
    for pattern in self.patterns:
        if isinstance(pattern, tuple):
            # A tuple is an OR-group: at least one member has to pass.
            if not any(subpattern(ent) for subpattern in pattern):
                return False
        # Otherwise just check a single value. Truthiness is used in both branches so that falsy
        # non-bool results (None, 0, numpy.False_) are treated as failures consistently.
        elif not pattern(ent):
            return False

    # Every pattern passed - do the action
    if debug:
        print("Passed:", self, "on ent:", ent, ent.sent)

    try:
        # Unpacking an empty mapping passes no extra keyword arguments.
        self.action(
            ent, i, self.input_span_type, self.span_group_name, **(self.kwargs or {})
        )
    except TypeError:
        # NOTE(review): this also intercepts TypeErrors raised inside the action body itself,
        # not only signature mismatches - confirm _raise_action_error reports those sensibly.
        _raise_action_error(
            self.action,
            (ent, i, self.input_span_type, self.span_group_name, self.kwargs),
        )
    return True

__init__(patterns, action, name=None, description=None, span_group_name='medspacy_spans', **kwargs)

A PostprocessingRule checks conditions of a spaCy Span entity and executes some action if all rules are met.

patterns: A list of PostprocessingPatterns, each of which checks a condition of an entity. action: A function to call with the entity as an argument. This function should take the following arguments: ent: The spacy span; i: The index of ent; input_span_type: "ents" or "group", describing where to look for spans; span_group_name: The name of the span group used when input_span_type is "group"; kwargs: Any additional keyword arguments for action. name: Optional name of the rule. description: Optional description of the rule. kwargs: Optional keyword arguments to send to action.

Source code in medspacy/postprocess/postprocessing_rule.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(
    self,
    patterns: Iterable[PostprocessingPattern],
    action: Callable,
    name: str = None,
    description: str = None,
    span_group_name: str = "medspacy_spans",
    **kwargs,
):
    """
    Set up a rule that checks conditions of a spaCy Span entity and executes an action once all of them are met.

    Args:
        patterns: A list of PostprocessingPatterns, each of which checks a condition of an entity.
        action: A function to call with the entity as an argument. This function should take the following
            arguments:
                ent: The spacy span
                i: The index of ent
                input_span_type: "ents" or "group". Describes where to look for spans.
                span_group_name: The name of the span group used when `input_span_type` is "group".
                kwargs: Any additional keyword arguments for action.
        name: Optional name of the rule.
        description: Optional description of the rule.
        span_group_name: The span group name stored on the rule.
        kwargs: Optional keyword arguments to send to `action`.
    """
    # What to check and what to do when the checks pass.
    self.action = action
    self.patterns = patterns
    self.kwargs = kwargs

    # Human-readable identification.
    self.description = description
    self.name = name

    # Where spans are looked up; the span type always starts out unset.
    self.span_group_name = span_group_name
    self.input_span_type = None

Postprocessor

Source code in medspacy/postprocess/postprocessor.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
@Language.factory("medspacy_postprocessor")
class Postprocessor:
    """
    Pipeline component that runs a list of PostprocessingRules over the spans of a Doc.
    """

    def __init__(
        self,
        nlp: Language,
        name: str = "medspacy_postprocessor",
        rules: Iterable[PostprocessingRule] = None,
        debug: bool = False,
        input_span_type: Literal["ents", "group"] = "ents",
        span_group_name: str = "medspacy_spans",
    ):
        """
        Creates a new Postprocessor.

        Args:
            nlp: The Language object, stored on the component.
            name: The name of the component.
            rules: Optional PostprocessingRules to add immediately via `add`.
            debug: If True, each span is printed while processing and the flag is passed on to every rule.
            input_span_type: "ents" to read spans from doc.ents, or "group" to read them from a span group.
            span_group_name: The span group to read when `input_span_type` is "group".
        """
        self.nlp = nlp
        self.name = name
        self._rules = []
        self.debug = debug
        self._input_span_type = input_span_type
        self._span_group_name = span_group_name

        if rules:
            self.add(rules)

    @property
    def rules(self) -> List[PostprocessingRule]:
        """
        Gets the rules.

        Returns:
            The list of PostprocessingRules available to the Postprocessor.
        """
        return self._rules

    @property
    def input_span_type(self):
        """
        The input source of entities for the component. Must be either "ents" corresponding to doc.ents or "group" for
        a spaCy span group.

        Returns:
            The input type, "ents" or "group".
        """
        return self._input_span_type

    @input_span_type.setter
    def input_span_type(self, val):
        if not (val == "ents" or val == "group"):
            raise ValueError('input_span_type must be "ents" or "group".')
        self._input_span_type = val

    @property
    def span_group_name(self) -> str:
        """
        The name of the span group used by this component. If `input_span_type` is "group", calling this component will
        use spans in the span group with this name.

        Returns:
            The span group name.
        """
        return self._span_group_name

    @span_group_name.setter
    def span_group_name(self, name: str):
        if not name or not isinstance(name, str):
            raise ValueError("Span group name must be a string.")
        self._span_group_name = name

    def add(self, rules: Union[PostprocessingRule, Iterable[PostprocessingRule]]):
        """
        Adds PostprocessingRules to the Postprocessor.

        Rules whose `input_span_type` is still None inherit this component's `input_span_type`.

        Args:
            rules: A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor.

        Raises:
            TypeError: If any element is not a PostprocessingRule. In that case nothing is added and no rule
                is modified.
        """
        if isinstance(rules, PostprocessingRule):
            rules = [rules]
        else:
            # Materialize first: a one-shot iterable (e.g. a generator) would otherwise be exhausted by the
            # validation loop and nothing would be left to append.
            rules = list(rules)

        # Validate everything before touching any rule so a bad element leaves no partial changes behind.
        for rule in rules:
            if not isinstance(rule, PostprocessingRule):
                raise TypeError(
                    f"Rules must be type PostprocessingRule, not {type(rule)}."
                )

        for rule in rules:
            if rule.input_span_type is None:
                rule.input_span_type = self.input_span_type
        self._rules += rules

    def _current_span_count(self, doc: Doc) -> int:
        """Returns how many spans the configured source (doc.ents or the span group) holds right now."""
        if self._input_span_type == "ents":
            return len(doc.ents)
        return len(doc.spans[self.span_group_name])

    def __call__(self, doc: Doc):
        """
        Calls the Postprocessor on a spaCy doc. This will call each PostprocessingRule on the doc.

        Args:
            doc: The Doc to process.

        Returns:
            The processed Doc.
        """
        if self._input_span_type == "ents":
            spans = doc.ents
        else:
            spans = doc.spans[self._span_group_name]

        # Iterate through the entities in reversed order
        for i in range(len(spans) - 1, -1, -1):
            ent = spans[i]
            if self.debug:
                print(ent)

            # Remember the span count so a change made by a rule can be detected.
            span_count_before_rule = self._current_span_count(doc)

            for rule in self.rules:
                rule(ent, i, debug=self.debug)
                # If the span count changed, the rule altered the spans (e.g. removed the entity):
                # stop applying further rules to it and move on to the next entity.
                try:
                    if self._current_span_count(doc) != span_count_before_rule:
                        break
                except IndexError:
                    # NOTE(review): kept from the original; the count lookup is not expected to raise this.
                    break
        return doc

input_span_type property writable

The input source of entities for the component. Must be either "ents" corresponding to doc.ents or "group" for a spaCy span group.

Returns:

Type Description

The input type, "ents" or "group".

rules property

Gets the rules.

Returns:

Type Description
List[PostprocessingRule]

The list of PostprocessingRules available to the Postprocessor.

span_group_name property writable

The name of the span group used by this component. If input_span_type is "group", calling this component will use spans in the span group with this name.

Returns:

Type Description
str

The span group name.

__call__(doc)

Calls the Postprocessor on a spaCy doc. This will call each PostprocessingRule on the doc.

Parameters:

Name Type Description Default
doc Doc

The Doc to process.

required

Returns:

Type Description

The processed Doc.

Source code in medspacy/postprocess/postprocessor.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def __call__(self, doc: Doc):
    """
    Runs every PostprocessingRule of this Postprocessor over the spans of a spaCy doc.

    Spans are visited from last to first. Once a rule changes the number of spans in the source, the remaining
    rules are skipped for the current span.

    Args:
        doc: The Doc to process.

    Returns:
        The processed Doc.
    """

    def live_count():
        # Re-read the source on every call so changes made by a rule are visible.
        if self._input_span_type == "ents":
            return len(doc.ents)
        return len(doc.spans[self.span_group_name])

    spans = doc.ents if self._input_span_type == "ents" else doc.spans[self._span_group_name]

    # Walk the spans back to front.
    for i in reversed(range(len(spans))):
        ent = spans[i]
        if self.debug:
            print(ent)

        # Baseline used to detect whether a rule changed the spans.
        count_before = live_count()

        for rule in self.rules:
            rule(ent, i, debug=self.debug)
            try:
                count_after = live_count()
            except IndexError:
                break
            # A different count means the spans were altered; skip to the next entity.
            if count_after != count_before:
                break
    return doc

add(rules)

Adds PostprocessingRules to the Postprocessor.

Parameters:

Name Type Description Default
rules Union[PostprocessingRule, Iterable[PostprocessingRule]]

A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor.

required
Source code in medspacy/postprocess/postprocessor.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def add(self, rules: Union[PostprocessingRule, Iterable[PostprocessingRule]]):
    """
    Adds PostprocessingRules to the Postprocessor.

    Rules whose `input_span_type` is still None inherit this component's `input_span_type`.

    Args:
        rules: A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor.

    Raises:
        TypeError: If any element is not a PostprocessingRule. In that case nothing is added and no rule
            is modified.
    """
    if isinstance(rules, PostprocessingRule):
        rules = [rules]
    else:
        # Materialize first: a one-shot iterable (e.g. a generator) would otherwise be exhausted by the
        # validation loop and nothing would be left to append.
        rules = list(rules)

    # Validate everything before touching any rule so a bad element leaves no partial changes behind.
    for rule in rules:
        if not isinstance(rule, PostprocessingRule):
            raise TypeError(
                f"Rules must be type PostprocessingRule, not {type(rule)}."
            )

    for rule in rules:
        if rule.input_span_type is None:
            rule.input_span_type = self.input_span_type
    self._rules += rules