Skip to content

medspacy.postprocess.postprocessor

Postprocessor

Source code in medspacy/postprocess/postprocessor.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
@Language.factory("medspacy_postprocessor")
class Postprocessor:
    """
    Pipeline component, registered under the factory name "medspacy_postprocessor", that runs a list of
    PostprocessingRules against every span in either `doc.ents` or a named span group.
    """

    def __init__(
        self,
        nlp: Language,
        name: str = "medspacy_postprocessor",
        rules: Union[Iterable[PostprocessingRule], None] = None,
        debug: bool = False,
        input_span_type: Literal["ents", "group"] = "ents",
        span_group_name: str = "medspacy_spans",
    ):
        """
        Creates a new Postprocessor.

        Args:
            nlp: The Language object this component is attached to. It is stored on the instance.
            name: The name of the component.
            rules: Optional PostprocessingRules to register immediately (see `add`).
            debug: If True, each span is printed before its rules run and `debug=True` is forwarded to every rule.
            input_span_type: "ents" to read spans from `doc.ents`, or "group" to read them from a span group.
            span_group_name: The span group to read from when `input_span_type` is "group".
        """
        self.nlp = nlp
        self.name = name
        self._rules = []
        self.debug = debug
        self._input_span_type = input_span_type
        self._span_group_name = span_group_name

        if rules:
            self.add(rules)

    @property
    def rules(self) -> List[PostprocessingRule]:
        """
        Gets the rules.

        Returns:
            The list of PostprocessingRules available to the Postprocessor.
        """
        return self._rules

    @property
    def input_span_type(self) -> str:
        """
        The input source of entities for the component. Must be either "ents" corresponding to doc.ents or "group" for
        a spaCy span group.

        Returns:
            The input type, "ents" or "group".
        """
        return self._input_span_type

    @input_span_type.setter
    def input_span_type(self, val: str):
        if not (val == "ents" or val == "group"):
            raise ValueError('input_span_type must be "ents" or "group".')
        self._input_span_type = val

    @property
    def span_group_name(self) -> str:
        """
        The name of the span group used by this component. If `input_span_type` is "group", calling this component will
        use spans in the span group with this name.

        Returns:
            The span group name.
        """
        return self._span_group_name

    @span_group_name.setter
    def span_group_name(self, name: str):
        if not name or not isinstance(name, str):
            raise ValueError("Span group name must be a string.")
        self._span_group_name = name

    def add(self, rules: Union[PostprocessingRule, Iterable[PostprocessingRule]]):
        """
        Adds PostprocessingRules to the Postprocessor.

        Rules whose `input_span_type` is None inherit this component's `input_span_type`. Nothing is registered and no
        rule is modified if any item fails validation.

        Args:
            rules: A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor.

        Raises:
            TypeError: If any item is not a PostprocessingRule.
        """
        if isinstance(rules, PostprocessingRule):
            rules = [rules]
        else:
            # Materialize once: a one-shot iterable (e.g. a generator) would otherwise be exhausted by the
            # validation loop and nothing would be left to register.
            rules = list(rules)

        # Validate everything before touching any rule so a bad item cannot leave a partial update behind.
        for rule in rules:
            if not isinstance(rule, PostprocessingRule):
                raise TypeError(
                    f"Rules must be type PostprocessingRule, not {type(rule)}."
                )

        for rule in rules:
            if rule.input_span_type is None:
                rule.input_span_type = self.input_span_type
        self._rules.extend(rules)

    def _current_spans(self, doc: Doc):
        """
        Looks up the span container this component reads from: `doc.ents` when the input type is "ents", otherwise
        the span group named by `span_group_name`.
        """
        if self._input_span_type == "ents":
            return doc.ents
        return doc.spans[self._span_group_name]

    def __call__(self, doc: Doc) -> Doc:
        """
        Calls the Postprocessor on a spaCy doc. This will call each PostprocessingRule on the doc.

        Args:
            doc: The Doc to process.

        Returns:
            The processed Doc.
        """
        # Iterate through the entities in reversed order
        spans = self._current_spans(doc)

        for i in range(len(spans) - 1, -1, -1):
            ent = spans[i]
            if self.debug:
                print(ent)

            # Remember how many spans exist before this entity's rules run so a change can be detected.
            span_count_before_rule = len(self._current_spans(doc))

            for rule in self.rules:
                rule(ent, i, debug=self.debug)
                # A changed span count means a rule altered the span container (e.g. removed the entity),
                # so stop applying further rules to it and move on to the next entity.
                if len(self._current_spans(doc)) != span_count_before_rule:
                    break
        return doc

input_span_type property writable

The input source of entities for the component. Must be either "ents" corresponding to doc.ents or "group" for a spaCy span group.

Returns:

Type Description

The input type, "ents" or "group".

rules property

Gets the rules.

Returns:

| Type | Description |
| --- | --- |
| List[PostprocessingRule] | The list of PostprocessingRules available to the Postprocessor. |

span_group_name property writable

The name of the span group used by this component. If input_span_type is "group", calling this component will use spans in the span group with this name.

Returns:

| Type | Description |
| --- | --- |
| str | The span group name. |

__call__(doc)

Calls the Postprocessor on a spaCy doc. This will call each PostprocessingRule on the doc.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| doc | Doc | The Doc to process. | required |

Returns:

Type Description

The processed Doc.

Source code in medspacy/postprocess/postprocessor.py
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
def __call__(self, doc: Doc) -> Doc:
    """
    Calls the Postprocessor on a spaCy doc. This will call each PostprocessingRule on the doc.

    Args:
        doc: The Doc to process.

    Returns:
        The processed Doc.
    """
    # Resolve the span source once; it is re-read after every rule to detect changes.
    if self._input_span_type == "ents":

        def current_spans():
            return doc.ents

    else:

        def current_spans():
            return doc.spans[self._span_group_name]

    # Iterate through the entities in reversed order
    spans = current_spans()

    for i in range(len(spans) - 1, -1, -1):
        ent = spans[i]
        if self.debug:
            print(ent)

        # Remember how many spans exist before this entity's rules run so a change can be detected.
        span_count_before_rule = len(current_spans())

        for rule in self.rules:
            rule(ent, i, debug=self.debug)
            # A changed span count means a rule altered the span container (e.g. removed the entity),
            # so stop applying further rules to it and move on to the next entity.
            if len(current_spans()) != span_count_before_rule:
                break
    return doc

add(rules)

Adds PostprocessingRules to the Postprocessor.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rules | Union[PostprocessingRule, Iterable[PostprocessingRule]] | A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor. | required |
Source code in medspacy/postprocess/postprocessor.py
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def add(self, rules: Union[PostprocessingRule, Iterable[PostprocessingRule]]):
    """
    Adds PostprocessingRules to the Postprocessor.

    Rules whose `input_span_type` is None inherit this component's `input_span_type`. Nothing is registered and no
    rule is modified if any item fails validation.

    Args:
        rules: A single PostprocessingRule or a collection of PostprocessingRules to add to the Postprocessor.

    Raises:
        TypeError: If any item is not a PostprocessingRule.
    """
    if isinstance(rules, PostprocessingRule):
        rules = [rules]
    else:
        # Materialize once: a one-shot iterable (e.g. a generator) would otherwise be exhausted by the
        # validation loop and nothing would be left to register.
        rules = list(rules)

    # Validate everything before touching any rule so a bad item cannot leave a partial update behind.
    for rule in rules:
        if not isinstance(rule, PostprocessingRule):
            raise TypeError(
                f"Rules must be type PostprocessingRule, not {type(rule)}."
            )

    for rule in rules:
        if rule.input_span_type is None:
            rule.input_span_type = self.input_span_type
    self._rules.extend(rules)