Skip to content

medspacy.target_matcher.target_rule

TargetRule

Bases: BaseRule

TargetRule defines rules for extracting entities from text using the TargetMatcher.

Source code in medspacy/target_matcher/target_rule.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
class TargetRule(BaseRule):
    """
    TargetRule defines rules for extracting entities from text using the TargetMatcher.

    Rules can be created directly, or converted to and from dictionaries and JSON files with
    `from_dict`/`to_dict` and `from_json`/`to_json`.
    """

    # Keys accepted by `from_dict` and emitted by `to_dict`. `on_match` is not listed, so a
    # callback is neither read from nor written to a dictionary/JSON representation.
    _ALLOWED_KEYS = {
        "literal",
        "pattern",
        "category",
        "metadata",
        "attributes",
    }

    def __init__(
        self,
        literal: str,
        category: str,
        pattern: Optional[Union[List[Dict[str, str]], str]] = None,
        on_match: Optional[
            Callable[[Matcher, Doc, int, List[Tuple[int, int, int]]], Any]
        ] = None,
        attributes: Optional[Dict[str, Any]] = None,
        metadata: Optional[Dict[Any, Any]] = None,
    ):
        """
        Creates a new TargetRule.

        Args:
            literal: The string representation of a concept. If `pattern` is None, this string will be lower-cased and
                matched to the lower-case string. If `pattern` is not None, this argument will not be used for matching
                but can be used as a reference as the rule name.
            category: The semantic class of the matched span. This corresponds to the `label_` attribute of an entity.
            pattern: A list or string to use as a spaCy pattern rather than `literal`. If a list, will use spaCy
                token-based pattern matching to match using token attributes. If a string, will use medspaCy's
                RegexMatcher. If None, will use `literal` as the pattern for phrase matching. For more information, see
                https://spacy.io/usage/rule-based-matching.
            on_match: An optional callback function or other callable which takes 4 arguments: `(matcher, doc, i,
                matches)`. For more information, see https://spacy.io/usage/rule-based-matching#on_match
            attributes: Optional custom attribute names to set for a Span matched by this rule. These attribute
                names are stored under Span._.[attribute_name]. For example, if `attributes={'is_historical':True}`,
                then any spans matched by this rule will have span._.is_historical = True
            metadata: Optional dictionary of any extra metadata.
        """
        super().__init__(literal, category, pattern, on_match, metadata)
        self.attributes = attributes
        # No rule id is assigned at construction time.
        self._rule_id = None

    @classmethod
    def from_json(cls, filepath: str) -> List[TargetRule]:
        """Read a list of target rules from a JSON file.

        The file must contain a JSON object with a "target_rules" key whose value is a list of
        rule dictionaries; each one is converted with `from_dict`.

        Args:
            filepath: the .json file containing target rules

        Returns:
            A list of rules, in the order they appear in the file.

        Raises:
            KeyError: If the JSON object has no "target_rules" key.
            ValueError: If a rule dictionary contains keys other than those in `_ALLOWED_KEYS`.
        """
        import json

        # Read as UTF-8 explicitly rather than relying on the platform's locale encoding.
        with open(filepath, encoding="utf-8") as file:
            target_data = json.load(file)
        # Go through `cls` so that subclasses produce instances of themselves.
        return [cls.from_dict(data) for data in target_data["target_rules"]]

    @classmethod
    def from_dict(cls, rule_dict: Dict) -> TargetRule:
        """Reads a dictionary into a TargetRule. Used when reading from a json file.

        Args:
            rule_dict: the dictionary to convert. Its keys are passed to the constructor as
                keyword arguments.

        Returns:
            The TargetRule created from the dictionary

        Raises:
            ValueError: if the dictionary contains keys that are not in `_ALLOWED_KEYS`
        """
        invalid_keys = set(rule_dict).difference(cls._ALLOWED_KEYS)
        if invalid_keys:
            msg = (
                "JSON object contains invalid keys: {0}.\n"
                "Must be one of: {1}".format(invalid_keys, cls._ALLOWED_KEYS)
            )
            raise ValueError(msg)
        # Construct through `cls` so that subclasses produce instances of themselves.
        return cls(**rule_dict)

    @classmethod
    def to_json(cls, target_rules: List[TargetRule], filepath: str):
        """Writes TargetRules to a json file.

        The rules are written as a list of dictionaries under a top-level "target_rules" key,
        which is the layout `from_json` reads.

        Args:
            target_rules: a list of TargetRules that will be written to a file.
            filepath: the .json file to contain the target rules
        """
        import json

        data = {"target_rules": [rule.to_dict() for rule in target_rules]}
        with open(filepath, "w", encoding="utf-8") as file:
            json.dump(data, file, indent=4)

    def to_dict(self):
        """Converts TargetRules to a python dictionary. Used when writing target rules to a json file.

        Only the keys in `_ALLOWED_KEYS` are included, and keys whose value is None are omitted.

        Returns:
            The dictionary containing the TargetRule info, with keys in alphabetical order.
        """
        rule_dict = {}
        # `_ALLOWED_KEYS` is a set, so sort it to get the same key order on every run.
        for key in sorted(self._ALLOWED_KEYS):
            value = self.__dict__.get(key)
            if value is not None:
                rule_dict[key] = value
        return rule_dict

    def __repr__(self):
        return f"""TargetRule(literal="{self.literal}", category="{self.category}", pattern={self.pattern}, attributes={self.attributes}, on_match={self.on_match})"""

__init__(literal, category, pattern=None, on_match=None, attributes=None, metadata=None)

Creates a new TargetRule.

Parameters:

Name Type Description Default
literal str

The string representation of a concept. If pattern is None, this string will be lower-cased and matched to the lower-case string. If pattern is not None, this argument will not be used for matching but can be used as a reference as the rule name.

required
category str

The semantic class of the matched span. This corresponds to the label_ attribute of an entity.

required
pattern Optional[Union[List[Dict[str, str]], str]]

A list or string to use as a spaCy pattern rather than literal. If a list, will use spaCy token-based pattern matching to match using token attributes. If a string, will use medspaCy's RegexMatcher. If None, will use literal as the pattern for phrase matching. For more information, see https://spacy.io/usage/rule-based-matching.

None
on_match Optional[Callable[[Matcher, Doc, int, List[Tuple[int, int, int]]], Any]]

An optional callback function or other callable which takes 4 arguments: (matcher, doc, i, matches). For more information, see https://spacy.io/usage/rule-based-matching#on_match

None
attributes Optional[Dict[str, Any]]

Optional custom attribute names to set for a Span matched by this rule. These attribute names are stored under Span._.[attribute_name]. For example, if attributes={'is_historical':True}, then any spans matched by this rule will have span._.is_historical = True

None
metadata Optional[Dict[Any, Any]]

Optional dictionary of any extra metadata.

None
Source code in medspacy/target_matcher/target_rule.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def __init__(
    self,
    literal: str,
    category: str,
    pattern: Optional[Union[List[Dict[str, str]], str]] = None,
    on_match: Optional[
        Callable[[Matcher, Doc, int, List[Tuple[int, int, int]]], Any]
    ] = None,
    attributes: Optional[Dict[str, Any]] = None,
    metadata: Optional[Dict[Any, Any]] = None,
):
    """
    Creates a new TargetRule.

    Args:
        literal: The string form of the concept. When `pattern` is None it is lower-cased and matched against
            the lower-cased text. When `pattern` is given, `literal` is not used for matching and only serves
            as a reference name for the rule.
        category: The semantic class of the matched span; corresponds to the `label_` attribute of an entity.
        pattern: A list or string used as the pattern instead of `literal`. A list is used for spaCy
            token-based matching on token attributes; a string is used with medspaCy's RegexMatcher; None
            means `literal` is used for phrase matching. See https://spacy.io/usage/rule-based-matching.
        on_match: An optional callable taking 4 arguments: `(matcher, doc, i, matches)`. See
            https://spacy.io/usage/rule-based-matching#on_match
        attributes: Optional custom attribute names to set on a Span matched by this rule. They are stored
            under Span._.[attribute_name]; for example, `attributes={'is_historical':True}` gives matched
            spans span._.is_historical = True
        metadata: Optional dictionary of any extra metadata.
    """
    # literal, category, pattern, on_match and metadata are forwarded to the parent initializer.
    super().__init__(literal, category, pattern, on_match, metadata)
    # No rule id is assigned at construction time.
    self._rule_id = None
    self.attributes = attributes

from_dict(rule_dict) classmethod

Reads a dictionary into a TargetRule. Used when reading from a json file.

Parameters:

Name Type Description Default
rule_dict Dict

the dictionary to convert

required

Returns:

Type Description
TargetRule

The TargetRule created from the dictionary

Raises:

Type Description
ValueError

if the json is invalid

Source code in medspacy/target_matcher/target_rule.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@classmethod
def from_dict(cls, rule_dict: Dict) -> TargetRule:
    """Reads a dictionary into a TargetRule. Used when reading from a json file.

    Args:
        rule_dict: the dictionary to convert. Its keys are passed to the constructor as
            keyword arguments.

    Returns:
        The TargetRule created from the dictionary

    Raises:
        ValueError: if the dictionary contains keys that are not in `_ALLOWED_KEYS`
    """
    invalid_keys = set(rule_dict).difference(cls._ALLOWED_KEYS)
    if invalid_keys:
        msg = (
            "JSON object contains invalid keys: {0}.\n"
            "Must be one of: {1}".format(invalid_keys, cls._ALLOWED_KEYS)
        )
        raise ValueError(msg)
    # Construct through `cls` so that subclasses produce instances of themselves.
    return cls(**rule_dict)

from_json(filepath) classmethod

Read in a list of target rules from a JSON file.

Parameters:

Name Type Description Default
filepath str

the .json file containing target rules

required

Returns:

Name Type Description
context_item List[TargetRule]

A list of TargetRule objects.

Raises:

Type Description
KeyError

If the JSON object has no top-level "target_rules" key. (A rule dictionary containing keys other than those accepted by TargetRule.__init__ raises ValueError from from_dict.)

Source code in medspacy/target_matcher/target_rule.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
@classmethod
def from_json(cls, filepath: str) -> List[TargetRule]:
    """Read a list of target rules from a JSON file.

    The file must contain a JSON object with a "target_rules" key whose value is a list of
    rule dictionaries; each one is converted with `from_dict`.

    Args:
        filepath: the .json file containing target rules

    Returns:
        A list of rules, in the order they appear in the file.

    Raises:
        KeyError: If the JSON object has no "target_rules" key.
        ValueError: If a rule dictionary contains keys that `from_dict` rejects.
    """
    import json

    # Read as UTF-8 explicitly rather than relying on the platform's locale encoding.
    with open(filepath, encoding="utf-8") as file:
        target_data = json.load(file)
    # Go through `cls` so that subclasses produce instances of themselves.
    return [cls.from_dict(data) for data in target_data["target_rules"]]

to_dict()

Converts TargetRules to a python dictionary. Used when writing target rules to a json file.

Returns:

Type Description

The dictionary containing the TargetRule info.

Source code in medspacy/target_matcher/target_rule.py
119
120
121
122
123
124
125
126
127
128
129
130
def to_dict(self):
    """Converts TargetRules to a python dictionary. Used when writing target rules to a json file.

    Only the keys in `_ALLOWED_KEYS` are included, and keys whose value is None are omitted.

    Returns:
        The dictionary containing the TargetRule info, with keys in alphabetical order.
    """
    rule_dict = {}
    # `_ALLOWED_KEYS` is iterated in sorted order so the key order is the same on every run.
    for key in sorted(self._ALLOWED_KEYS):
        value = self.__dict__.get(key)
        if value is not None:
            rule_dict[key] = value
    return rule_dict

to_json(target_rules, filepath) classmethod

Writes TargetRules to a json file.

Parameters:

Name Type Description Default
target_rules List[TargetRule]

a list of TargetRules that will be written to a file.

required
filepath str

the .json file to contain the target rules

required
Source code in medspacy/target_matcher/target_rule.py
105
106
107
108
109
110
111
112
113
114
115
116
117
@classmethod
def to_json(cls, target_rules: List[TargetRule], filepath: str):
    """Writes TargetRules to a json file.

    Each rule is converted with its `to_dict` method and the resulting list is stored under
    a top-level "target_rules" key.

    Args:
        target_rules: a list of TargetRules that will be written to a file.
        filepath: the .json file to contain the target rules
    """
    import json

    # Convert every rule before the output file is opened.
    serialized = []
    for rule in target_rules:
        serialized.append(rule.to_dict())

    with open(filepath, "w") as handle:
        json.dump({"target_rules": serialized}, handle, indent=4)