Skip to content

medspacy.visualization

MedspaCyVisualizerWidget

Source code in medspacy/visualization.py
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
class MedspaCyVisualizerWidget:
    """Interactive IPython widget for paging through medspaCy visualizations of a list of docs."""

    def __init__(self, docs, target_span_type: str = "ents", span_group_name: str = "medspacy_spans"):
        """Create an IPython Widget Box displaying medspaCy's visualizers.
        The widget allows selecting visualization style ("Ent", "Dep", or "Both")
        and a slider for selecting the index of docs.

        For more information on IPython widgets, see:
            https://ipywidgets.readthedocs.io/en/latest/index.html

        Parameters:
            docs: A list of docs processed by a medspaCy pipeline
            target_span_type: Passed through to `visualize_ent` as its
                `target_span_type` argument ("ents" or "group").
            span_group_name: Passed through to `visualize_ent` as its
                `span_group_name` argument.

        """

        import ipywidgets as widgets

        self.docs = docs
        self.target_span_type = target_span_type
        self.span_group_name = span_group_name
        self.slider = widgets.IntSlider(
            value=0,
            min=0,
            # Clamp so that an empty list of docs does not request max < min.
            max=max(len(docs) - 1, 0),
            step=1,
            description="Doc:",
            disabled=False,
            continuous_update=False,
            orientation="horizontal",
            readout=True,
            readout_format="d",
        )
        self.radio = widgets.RadioButtons(options=["Ent", "Dep", "Both"])
        self.layout = widgets.Layout(
            display="flex", flex_flow="column", align_items="stretch", width="100%"
        )
        # Only re-render when the selected value changes; without `names` the
        # handler would fire for every trait change on the widget.
        self.radio.observe(self._change_handler, names="value")
        self.slider.observe(self._change_handler, names="value")
        self.next_button = widgets.Button(description="Next")
        self.next_button.on_click(self._on_click_next)
        self.previous_button = widgets.Button(description="Previous")
        self.previous_button.on_click(self._on_click_prev)
        self.output = widgets.Output()
        self.box = widgets.Box(
            [
                widgets.HBox([self.radio, self.previous_button, self.next_button]),
                self.slider,
                self.output,
            ],
            layout=self.layout,
        )

        self.display()
        with self.output:
            self._visualize_doc()

    def display(self):
        """Display the Box widget in the current IPython cell."""
        from IPython.display import display as ipydisplay

        ipydisplay(self.box)

    def _change_handler(self, change):
        """Re-render the current doc inside the output area when a control changes."""
        with self.output:
            self._visualize_doc()

    def _visualize_doc(self):
        """Clear the output and render the doc at the slider index in the selected style."""
        self.output.clear_output()
        if len(self.docs) == 0:
            # Nothing to render; leave the output area empty.
            return
        doc = self.docs[self.slider.value]
        style = self.radio.value.lower()
        if style in ("dep", "both"):
            visualize_dep(doc)
        if style in ("ent", "both"):
            visualize_ent(doc, target_span_type=self.target_span_type, span_group_name=self.span_group_name)

    def _on_click_next(self, b):
        """Advance the slider by one doc unless it is already on the last one."""
        if self.slider.value < len(self.docs) - 1:
            self.slider.value += 1

    def _on_click_prev(self, b):
        """Move the slider back by one doc unless it is already on the first one."""
        if self.slider.value > 0:
            self.slider.value -= 1

    def set_docs(self, docs):
        """Replace the list of docs to be visualized and show the first one.

        Parameters:
            docs: The new list of docs to page through.
        """
        self.docs = docs
        # Point the slider at the first doc and resize its range to the new list.
        self.slider.value = 0
        self.slider.max = max(len(docs) - 1, 0)
        # _visualize_doc takes no arguments; it reads the doc at the slider index.
        with self.output:
            self._visualize_doc()

__init__(docs, target_span_type='ents', span_group_name='medspacy_spans')

Create an IPython Widget Box displaying medspaCy's visualizers. The widget allows selecting visualization style ("Ent", "Dep", or "Both") and a slider for selecting the index of docs.

For more information on IPython widgets, see: https://ipywidgets.readthedocs.io/en/latest/index.html

Parameters:

Name Type Description Default
docs

A list of docs processed by a medspaCy pipeline

required
Source code in medspacy/visualization.py
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
def __init__(self, docs, target_span_type: str = "ents", span_group_name: str = "medspacy_spans"):
    """Create an IPython Widget Box displaying medspaCy's visualizers.
    The widget allows selecting visualization style ("Ent", "Dep", or "Both")
    and a slider for selecting the index of docs.

    For more information on IPython widgets, see:
        https://ipywidgets.readthedocs.io/en/latest/index.html

    Parameters:
        docs: A list of docs processed by a medspaCy pipeline
        target_span_type: Stored on the widget as `self.target_span_type`.
        span_group_name: Stored on the widget as `self.span_group_name`.

    """

    import ipywidgets as widgets

    self.docs = docs
    self.target_span_type = target_span_type
    self.span_group_name = span_group_name
    self.slider = widgets.IntSlider(
        value=0,
        min=0,
        max=len(docs) - 1,
        step=1,
        description="Doc:",
        disabled=False,
        continuous_update=False,
        orientation="horizontal",
        readout=True,
        readout_format="d",
    )
    self.radio = widgets.RadioButtons(options=["Ent", "Dep", "Both"])
    self.layout = widgets.Layout(
        display="flex", flex_flow="column", align_items="stretch", width="100%"
    )
    # Only re-render when the selected value changes; without `names` the
    # handler would fire for every trait change on the widget.
    self.radio.observe(self._change_handler, names="value")
    self.slider.observe(self._change_handler, names="value")
    self.next_button = widgets.Button(description="Next")
    self.next_button.on_click(self._on_click_next)
    self.previous_button = widgets.Button(description="Previous")
    self.previous_button.on_click(self._on_click_prev)
    self.output = widgets.Output()
    # Controls on top, then the slider, then the rendered visualization.
    self.box = widgets.Box(
        [
            widgets.HBox([self.radio, self.previous_button, self.next_button]),
            self.slider,
            self.output,
        ],
        layout=self.layout,
    )

    # Show the widget immediately and render the first doc into its output area.
    self.display()
    with self.output:
        self._visualize_doc()

display()

Display the Box widget in the current IPython cell.

Source code in medspacy/visualization.py
308
309
310
311
312
def display(self):
    """Render this widget's container Box in the active IPython cell."""
    import IPython.display

    IPython.display.display(self.box)

set_docs(docs)

Replace the list of docs to be visualized.

Source code in medspacy/visualization.py
335
336
337
338
def set_docs(self, docs):
    """Replace the list of docs to be visualized and show the first one.

    Parameters:
        docs: The new list of docs to page through.
    """
    self.docs = docs
    # Point the slider at the first doc and resize its range to the new list
    # (kept >= 0 so the slider's max never drops below its min of 0).
    self.slider.value = 0
    self.slider.max = max(len(docs) - 1, 0)
    # _visualize_doc takes no arguments; it reads the doc at the slider index.
    with self.output:
        self._visualize_doc()

_create_color_generator()

Create a generator which will cycle through a list of default matplotlib colors

Source code in medspacy/visualization.py
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def _create_color_generator():
    """Return an endless iterator over the ten default matplotlib colors.

    The hex color strings are yielded in a fixed order and the sequence
    restarts from the first color once the last one has been produced.
    """
    import itertools

    palette = (
        "#1f77b4",
        "#ff7f0e",
        "#2ca02c",
        "#d62728",
        "#9467bd",
        "#8c564b",
        "#e377c2",
        "#7f7f7f",
        "#bcbd22",
        "#17becf",
    )
    return itertools.cycle(palette)

visualize_dep(doc, jupyter=True)

Create a dependency-style visualization for ConText targets and modifiers in doc. This will show the relationships between entities in doc and contextual modifiers.

Parameters:

Name Type Description Default
doc Doc

The spacy Doc to visualize.

required
jupyter bool

Whether it is being rendered in a jupyter notebook.

True

Returns:

Type Description
str

The visualization.

Source code in medspacy/visualization.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
def visualize_dep(doc: Doc, jupyter: bool = True) -> str:
    """
    Render ConText targets and modifiers of a doc as a dependency-style diagram, with one arc per
    target/modifier edge in the doc's context graph.

    Args:
        doc: The spacy Doc to visualize.
        jupyter: Whether it is being rendered in a jupyter notebook.

    Returns:
        The visualization, as returned by `displacy.render`.
    """
    # One display record per token; "index" tracks the record's position in `words`.
    words = []
    record_for = {}
    for tok in doc:
        record = {"text": tok.text, "tag": "", "index": tok.i}
        words.append(record)
        record_for[tok] = record

    def _absorb(record, phrase):
        """Fold the records of the tokens after the first of `phrase` into `record`."""
        extra = len(phrase) - 1
        if extra < 1:
            return
        head = record["index"]
        # The first record carries the text of the whole phrase ...
        record["text"] = " ".join([record["text"]] + [t.text for t in phrase[1:]])
        # ... the records of the remaining tokens are dropped ...
        del words[head + 1 : head + 1 + extra]
        # ... and every later record moves up by the number of dropped records.
        for later in words[head + 1 :]:
            later["index"] -= extra

    graph = doc._.context_graph

    # Keep each target/modifier only if none of its tokens was already claimed by an earlier
    # one, so a token that appears twice (e.g. via a span group or a repeated modifier) is
    # merged only once.
    claimed = set()
    kept = []
    for item in list(graph.targets) + graph.modifiers:
        phrase = item if isinstance(item, Span) else doc[item._start : item._end]
        if any(tok in claimed for tok in phrase):
            continue
        kept.append(item)
        claimed.update(phrase)

    # Tag the first token of every kept phrase and merge multi-token phrases into one word.
    for item in kept:
        if isinstance(item, Span):
            head_token, tag, phrase = item[0], item.label_, item
        else:
            bounds = item.modifier_span
            head_token, tag = doc[bounds[0]], item.category
            phrase = doc[bounds[0] : bounds[1]]
        record = record_for[head_token]
        record["tag"] = tag
        _absorb(record, phrase)

    # One arc per (target, modifier) edge, between the merged words of the two phrases.
    arcs = []
    for target, modifier in graph.edges:
        bounds = modifier.modifier_span
        target_index = record_for[target[0]]["index"]
        modifier_index = record_for[doc[bounds[0]]]["index"]
        direction = "right" if target > doc[bounds[0] : bounds[1]] else "left"
        arcs.append(
            {
                "start": min(target_index, modifier_index),
                "end": max(target_index, modifier_index),
                "label": modifier.category,
                "dir": direction,
            }
        )

    dep_data = {"words": words, "arcs": arcs}
    return displacy.render(dep_data, manual=True, jupyter=jupyter)

visualize_ent(doc, context=True, sections=True, jupyter=True, colors=None, target_span_type='ents', span_group_name='medspacy_spans')

Creates a NER-style visualization for targets and modifiers in Doc.

Parameters:

Name Type Description Default
doc Doc

A spacy doc to visualize.

required
context bool

Whether to display the modifiers generated by medSpaCy's cycontext. If the doc has not been processed by context, this will be automatically changed to False. Default True.

True
sections bool

Whether to display the section titles generated by medSpaCy's sectionizer (still in development). If the doc has not been processed by sectionizer, this will be automatically changed to False. This may also have some overlap with cycontext, in which case duplicate spans will be displayed. Default True.

True
jupyter bool

If True, will render directly in a Jupyter notebook. If False, will return the HTML. Default True.

True
colors Dict[str, str]

An optional dictionary which maps labels of targets and modifiers to color strings to be rendered. If None, will create a generator which cycles through the default matplotlib colors for ent and modifier labels and uses a light gray for section headers. Default None.

None

Returns:

Type Description
str

The visualization.

Source code in medspacy/visualization.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def visualize_ent(
    doc: Doc,
    context: bool = True,
    sections: bool = True,
    jupyter: bool = True,
    colors: Dict[str, str] = None,
    target_span_type: str = "ents",
    span_group_name: str = "medspacy_spans"
) -> str:
    """
    Creates a NER-style visualization for targets and modifiers in Doc.

    Args:
        doc: A spacy doc to visualize.
        context: Whether to display the modifiers generated by medSpaCy's cycontext. If the doc has not been processed
            by context, this will be automatically changed to False. Default True.
        sections: Whether to display the section titles generated by medSpaCy's sectionizer (still in development). If
            the doc has not been processed by sectionizer, this will be automatically changed to False. This may also
            have some overlap with cycontext, in which case duplicate spans will be displayed. Default True.
        jupyter: If True, will render directly in a Jupyter notebook. If False, will return the HTML. Default True.
        colors: An optional dictionary which maps labels of targets and modifiers to color strings to be rendered. If
            None, will create a generator which cycles through the default matplotlib colors for ent and modifier labels
            and uses a light gray for section headers. Default None.
        target_span_type: Where to read the target spans from: "ents" uses `doc.ents`, "group" uses
            `doc.spans[span_group_name]`. Default "ents".
        span_group_name: The key of the span group in `doc.spans` to use when `target_span_type` is "group".
            Default "medspacy_spans".

    Returns:
        The visualization.

    Raises:
        ValueError: If `target_span_type` is neither "ents" nor "group".
    """
    # Make sure that doc has the custom medSpaCy attributes registered
    if not hasattr(doc._, "context_graph"):
        context = False
    if not hasattr(doc._, "sections"):
        sections = False

    # Each entry is (displacy span dict, kind) where kind is "ent", "modifier" or "section".
    ents_data = []

    if target_span_type == "ents":
        targets = doc.ents
    elif target_span_type == "group":
        targets = doc.spans[span_group_name]
    else:
        raise ValueError("Target span type must be either ents or group.")

    for target in targets:
        ent_data = {
            "start": target.start_char,
            "end": target.end_char,
            "label": target.label_.upper(),
        }
        ents_data.append((ent_data, "ent"))

    if context:
        # A modifier can be attached to several targets; show each one only once.
        visualized_modifiers = set()
        # Walk the selected targets (not always doc.ents) so that modifiers of
        # span-group targets are displayed when target_span_type is "group".
        for target in targets:
            for modifier in target._.modifiers:
                if modifier in visualized_modifiers:
                    continue
                span = doc[modifier.modifier_span[0]: modifier.modifier_span[1]]
                ent_data = {
                    "start": span.start_char,
                    "end": span.end_char,
                    "label": modifier.category,
                }
                ents_data.append((ent_data, "modifier"))
                visualized_modifiers.add(modifier)
    if sections:
        for section in doc._.sections:
            category = section.category
            # Sections without a category have no title to highlight.
            if category is None:
                continue
            span = doc[section.title_span[0]: section.title_span[1]]
            ent_data = {
                "start": span.start_char,
                "end": span.end_char,
                "label": f"<< {category.upper()} >>",
            }
            ents_data.append((ent_data, "section"))
    if not ents_data:  # No data to display
        viz_data = [{"text": doc.text, "ents": []}]
        options = {}
    else:
        # Order the spans by their start offset in the text.
        ents_data = sorted(ents_data, key=lambda x: x[0]["start"])

        # If colors aren't defined, generate color mappings for each entity
        # and modifier label and set all section titles to a light gray
        if colors is None:
            labels = set()
            section_titles = set()
            for (ent_data, ent_type) in ents_data:
                if ent_type in ("ent", "modifier"):
                    labels.add(ent_data["label"])
                elif ent_type == "section":
                    section_titles.add(ent_data["label"])
            colors = _create_color_mapping(labels)
            for title in section_titles:
                colors[title] = "#dee0e3"
        # Drop the kind tags; displacy only needs the span dicts.
        ents_display_data, _ = zip(*ents_data)
        viz_data = [
            {
                "text": doc.text,
                "ents": ents_display_data,
            }
        ]

        options = {
            "colors": colors,
        }
    return displacy.render(
        viz_data, style="ent", manual=True, options=options, jupyter=jupyter
    )