spaCy component based on pySBD (https://github.com/nipunsadvilkar/pySBD), improved to work with spaCy 3.0.
Source code in medspacy/sentence_splitting.py
13
14
15
16
17
18
19
20
def __call__(self, doc):
    """Mark sentence starts on ``doc`` using the segmenter held in ``self.seg``.

    spaCy component based on pySBD (https://github.com/nipunsadvilkar/pySBD),
    improved to work with spaCy 3.0.

    Args:
        doc: The document to process. Its ``text_with_ws`` is handed to
            ``self.seg.segment`` and ``is_sent_start`` is assigned on each
            of its tokens.

    Returns:
        The same ``doc`` object, with ``is_sent_start`` assigned on every
        token.
    """
    # NOTE(review): the returned items only need a ``.start`` attribute here;
    # presumably they are pySBD TextSpan objects from a segmenter created
    # with char_span=True -- confirm where self.seg is constructed.
    sents_char_spans = self.seg.segment(doc.text_with_ws)

    # Use a set so the per-token membership test below is a constant-time
    # lookup instead of a scan over every sentence start.
    sentence_start_offsets = {sent.start for sent in sents_char_spans}

    for token in doc:
        # A token opens a sentence when its ``idx`` equals the ``start`` of
        # one of the segmenter's spans; every other token is explicitly
        # marked False.
        token.is_sent_start = token.idx in sentence_start_offsets
    return doc
|