Skip to content

medspacy.sentence_splitting

PySBDSentenceSplitter

Source code in medspacy/sentence_splitting.py
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
@Language.factory("medspacy_pysbd")
class PySBDSentenceSplitter:
    """Pipeline component that sets sentence boundaries using pySBD.

    Registered under the factory name ``"medspacy_pysbd"``. Calling the
    component segments the document text with a ``pysbd.Segmenter`` and
    flags each token as a sentence start or not.
    """

    def __init__(self, name, nlp, clean=False):
        """Store the pipeline handles and build the English pySBD segmenter.

        Args:
            name: Name given to this component instance.
            nlp: The pipeline object this component belongs to.
            clean: Forwarded unchanged to ``pysbd.Segmenter(clean=...)``.
                NOTE(review): pySBD appears to reject ``clean=True`` when
                ``char_span=True`` (cleaning alters offsets) -- confirm
                before enabling.
        """
        self.name = name
        self.nlp = nlp
        # char_span=True makes the segmenter return spans carrying character
        # offsets (``.start``), which __call__ matches against token offsets.
        self.seg = pysbd.Segmenter(language="en", clean=clean, char_span=True)

    def __call__(self, doc):
        """
        spaCy component based on pySBD (https://github.com/nipunsadvilkar/pySBD),
        adapted to work with spaCy 3.0.

        Segments ``doc.text_with_ws`` and sets ``token.is_sent_start`` on every
        token: True when the token's character offset (``token.idx``) equals
        the start offset of a pySBD sentence span, False otherwise.

        Args:
            doc: The document to annotate; modified in place.

        Returns:
            The same ``doc`` object.
        """
        sents_char_spans = self.seg.segment(doc.text_with_ws)
        # A set gives O(1) membership per token; the original list made the
        # loop below O(tokens * sentences).
        sent_start_offsets = {sent.start for sent in sents_char_spans}
        for token in doc:
            token.is_sent_start = token.idx in sent_start_offsets
        return doc

__call__(doc)

spaCy component based on pySBD (https://github.com/nipunsadvilkar/pySBD), adapted to work with spaCy 3.0.

Source code in medspacy/sentence_splitting.py
13
14
15
16
17
18
19
20
21
def __call__(self, doc):
    """
    spaCy component based on pySBD (https://github.com/nipunsadvilkar/pySBD),
    adapted to work with spaCy 3.0.

    Segments ``doc.text_with_ws`` with ``self.seg`` and sets
    ``token.is_sent_start`` on every token: True when the token's character
    offset (``token.idx``) equals the start offset of a returned sentence
    span, False otherwise.

    Args:
        doc: The document to annotate; modified in place.

    Returns:
        The same ``doc`` object.
    """
    sents_char_spans = self.seg.segment(doc.text_with_ws)
    # A set gives O(1) membership per token; the original list made the
    # loop below O(tokens * sentences).
    sent_start_offsets = {sent.start for sent in sents_char_spans}
    for token in doc:
        token.is_sent_start = token.idx in sent_start_offsets
    return doc