Skip to content

edspdf.components.classifiers.mask

MaskClassifier

Bases: Component

Mask classifier that reproduces the PDFBox behaviour.

Source code in edspdf/components/classifiers/mask.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
class MaskClassifier(Component):
    """
    Mask-based classifier that reproduces the PDFBox behaviour.

    The classifier holds a list of labelled mask boxes and, when called on a
    document, passes them together with the document's lines to
    ``align_box_labels`` and stores the result back on the document.

    Parameters
    ----------
    *ms
        Mask boxes supplied by the caller, kept in the order given.
    threshold
        Value forwarded unchanged as the ``threshold`` argument of
        ``align_box_labels``. Defaults to ``1.0``.
    """

    def __init__(
        self,
        *ms: Box,
        threshold: float = 1.0,
    ):
        super().__init__()

        # Very large box labelled "pollution", always placed after the
        # caller-supplied masks.
        # NOTE(review): presumably acts as a catch-all for lines matched by
        # no other mask -- confirm against align_box_labels.
        fallback = Box(
            label="pollution",
            x0=-10000,
            x1=10000,
            y0=-10000,
            y1=10000,
        )

        self.masks = [*ms, fallback]
        self.threshold = threshold

    def __call__(self, doc: PDFDoc) -> PDFDoc:
        """
        Relabel the lines of ``doc`` using the stored masks.

        ``doc.lines`` is replaced by the value returned from
        ``align_box_labels``; the same ``doc`` object is then returned.
        """
        relabelled = align_box_labels(
            src_boxes=self.masks,
            dst_boxes=doc.lines,
            threshold=self.threshold,
        )
        doc.lines = relabelled

        return doc