Skip to content

edspdf.components.classifiers.random

RandomClassifier

Bases: Component

Random classifier, for chaos purposes. Assigns each line a label drawn at random from the configured labels.

Source code in edspdf/components/classifiers/random.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@registry.factory.register("random-classifier")
class RandomClassifier(Component):
    """
    Random classifier, for chaos purposes.

    Assigns every line of a document a label drawn at random from a fixed,
    optionally weighted, set of labels.

    Parameters
    ----------
    labels : Union[List[str], Dict[str, float]]
        Either a list of labels (each given the same weight) or a mapping
        from label to weight. Weights are normalised so that they sum to one.
    seed : Optional[int]
        Seed forwarded to ``np.random.default_rng``.

    Raises
    ------
    ValueError
        If ``labels`` is empty or if the weights do not sum to a strictly
        positive number.
    """

    def __init__(
        self,
        labels: Union[List[str], Dict[str, float]],
        seed: Optional[int] = 0,
    ) -> None:
        super().__init__()

        if isinstance(labels, list):
            # A plain list means "uniform": give every label the same weight.
            labels = {c: 1 for c in labels}

        if not labels:
            raise ValueError("`labels` must contain at least one label.")

        # Compute the normalisation constant once instead of once per label.
        total = sum(labels.values())
        if total <= 0:
            raise ValueError(
                "The weights in `labels` must sum to a strictly positive number."
            )

        # Mapping label -> probability (sums to one).
        self.labels = {c: w / total for c, w in labels.items()}

        self.rgn = np.random.default_rng(seed=seed)

    def __call__(self, doc: PDFDoc) -> PDFDoc:
        """
        Assign a randomly drawn label to each line of ``doc``.

        Parameters
        ----------
        doc : PDFDoc
            Document whose ``lines`` receive a ``label`` attribute.

        Returns
        -------
        PDFDoc
            The same document object, modified in place.
        """
        names = list(self.labels)
        # Draw indices rather than the labels themselves: passing the labels
        # to `choice` would convert them to a NumPy array, so each line would
        # receive a NumPy scalar instead of the originally configured object.
        picks = self.rgn.choice(
            len(names),
            p=list(self.labels.values()),
            size=len(doc.lines),
        )
        for line, idx in zip(doc.lines, picks.tolist()):
            line.label = names[idx]

        return doc