Skip to content

edspdf.extractors.style

models

BaseStyle

Bases: BaseModel

Model acting as an abstraction for a style.

Source code in edspdf/extractors/style/models.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
class BaseStyle(BaseModel):
    """
    Model acting as an abstraction for a style.

    Holds the font description of a piece of text together with the
    coordinates of the box it occupies.
    """

    # Raw compound font name (e.g. `Arial,Bold`); optional, defaults to None.
    fontname: Optional[str] = None

    # `font` and `style` are the two parts that `Style.from_fontname` splits
    # out of the compound `fontname`.
    font: str
    style: str
    # Character size.
    size: float
    # Whether the character is upright.
    upright: bool

    # Bounding box coordinates. `Style.from_char` fills these with values
    # divided by the width/height it is given.
    x0: float
    x1: float
    y0: float
    y1: float

Style

Bases: BaseStyle

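Concrete style model: extends BaseStyle with constructors (from a compound font name or from a character), an equality test on font, style, size and uprightness, and an addition that merges bounding boxes.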

Source code in edspdf/extractors/style/models.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
class Style(BaseStyle):
    """
    Model acting as an abstraction for a style.

    Extends `BaseStyle` with alternate constructors, an equality test that
    ignores the bounding box, and an addition that merges bounding boxes.
    """

    @classmethod
    def from_fontname(
        cls,
        fontname: str,
        size: float,
        upright: bool,
        x0: float,
        x1: float,
        y0: float,
        y1: float,
    ) -> "Style":
        """
        Constructor using the compound `fontname` representation.

        Parameters
        ----------
        fontname : str
            Compound description of the font. Often `Arial`,
            `Arial,Bold` or `Arial-Bold`
        size : float
            Character size.
        upright : bool
            Whether the character is upright.
        x0 : float
            First horizontal coordinate of the bounding box.
        x1 : float
            Second horizontal coordinate of the bounding box.
        y0 : float
            First vertical coordinate of the bounding box.
        y1 : float
            Second vertical coordinate of the bounding box.

        Returns
        -------
        Style
            Style representation.
        """
        # Round the size to avoid floating point aberrations.
        size = round(size, 2)

        # The first chunk is the font; the last remaining chunk, if any,
        # is the style. A bare font name gets the "Normal" style.
        font, *variants = SEP_PATTERN.split(fontname)
        style = variants[-1] if variants else "Normal"

        # Build through `cls` so that subclasses get instances of themselves.
        return cls(
            fontname=fontname,
            font=font,
            style=style,
            size=size,
            upright=upright,
            x0=x0,
            x1=x1,
            y0=y0,
            y1=y1,
        )

    @classmethod
    def from_char(
        cls,
        char: LTChar,
        width: float,
        height: float,
    ) -> "Style":
        """
        Constructor using a character object.

        Parameters
        ----------
        char : LTChar
            Character providing `fontname`, `size`, `upright` and the
            `x0`, `x1`, `y0`, `y1` coordinates.
        width : float
            Divisor applied to the horizontal coordinates
            (presumably the page width).
        height : float
            Divisor applied to the vertical coordinates
            (presumably the page height).

        Returns
        -------
        Style
            Style representation with relative coordinates.
        """
        return cls.from_fontname(
            fontname=char.fontname,
            size=char.size,
            upright=char.upright,
            x0=char.x0 / width,
            x1=char.x1 / width,
            # The vertical axis is flipped: the character's `y1` yields `y0`.
            y0=1 - char.y1 / height,
            y1=1 - char.y0 / height,
        )

    def __eq__(self, other: "Style") -> bool:
        """
        Computes equality between two styles.

        Only the font, style, size (rounded to two decimals) and uprightness
        are compared; `fontname` and the bounding box are ignored.

        Parameters
        ----------
        other : Style
            Style object to compare.

        Returns
        -------
        bool
            Whether the two styles are equal. `NotImplemented` is returned
            when `other` does not expose the compared attributes, so that
            Python falls back to its default comparison instead of raising.
        """
        try:
            theirs = (
                other.font,
                other.style,
                round(other.size, 2),
                other.upright,
            )
        except AttributeError:
            return NotImplemented

        mine = (self.font, self.style, round(self.size, 2), self.upright)

        return mine == theirs

    def __add__(self, other: "Style") -> "Style":
        """
        Merges two equal styles into one covering both bounding boxes.

        Parameters
        ----------
        other : Style
            Style object to merge with.

        Returns
        -------
        Style
            Copy of `self` whose box spans both operands.

        Raises
        ------
        ValueError
            If the two styles are not equal.
        """
        if self != other:
            raise ValueError("You cannot add two different styles")

        merged = self.copy()

        merged.x0 = min(self.x0, other.x0)
        merged.x1 = max(self.x1, other.x1)
        merged.y0 = min(self.y0, other.y0)
        merged.y1 = max(self.y1, other.y1)

        return merged

from_fontname(fontname, size, upright, x0, x1, y0, y1) classmethod

Constructor using the compound fontname representation.

PARAMETER DESCRIPTION
fontname

Compound description of the font. Often Arial, Arial,Bold or Arial-Bold

TYPE: str

size

Character size.

TYPE: float

upright

Whether the character is upright.

TYPE: bool

RETURNS DESCRIPTION
Style

Style representation.

Source code in edspdf/extractors/style/models.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
@classmethod
def from_fontname(
    cls,
    fontname: str,
    size: float,
    upright: bool,
    x0: float,
    x1: float,
    y0: float,
    y1: float,
) -> "Style":
    """
    Constructor using the compound `fontname` representation.

    Parameters
    ----------
    fontname : str
        Compound description of the font. Often `Arial`,
        `Arial,Bold` or `Arial-Bold`
    size : float
        Character size.
    upright : bool
        Whether the character is upright.
    x0 : float
        First horizontal coordinate of the bounding box.
    x1 : float
        Second horizontal coordinate of the bounding box.
    y0 : float
        First vertical coordinate of the bounding box.
    y1 : float
        Second vertical coordinate of the bounding box.

    Returns
    -------
    Style
        Style representation.
    """
    # Round the size to avoid floating point aberrations.
    size = round(size, 2)

    # The first chunk is the font; the last remaining chunk, if any,
    # is the style. A bare font name gets the "Normal" style.
    font, *variants = SEP_PATTERN.split(fontname)
    style = variants[-1] if variants else "Normal"

    # Build through `cls` so that subclasses get instances of themselves.
    return cls(
        fontname=fontname,
        font=font,
        style=style,
        size=size,
        upright=upright,
        x0=x0,
        x1=x1,
        y0=y0,
        y1=y1,
    )

__eq__(other)

Computes equality between two styles.

PARAMETER DESCRIPTION
other

Style object to compare.

TYPE: Style

RETURNS DESCRIPTION
bool

Whether the two styles are equal.

Source code in edspdf/extractors/style/models.py
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
def __eq__(self, other: "Style") -> bool:
    """
    Computes equality between two styles.

    Only the font, style, size (rounded to two decimals) and uprightness
    are compared.

    Parameters
    ----------
    other : Style
        Style object to compare.

    Returns
    -------
    bool
        Whether the two styles are equal. `NotImplemented` is returned
        when `other` does not expose the compared attributes, so that
        Python falls back to its default comparison instead of raising.
    """
    try:
        theirs = (
            other.font,
            other.style,
            round(other.size, 2),
            other.upright,
        )
    except AttributeError:
        return NotImplemented

    mine = (self.font, self.style, round(self.size, 2), self.upright)

    return mine == theirs

StyledText

Bases: BaseModel

Abstraction of a word, containing the style and the text.

Source code in edspdf/extractors/style/models.py
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
class StyledText(BaseModel):
    """
    Abstraction of a word, containing the style and the text.
    """

    text: str
    style: Style

    @classmethod
    def from_char(
        cls,
        char: LTChar,
        width: float,
        height: float,
    ) -> "StyledText":
        """
        Constructor using a character object.

        Parameters
        ----------
        char : LTChar
            Character providing the text and the style information.
        width : float
            Forwarded to `Style.from_char`.
        height : float
            Forwarded to `Style.from_char`.

        Returns
        -------
        StyledText
            Text of the character, with every `SPACE_PATTERN` match replaced
            by a single space, along with its style.
        """
        # Build through `cls` so that subclasses get instances of themselves.
        return cls(
            text=SPACE_PATTERN.sub(" ", char._text),
            style=Style.from_char(char, width=width, height=height),
        )

    def add_space(self) -> None:
        """
        Makes the text end with exactly one space, in place.
        """
        self.text = f"{self.text.rstrip()} "

    def rstrip(self) -> None:
        """
        Strips trailing whitespace from the text, in place.
        """
        self.text = self.text.rstrip()

    def __add__(self, other: "StyledText") -> "StyledText":
        """
        Concatenates two styled texts.

        Parameters
        ----------
        other : StyledText
            Styled text to append.

        Returns
        -------
        StyledText
            New object holding the concatenated texts and the sum of the
            styles. The style addition raises a `ValueError` when the two
            styles differ.
        """
        return StyledText(
            text=self.text + other.text,
            style=self.style + other.style,
        )

    def __iadd__(self, other: "StyledText") -> "StyledText":
        """
        Supports `a += b` by delegating to `__add__`.

        `self` is not mutated: a new object is returned and the name on the
        left-hand side is rebound to it.
        """
        return self + other