Skip to content

edspdf.layers.vocabulary

Vocabulary

Bases: torch.nn.Module, Generic[T]

Vocabulary layer. This is not meant to be used as a torch.nn.Module, but subclassing torch.nn.Module makes the instances appear when printing a model, which is nice.

Source code in edspdf/layers/vocabulary.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
@registry.factory.register("vocabulary")
class Vocabulary(torch.nn.Module, Generic[T]):
    """
    Vocabulary layer mapping hashable items to integer indices.

    This is not meant to be used as a `torch.nn.Module`, but subclassing
    `torch.nn.Module` makes the instances appear when printing a model,
    which is nice.
    """

    def __init__(self, items: Sequence[T] = None, default: int = -100):
        """
        Parameters
        ----------
        items: Sequence[T]
            Initial vocabulary elements if any.
            Specific elements such as padding and unk can be set here to enforce their
            index in the vocabulary.
            When omitted, the vocabulary starts empty and in initialization mode.
        default: int
            Default index to use for out of vocabulary elements
            Defaults to -100
        """
        super().__init__()
        if items is None:
            self.indices = {}
            self.initialized = False
        else:
            # NOTE(review): duplicated items keep only their last position, which
            # breaks the "position in the dict == index" assumption that `decode`
            # relies on. Callers should pass unique items.
            self.indices = {v: i for i, v in enumerate(items)}
            self.initialized = True
        self.default = default

    def __len__(self):
        """Returns the number of elements currently stored in the vocabulary."""
        return len(self.indices)

    @contextlib.contextmanager
    def initialization(self):
        """
        Enters the initialization mode.
        Out of vocabulary elements will be assigned an index.

        The vocabulary is marked as initialized when the block exits, even if
        the block raises.
        """
        self.initialized = False
        try:
            yield
        finally:
            # Without the `finally`, an exception raised inside the `with` body
            # would leave the vocabulary growing on every later `encode` call.
            self.initialized = True

    def encode(self, item):
        """
        Converts an element into its vocabulary index
        If the layer is in its initialization mode (`with vocab.initialization(): ...`),
        and the element is out of vocabulary, a new index will be created and returned.
        Otherwise, any oov element will be encoded with the `default` index.

        Parameters
        ----------
        item: InputT

        Returns
        -------
        int
        """
        if self.initialized:
            # Frozen vocabulary: unknown items map to the default index.
            return self.indices.get(item, self.default)
        # Initialization mode: unknown items get the next free index.
        return self.indices.setdefault(item, len(self.indices))

    def decode(self, idx):
        """
        Converts an index into its original value

        Parameters
        ----------
        idx: int
            Index to convert. Negative indices (such as the default out of
            vocabulary index) are decoded as `None`.

        Returns
        -------
        InputT
            The element stored at position `idx`, or `None` if `idx` is negative.

        Raises
        ------
        IndexError
            If `idx` is greater than or equal to the size of the vocabulary.
        """
        # Dicts preserve insertion order, so the idx-th key is the element
        # that was inserted at position idx.
        return list(self.indices)[idx] if idx >= 0 else None

    def extra_repr(self):
        """Returns the vocabulary size as `n=<size>`, for use in the module's printed form."""
        return "n={}".format(len(self.indices))

__init__(items=None, default=-100)

PARAMETER DESCRIPTION
items

Initial vocabulary elements if any. Specific elements such as padding and unk can be set here to enforce their index in the vocabulary.

TYPE: Sequence[T] DEFAULT: None

default

Default index to use for out-of-vocabulary elements. Defaults to -100.

TYPE: int DEFAULT: -100

Source code in edspdf/layers/vocabulary.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
def __init__(self, items: Sequence[T] = None, default: int = -100):
    """
    Build the vocabulary, optionally seeding it with known elements.

    Parameters
    ----------
    items: Sequence[InputT]
        Initial vocabulary elements if any; each one is indexed by its
        position in the sequence. Passing special elements such as padding
        and unk here enforces their index in the vocabulary.
        When omitted, the vocabulary starts empty and uninitialized.
    default: int
        Index returned for out of vocabulary elements.
        Defaults to -100
    """
    super().__init__()
    has_items = items is not None
    # Position in the sequence becomes the element's index.
    self.indices = (
        {value: position for position, value in enumerate(items)}
        if has_items
        else {}
    )
    # A seeded vocabulary is considered ready to use as-is.
    self.initialized = has_items
    self.default = default

initialization()

Enters the initialization mode. Out of vocabulary elements will be assigned an index.

Source code in edspdf/layers/vocabulary.py
43
44
45
46
47
48
49
50
51
@contextlib.contextmanager
def initialization(self):
    """
    Enters the initialization mode.
    Out of vocabulary elements will be assigned an index.

    The vocabulary is marked as initialized when the block exits, even if
    the block raises.
    """
    self.initialized = False
    try:
        yield
    finally:
        # Without the `finally`, an exception raised inside the `with` body
        # would leave the vocabulary stuck in initialization mode.
        self.initialized = True

encode(item)

Converts an element into its vocabulary index. If the layer is in its initialization mode (with vocab.initialization(): ...) and the element is out of vocabulary, a new index will be created and returned. Otherwise, any out-of-vocabulary element will be encoded with the default index.

PARAMETER DESCRIPTION
item

RETURNS DESCRIPTION
int
Source code in edspdf/layers/vocabulary.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
def encode(self, item):
    """
    Look up the vocabulary index of an element.

    While the layer is in its initialization mode
    (`with vocab.initialization(): ...`), an out of vocabulary element is
    registered under a new index, which is returned. Once initialized, any
    out of vocabulary element is encoded with the `default` index and the
    vocabulary is left untouched.

    Parameters
    ----------
    item: InputT

    Returns
    -------
    int
    """
    if not self.initialized:
        # Still growing: unknown items take the next free index.
        known = self.indices
        return known.setdefault(item, len(known))
    # Frozen: unknown items fall back to the default index.
    return self.indices.get(item, self.default)

decode(idx)

Converts an index into its original value

PARAMETER DESCRIPTION
idx

RETURNS DESCRIPTION
InputT
Source code in edspdf/layers/vocabulary.py
77
78
79
80
81
82
83
84
85
86
87
88
89
def decode(self, idx):
    """
    Map an index back to the element it was assigned to.

    Parameters
    ----------
    idx: int

    Returns
    -------
    InputT
        The element at position `idx`, or `None` when `idx` is negative.
    """
    if idx >= 0:
        # Keys are kept in insertion order, so position matches index.
        ordered_items = list(self.indices.keys())
        return ordered_items[idx]
    return None