Bases: torch.nn.Module, Generic[T]
Vocabulary layer.
This class is not meant to be used as a regular torch.nn.Module; it subclasses torch.nn.Module
only so that its instances appear when printing a model, which is nice.
Source code in edspdf/layers/vocabulary.py
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92 | @registry.factory.register("vocabulary")
class Vocabulary(torch.nn.Module, Generic[T]):
    """
    Vocabulary layer.

    Maps items to integer indices (`encode`) and indices back to items
    (`decode`). Items are stored as dictionary keys, so they must be hashable.

    This is not meant to be used as torch.nn.Module but subclassing torch.nn.Module
    makes the instances appear when printing a model, which is nice.
    """

    def __init__(self, items: Sequence[T] = None, default: int = -100):
        """
        Parameters
        ----------
        items: Sequence[T]
            Initial vocabulary elements if any.
            Specific elements such as padding and unk can be set here to enforce their
            index in the vocabulary.
        default: int
            Default index to use for out of vocabulary elements
            Defaults to -100
        """
        super().__init__()
        if items is None:
            # No items given: start empty and in initialization mode, so that
            # `encode` assigns new indices to unseen items.
            self.indices = {}
            self.initialized = False
        else:
            # Each item is indexed by its position in `items`.
            self.indices = {v: i for i, v in enumerate(items)}
            self.initialized = True
        self.default = default

    def __len__(self):
        """Return the number of items stored in the vocabulary."""
        return len(self.indices)

    @contextlib.contextmanager
    def initialization(self):
        """
        Enters the initialization mode.
        Out of vocabulary elements will be assigned an index.

        The vocabulary is marked as initialized again when the `with` block
        exits, including when the block raises an exception.
        """
        self.initialized = False
        try:
            yield
        finally:
            # Always leave initialization mode, even on error; otherwise a
            # failure inside the block would leave `encode` silently adding
            # new entries forever.
            self.initialized = True

    def encode(self, item):
        """
        Converts an element into its vocabulary index
        If the layer is in its initialization mode (`with vocab.initialization(): ...`),
        and the element is out of vocabulary, a new index will be created and returned.
        Otherwise, any oov element will be encoded with the `default` index.

        Parameters
        ----------
        item: T

        Returns
        -------
        int
        """
        if self.initialized:
            # Frozen vocabulary: unknown items map to the default index.
            return self.indices.get(item, self.default)
        # Initialization mode: unknown items get the next free index.
        return self.indices.setdefault(item, len(self.indices))

    def decode(self, idx):
        """
        Converts an index into its original value

        Negative indices (such as the default out-of-vocabulary index -100)
        decode to None.

        Parameters
        ----------
        idx: int

        Returns
        -------
        T
        """
        # Relies on dict insertion order: the idx-th inserted key is returned.
        return list(self.indices)[idx] if idx >= 0 else None

    def extra_repr(self):
        """Return the vocabulary size, shown when the module is printed."""
        return "n={}".format(len(self.indices))
|
__init__(items=None, default=-100)
| PARAMETER |
DESCRIPTION |
items |
Initial vocabulary elements if any.
Specific elements such as padding and unk can be set here to enforce their
index in the vocabulary.
TYPE:
Sequence[T]
DEFAULT:
None
|
default |
Default index to use for out of vocabulary elements
Defaults to -100
TYPE:
int
DEFAULT:
-100
|
Source code in edspdf/layers/vocabulary.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
def __init__(self, items: Sequence[T] = None, default: int = -100):
    """
    Build the vocabulary, optionally pre-filled with `items`.

    Parameters
    ----------
    items: Sequence[T]
        Initial vocabulary elements if any.
        Specific elements such as padding and unk can be set here to enforce their
        index in the vocabulary.
    default: int
        Default index to use for out of vocabulary elements
        Defaults to -100
    """
    super().__init__()
    has_items = items is not None
    # Each provided item is indexed by its position in `items`.
    self.indices = (
        {item: index for index, item in enumerate(items)} if has_items else {}
    )
    # Without initial items the vocabulary starts in initialization mode.
    self.initialized = has_items
    self.default = default
|
initialization()
Enters the initialization mode.
Out of vocabulary elements will be assigned an index.
Source code in edspdf/layers/vocabulary.py
43
44
45
46
47
48
49
50
@contextlib.contextmanager
def initialization(self):
    """
    Enters the initialization mode.
    Out of vocabulary elements will be assigned an index.

    The vocabulary is marked as initialized again when the `with` block
    exits, including when the block raises an exception.
    """
    self.initialized = False
    try:
        yield
    finally:
        # Always leave initialization mode, even on error, so the vocabulary
        # does not stay open to new entries after a failed block.
        self.initialized = True
|
encode(item)
Converts an element into its vocabulary index
If the layer is in its initialization mode (`with vocab.initialization(): ...`)
and the element is out of vocabulary, a new index is created and returned.
Otherwise, any out-of-vocabulary element is encoded with the `default` index.
| PARAMETER |
DESCRIPTION |
item |
|
Source code in edspdf/layers/vocabulary.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def encode(self, item):
    """
    Converts an element into its vocabulary index

    In initialization mode (`with vocab.initialization(): ...`), an element
    that is out of vocabulary is given a new index, which is returned.
    Otherwise, any oov element is encoded with the `default` index.

    Parameters
    ----------
    item: T

    Returns
    -------
    int
    """
    if not self.initialized:
        # Initialization mode: register unseen items under the next free index.
        next_index = len(self.indices)
        return self.indices.setdefault(item, next_index)
    # Frozen vocabulary: unknown items fall back to the default index.
    return self.indices.get(item, self.default)
|
decode(idx)
Converts an index into its original value
| PARAMETER |
DESCRIPTION |
idx |
|
| RETURNS |
DESCRIPTION |
InputT
|
|
Source code in edspdf/layers/vocabulary.py
77
78
79
80
81
82
83
84
85
86
87
88
def decode(self, idx):
    """
    Converts an index into its original value

    Negative indices decode to None.

    Parameters
    ----------
    idx: int

    Returns
    -------
    T
    """
    if idx < 0:
        return None
    # Keys are listed in insertion order; pick the one at position `idx`.
    known_items = list(self.indices.keys())
    return known_items[idx]
|