# biome.text.features Module

# WordFeatures Class


class WordFeatures (
    embedding_dim: int,
    lowercase_tokens: bool = False,
    trainable: bool = True,
    weights_file: Union[str, NoneType] = None,
    **extra_params,
)

Feature configuration at word level

Parameters

embedding_dim
Dimension of the embeddings
lowercase_tokens
If True, lowercase tokens before indexing
trainable
If False, freeze the embeddings
weights_file
Path to a file with pretrained weights for the embedding
**extra_params
Extra parameters passed on to the indexer and embedder of the AllenNLP configuration framework. For example: WordFeatures(embedding_dim=300, embedder={"padding_index": 0})

# Instance variables

var config : Dict

Returns the config in AllenNLP format

# to_json Method


def to_json(self) -> Dict

Returns the config as dict for the serialized json config file

# to_dict Method


def to_dict(self) -> Dict

Returns the config as dict

# CharFeatures Class


class CharFeatures (
    embedding_dim: int,
    encoder: Dict[str, Any],
    dropout: float = 0.0,
    lowercase_characters: bool = False,
    **extra_params,
)

Feature configuration at character level

Parameters

embedding_dim
Dimension of the character embeddings.
encoder
A sequence-to-vector encoder resulting in a word representation based on its characters
dropout
Dropout applied to the output of the encoder
lowercase_characters
If True, lowercase characters before indexing
**extra_params
Extra parameters passed on to the indexer and embedder of the AllenNLP configuration framework. For example: CharFeatures(embedding_dim=32, indexer={"min_padding_length": 5}, ...)

# Instance variables

var config : Dict

Returns the config in AllenNLP format

# to_json Method


def to_json(self)

Returns the config as dict for the serialized json config file

# to_dict Method


def to_dict(self)

Returns the config as dict

# TransformersFeatures Class


class TransformersFeatures (model_name: str, trainable: bool = False)

Configuration of the features extracted with the transformers models.

We use AllenNLP's "mismatched" indexer and embedder to get word-level representations. Most of the transformers models work with word-piece tokenizers.

Parameters

model_name
Name of one of the transformers models.
trainable
If False, freeze the transformer weights

# Instance variables

var config : Dict

Returns the config in AllenNLP format

# to_dict Method


def to_dict(self) -> Dict

Returns the config as dict

# to_json Method


def to_json(self) -> Dict

Returns the config as dict for the serialized json config file

Maintained by