hezar.models.backbone.distilbert.distilbert_config module

class hezar.models.backbone.distilbert.distilbert_config.DistilBERTConfig(task: str = <TaskType.LANGUAGE_MODELING: 'language_modeling'>, activation: str = 'gelu', attention_dropout: float = 0.1, dim: int = 768, dropout: float = 0.1, hidden_dim: int = 3072, initializer_range: float = 0.02, max_position_embeddings: int = 512, n_heads: int = 12, n_layers: int = 6, output_past: bool = True, pad_token_id: int = 0, qa_dropout: float = 0.1, tie_weights_: bool = True, vocab_size: int = 42000)

Bases: ModelConfig

activation: str = 'gelu'
attention_dropout: float = 0.1
dim: int = 768
dropout: float = 0.1
hidden_dim: int = 3072
initializer_range: float = 0.02
max_position_embeddings: int = 512
n_heads: int = 12
n_layers: int = 6
name: str = 'distilbert'
output_past: bool = True
pad_token_id: int = 0
qa_dropout: float = 0.1
task: str = 'language_modeling'
tie_weights_: bool = True
vocab_size: int = 42000