hezar.models.text_generation.t5.t5_text_generation_config module

class hezar.models.text_generation.t5.t5_text_generation_config.T5TextGenerationConfig(vocab_size: int = 32103, d_model: int = 768, d_kv: int = 64, d_ff: int = 2048, num_layers: int = 12, num_decoder_layers: int = 12, num_heads: int = 12, relative_attention_num_buckets: int = 32, relative_attention_max_distance: int = 128, dropout_rate: float = 0.1, layer_norm_epsilon: float = 1e-06, initializer_factor: float = 1.0, feed_forward_proj: str = 'gated-gelu', is_encoder_decoder: bool = True, tie_word_embeddings: bool = False, use_cache: bool = True, pad_token_id: int = 0, decoder_start_token_id: int = 0, eos_token_id: int = 1, min_length: int = 0, max_length: int = 100, input_prefix: str = None)

Bases: ModelConfig

d_ff: int = 2048
d_kv: int = 64
d_model: int = 768
decoder_start_token_id: int = 0
dropout_rate: float = 0.1
eos_token_id: int = 1
feed_forward_proj: str = 'gated-gelu'
initializer_factor: float = 1.0
input_prefix: str = None
is_encoder_decoder: bool = True
layer_norm_epsilon: float = 1e-06
max_length: int = 100
min_length: int = 0
name: str = 't5_text_generation'
num_decoder_layers: int = 12
num_heads: int = 12
num_layers: int = 12
pad_token_id: int = 0
relative_attention_max_distance: int = 128
relative_attention_num_buckets: int = 32
tie_word_embeddings: bool = False
use_cache: bool = True
vocab_size: int = 32103
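
Configuration holder for the T5 text generation model. A minimal usage sketch, assuming only the import path documented above; the override values and the prefix string below are illustrative, not defaults::

    from hezar.models.text_generation.t5.t5_text_generation_config import (
        T5TextGenerationConfig,
    )

    # Build a config with the documented defaults, overriding a few
    # generation-related fields (illustrative values only).
    config = T5TextGenerationConfig(
        max_length=64,              # cap on generated sequence length
        min_length=2,               # minimum generated length
        input_prefix="translate:",  # hypothetical prefix prepended to inputs
    )

    print(config.name)        # 't5_text_generation'
    print(config.max_length)  # 64

Because T5TextGenerationConfig inherits from ModelConfig, the same fields can also be assigned after construction (e.g. config.max_length = 128).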