hezar.models.speech_recognition.whisper.whisper_speech_recognition_config module
- class hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.WhisperSpeechRecognitionConfig(vocab_size: int = 51865, num_mel_bins: int = 80, encoder_layers: int = 6, encoder_attention_heads: int = 4, decoder_layers: int = 6, decoder_attention_heads: int = 4, num_hidden_layers: int = 12, decoder_ffn_dim: int = 1536, encoder_ffn_dim: int = 1536, encoder_layerdrop: float = 0.0, decoder_layerdrop: float = 0.0, decoder_start_token_id: int = 50257, use_cache: bool = False, sampling_rate: int = 16000, is_encoder_decoder: bool = True, activation_function: str = 'gelu', d_model: int = 256, dropout: float = 0.0, torch_dtype: str = 'float32', attention_dropout: float = 0.0, activation_dropout: float = 0.0, init_std: float = 0.02, scale_embedding: bool = False, max_source_positions: int = 1500, max_target_positions: int = 448, pad_token_id: int = 50256, bos_token_id: int = 50257, eos_token_id: int = 50256, suppress_tokens: List[int] = None, begin_suppress_tokens: List[int] = <factory>, use_weighted_layer_sum: bool = False, classifier_proj_size: int = 256, apply_spec_augment: bool = False, mask_time_prob: float = 0.05, mask_time_length: int = 10, mask_time_min_masks: int = 2, mask_feature_prob: float = 0.0, mask_feature_length: int = 10, mask_feature_min_masks: int = 0, max_new_tokens: int = 448, generation_config: dict | hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.WhisperSpeechRecognitionGenerationConfig = None)[source]¶
Bases: ModelConfig
- activation_dropout: float = 0.0¶
- activation_function: str = 'gelu'¶
- apply_spec_augment: bool = False¶
- attention_dropout: float = 0.0¶
- begin_suppress_tokens: List[int]¶
- bos_token_id: int = 50257¶
- classifier_proj_size: int = 256¶
- d_model: int = 256¶
- decoder_attention_heads: int = 4¶
- decoder_ffn_dim: int = 1536¶
- decoder_layerdrop: float = 0.0¶
- decoder_layers: int = 6¶
- decoder_start_token_id: int = 50257¶
- dropout: float = 0.0¶
- encoder_attention_heads: int = 4¶
- encoder_ffn_dim: int = 1536¶
- encoder_layerdrop: float = 0.0¶
- encoder_layers: int = 6¶
- eos_token_id: int = 50256¶
- generation_config: dict | WhisperSpeechRecognitionGenerationConfig = None¶
- init_std: float = 0.02¶
- is_encoder_decoder: bool = True¶
- mask_feature_length: int = 10¶
- mask_feature_min_masks: int = 0¶
- mask_feature_prob: float = 0.0¶
- mask_time_length: int = 10¶
- mask_time_min_masks: int = 2¶
- mask_time_prob: float = 0.05¶
- max_new_tokens: int = 448¶
- max_source_positions: int = 1500¶
- max_target_positions: int = 448¶
- name: str = 'whisper_speech_recognition'¶
- num_mel_bins: int = 80¶
- pad_token_id: int = 50256¶
- sampling_rate: int = 16000¶
- scale_embedding: bool = False¶
- suppress_tokens: List[int] = None¶
- torch_dtype: str = 'float32'¶
- use_cache: bool = False¶
- use_weighted_layer_sum: bool = False¶
- vocab_size: int = 51865¶
- class hezar.models.speech_recognition.whisper.whisper_speech_recognition_config.WhisperSpeechRecognitionGenerationConfig(alignment_heads: List[List[int]] = None, begin_suppress_tokens: List[int] = <factory>, bos_token_id: int = 50257, decoder_start_token_id: int = 50258, eos_token_id: int = 50257, forced_decoder_ids: List[List[int]] = <factory>, is_multilingual: bool = True, max_initial_timestamp_index: int = 50, max_length: int = 448, max_new_tokens: int = 448, no_timestamps_token_id: int = 50363, pad_token_id: int = 50257, prev_sot_token_id: int = 50361, return_timestamps: int = False, suppress_tokens: List[int] = None, task_to_id: dict[str, int] = <factory>)[source]¶
Bases: object
- alignment_heads: List[List[int]] = None¶
- begin_suppress_tokens: List[int]¶
- bos_token_id: int = 50257¶
- decoder_start_token_id: int = 50258¶
- eos_token_id: int = 50257¶
- forced_decoder_ids: List[List[int]]¶
- is_multilingual: bool = True¶
- max_initial_timestamp_index: int = 50¶
- max_length: int = 448¶
- max_new_tokens: int = 448¶
- no_timestamps_token_id: int = 50363¶
- pad_token_id: int = 50257¶
- prev_sot_token_id: int = 50361¶
- return_timestamps: int = False¶
- suppress_tokens: List[int] = None¶
- task_to_id: dict[str, int]¶