hezar.models.speech_recognition.whisper package
Submodules
- hezar.models.speech_recognition.whisper.whisper_feature_extractor module
  - WhisperFeatureExtractor
  - WhisperFeatureExtractorConfig
    - WhisperFeatureExtractorConfig.chunk_length
    - WhisperFeatureExtractorConfig.feature_size
    - WhisperFeatureExtractorConfig.hop_length
    - WhisperFeatureExtractorConfig.n_fft
    - WhisperFeatureExtractorConfig.name
    - WhisperFeatureExtractorConfig.padding
    - WhisperFeatureExtractorConfig.padding_side
    - WhisperFeatureExtractorConfig.padding_value
    - WhisperFeatureExtractorConfig.return_attention_mask
    - WhisperFeatureExtractorConfig.sampling_rate
 
 
- hezar.models.speech_recognition.whisper.whisper_speech_recognition module
  - WhisperSpeechRecognition
    - WhisperSpeechRecognition.compute_loss()
    - WhisperSpeechRecognition.forward()
    - WhisperSpeechRecognition.freeze_encoder()
    - WhisperSpeechRecognition.generate()
    - WhisperSpeechRecognition.get_decoder()
    - WhisperSpeechRecognition.get_encoder()
    - WhisperSpeechRecognition.get_input_embeddings()
    - WhisperSpeechRecognition.get_output_embeddings()
    - WhisperSpeechRecognition.is_generative
    - WhisperSpeechRecognition.loss_func_name
    - WhisperSpeechRecognition.post_process()
    - WhisperSpeechRecognition.prepare_inputs_for_generation()
    - WhisperSpeechRecognition.preprocess()
    - WhisperSpeechRecognition.required_backends
    - WhisperSpeechRecognition.resize_token_embeddings()
    - WhisperSpeechRecognition.set_output_embeddings()
 
 
- hezar.models.speech_recognition.whisper.whisper_speech_recognition_config module
  - WhisperSpeechRecognitionConfig
    - WhisperSpeechRecognitionConfig.activation_dropout
    - WhisperSpeechRecognitionConfig.activation_function
    - WhisperSpeechRecognitionConfig.apply_spec_augment
    - WhisperSpeechRecognitionConfig.attention_dropout
    - WhisperSpeechRecognitionConfig.begin_suppress_tokens
    - WhisperSpeechRecognitionConfig.bos_token_id
    - WhisperSpeechRecognitionConfig.classifier_proj_size
    - WhisperSpeechRecognitionConfig.d_model
    - WhisperSpeechRecognitionConfig.decoder_attention_heads
    - WhisperSpeechRecognitionConfig.decoder_ffn_dim
    - WhisperSpeechRecognitionConfig.decoder_layerdrop
    - WhisperSpeechRecognitionConfig.decoder_layers
    - WhisperSpeechRecognitionConfig.decoder_start_token_id
    - WhisperSpeechRecognitionConfig.dropout
    - WhisperSpeechRecognitionConfig.encoder_attention_heads
    - WhisperSpeechRecognitionConfig.encoder_ffn_dim
    - WhisperSpeechRecognitionConfig.encoder_layerdrop
    - WhisperSpeechRecognitionConfig.encoder_layers
    - WhisperSpeechRecognitionConfig.eos_token_id
    - WhisperSpeechRecognitionConfig.generation_config
    - WhisperSpeechRecognitionConfig.init_std
    - WhisperSpeechRecognitionConfig.is_encoder_decoder
    - WhisperSpeechRecognitionConfig.mask_feature_length
    - WhisperSpeechRecognitionConfig.mask_feature_min_masks
    - WhisperSpeechRecognitionConfig.mask_feature_prob
    - WhisperSpeechRecognitionConfig.mask_time_length
    - WhisperSpeechRecognitionConfig.mask_time_min_masks
    - WhisperSpeechRecognitionConfig.mask_time_prob
    - WhisperSpeechRecognitionConfig.max_new_tokens
    - WhisperSpeechRecognitionConfig.max_source_positions
    - WhisperSpeechRecognitionConfig.max_target_positions
    - WhisperSpeechRecognitionConfig.name
    - WhisperSpeechRecognitionConfig.num_hidden_layers
    - WhisperSpeechRecognitionConfig.num_mel_bins
    - WhisperSpeechRecognitionConfig.pad_token_id
    - WhisperSpeechRecognitionConfig.sampling_rate
    - WhisperSpeechRecognitionConfig.scale_embedding
    - WhisperSpeechRecognitionConfig.suppress_tokens
    - WhisperSpeechRecognitionConfig.torch_dtype
    - WhisperSpeechRecognitionConfig.use_cache
    - WhisperSpeechRecognitionConfig.use_weighted_layer_sum
    - WhisperSpeechRecognitionConfig.vocab_size

  - WhisperSpeechRecognitionGenerationConfig
    - WhisperSpeechRecognitionGenerationConfig.alignment_heads
    - WhisperSpeechRecognitionGenerationConfig.begin_suppress_tokens
    - WhisperSpeechRecognitionGenerationConfig.bos_token_id
    - WhisperSpeechRecognitionGenerationConfig.decoder_start_token_id
    - WhisperSpeechRecognitionGenerationConfig.dict()
    - WhisperSpeechRecognitionGenerationConfig.eos_token_id
    - WhisperSpeechRecognitionGenerationConfig.forced_decoder_ids
    - WhisperSpeechRecognitionGenerationConfig.is_multilingual
    - WhisperSpeechRecognitionGenerationConfig.max_initial_timestamp_index
    - WhisperSpeechRecognitionGenerationConfig.max_length
    - WhisperSpeechRecognitionGenerationConfig.max_new_tokens
    - WhisperSpeechRecognitionGenerationConfig.no_timestamps_token_id
    - WhisperSpeechRecognitionGenerationConfig.pad_token_id
    - WhisperSpeechRecognitionGenerationConfig.prev_sot_token_id
    - WhisperSpeechRecognitionGenerationConfig.return_timestamps
    - WhisperSpeechRecognitionGenerationConfig.suppress_tokens
    - WhisperSpeechRecognitionGenerationConfig.task_to_id
 
 
- hezar.models.speech_recognition.whisper.whisper_tokenizer module
  - WhisperBPEConfig
    - WhisperBPEConfig.add_bos_token
    - WhisperBPEConfig.add_prefix_space
    - WhisperBPEConfig.bos_token
    - WhisperBPEConfig.eos_token
    - WhisperBPEConfig.language
    - WhisperBPEConfig.model_max_length
    - WhisperBPEConfig.name
    - WhisperBPEConfig.notimestamps_token
    - WhisperBPEConfig.pad_to_multiple_of
    - WhisperBPEConfig.pad_token
    - WhisperBPEConfig.padding_side
    - WhisperBPEConfig.predict_timestamps
    - WhisperBPEConfig.show_progress
    - WhisperBPEConfig.stride
    - WhisperBPEConfig.task
    - WhisperBPEConfig.transcribe_token
    - WhisperBPEConfig.translate_token
    - WhisperBPEConfig.truncation_side
    - WhisperBPEConfig.unk_token

  - WhisperBPETokenizer