hezar.models.speech_recognition.whisper package
Submodules
- hezar.models.speech_recognition.whisper.whisper_feature_extractor module
  - WhisperFeatureExtractor
  - WhisperFeatureExtractorConfig
    - WhisperFeatureExtractorConfig.chunk_length
    - WhisperFeatureExtractorConfig.feature_size
    - WhisperFeatureExtractorConfig.hop_length
    - WhisperFeatureExtractorConfig.n_fft
    - WhisperFeatureExtractorConfig.name
    - WhisperFeatureExtractorConfig.padding
    - WhisperFeatureExtractorConfig.padding_side
    - WhisperFeatureExtractorConfig.padding_value
    - WhisperFeatureExtractorConfig.return_attention_mask
    - WhisperFeatureExtractorConfig.sampling_rate
- hezar.models.speech_recognition.whisper.whisper_speech_recognition module
  - WhisperSpeechRecognition
    - WhisperSpeechRecognition.compute_loss()
    - WhisperSpeechRecognition.forward()
    - WhisperSpeechRecognition.freeze_encoder()
    - WhisperSpeechRecognition.generate()
    - WhisperSpeechRecognition.get_decoder()
    - WhisperSpeechRecognition.get_encoder()
    - WhisperSpeechRecognition.get_input_embeddings()
    - WhisperSpeechRecognition.get_output_embeddings()
    - WhisperSpeechRecognition.is_generative
    - WhisperSpeechRecognition.loss_func_name
    - WhisperSpeechRecognition.post_process()
    - WhisperSpeechRecognition.prepare_inputs_for_generation()
    - WhisperSpeechRecognition.preprocess()
    - WhisperSpeechRecognition.required_backends
    - WhisperSpeechRecognition.resize_token_embeddings()
    - WhisperSpeechRecognition.set_output_embeddings()
- hezar.models.speech_recognition.whisper.whisper_speech_recognition_config module
  - WhisperSpeechRecognitionConfig
    - WhisperSpeechRecognitionConfig.activation_dropout
    - WhisperSpeechRecognitionConfig.activation_function
    - WhisperSpeechRecognitionConfig.apply_spec_augment
    - WhisperSpeechRecognitionConfig.attention_dropout
    - WhisperSpeechRecognitionConfig.begin_suppress_tokens
    - WhisperSpeechRecognitionConfig.bos_token_id
    - WhisperSpeechRecognitionConfig.classifier_proj_size
    - WhisperSpeechRecognitionConfig.d_model
    - WhisperSpeechRecognitionConfig.decoder_attention_heads
    - WhisperSpeechRecognitionConfig.decoder_ffn_dim
    - WhisperSpeechRecognitionConfig.decoder_layerdrop
    - WhisperSpeechRecognitionConfig.decoder_layers
    - WhisperSpeechRecognitionConfig.decoder_start_token_id
    - WhisperSpeechRecognitionConfig.dropout
    - WhisperSpeechRecognitionConfig.encoder_attention_heads
    - WhisperSpeechRecognitionConfig.encoder_ffn_dim
    - WhisperSpeechRecognitionConfig.encoder_layerdrop
    - WhisperSpeechRecognitionConfig.encoder_layers
    - WhisperSpeechRecognitionConfig.eos_token_id
    - WhisperSpeechRecognitionConfig.generation_config
    - WhisperSpeechRecognitionConfig.init_std
    - WhisperSpeechRecognitionConfig.is_encoder_decoder
    - WhisperSpeechRecognitionConfig.mask_feature_length
    - WhisperSpeechRecognitionConfig.mask_feature_min_masks
    - WhisperSpeechRecognitionConfig.mask_feature_prob
    - WhisperSpeechRecognitionConfig.mask_time_length
    - WhisperSpeechRecognitionConfig.mask_time_min_masks
    - WhisperSpeechRecognitionConfig.mask_time_prob
    - WhisperSpeechRecognitionConfig.max_new_tokens
    - WhisperSpeechRecognitionConfig.max_source_positions
    - WhisperSpeechRecognitionConfig.max_target_positions
    - WhisperSpeechRecognitionConfig.name
    - WhisperSpeechRecognitionConfig.num_hidden_layers
    - WhisperSpeechRecognitionConfig.num_mel_bins
    - WhisperSpeechRecognitionConfig.pad_token_id
    - WhisperSpeechRecognitionConfig.sampling_rate
    - WhisperSpeechRecognitionConfig.scale_embedding
    - WhisperSpeechRecognitionConfig.suppress_tokens
    - WhisperSpeechRecognitionConfig.torch_dtype
    - WhisperSpeechRecognitionConfig.use_cache
    - WhisperSpeechRecognitionConfig.use_weighted_layer_sum
    - WhisperSpeechRecognitionConfig.vocab_size
  - WhisperSpeechRecognitionGenerationConfig
    - WhisperSpeechRecognitionGenerationConfig.alignment_heads
    - WhisperSpeechRecognitionGenerationConfig.begin_suppress_tokens
    - WhisperSpeechRecognitionGenerationConfig.bos_token_id
    - WhisperSpeechRecognitionGenerationConfig.decoder_start_token_id
    - WhisperSpeechRecognitionGenerationConfig.dict()
    - WhisperSpeechRecognitionGenerationConfig.eos_token_id
    - WhisperSpeechRecognitionGenerationConfig.forced_decoder_ids
    - WhisperSpeechRecognitionGenerationConfig.is_multilingual
    - WhisperSpeechRecognitionGenerationConfig.max_initial_timestamp_index
    - WhisperSpeechRecognitionGenerationConfig.max_length
    - WhisperSpeechRecognitionGenerationConfig.max_new_tokens
    - WhisperSpeechRecognitionGenerationConfig.no_timestamps_token_id
    - WhisperSpeechRecognitionGenerationConfig.pad_token_id
    - WhisperSpeechRecognitionGenerationConfig.prev_sot_token_id
    - WhisperSpeechRecognitionGenerationConfig.return_timestamps
    - WhisperSpeechRecognitionGenerationConfig.suppress_tokens
    - WhisperSpeechRecognitionGenerationConfig.task_to_id
- hezar.models.speech_recognition.whisper.whisper_tokenizer module
  - WhisperBPEConfig
    - WhisperBPEConfig.add_bos_token
    - WhisperBPEConfig.add_prefix_space
    - WhisperBPEConfig.bos_token
    - WhisperBPEConfig.eos_token
    - WhisperBPEConfig.language
    - WhisperBPEConfig.model_max_length
    - WhisperBPEConfig.name
    - WhisperBPEConfig.notimestamps_token
    - WhisperBPEConfig.pad_to_multiple_of
    - WhisperBPEConfig.pad_token
    - WhisperBPEConfig.padding_side
    - WhisperBPEConfig.predict_timestamps
    - WhisperBPEConfig.show_progress
    - WhisperBPEConfig.stride
    - WhisperBPEConfig.task
    - WhisperBPEConfig.transcribe_token
    - WhisperBPEConfig.translate_token
    - WhisperBPEConfig.truncation_side
    - WhisperBPEConfig.unk_token
  - WhisperBPETokenizer