hezar.data.datasets.speech_recognition_dataset module

class hezar.data.datasets.speech_recognition_dataset.SpeechRecognitionDataset(config: SpeechRecognitionDatasetConfig, split=None, preprocessor=None, **kwargs)

Bases: Dataset

required_backends: List[str | Backends] = [Backends.LIBROSA, Backends.DATASETS]

class hezar.data.datasets.speech_recognition_dataset.SpeechRecognitionDatasetConfig(path: 'str' = None, task: 'TaskType | List[TaskType]' = None, max_size: 'int | float' = None, hf_load_kwargs: 'dict' = None, sampling_rate: 'int' = 16000, audio_array_padding: 'bool | str | PaddingType' = 'longest', max_audio_array_length: 'int' = None, labels_padding: 'bool | str | PaddingType' = 'longest', labels_max_length: 'int' = None, audio_file_path_column: 'str' = 'path', audio_column: 'str' = 'audio', audio_array_column: 'str' = 'array', transcript_column: 'str' = 'sentence')

Bases: DatasetConfig

audio_array_column: str = 'array'
audio_array_padding: bool | str | PaddingType = 'longest'
audio_column: str = 'audio'
audio_file_path_column: str = 'path'
labels_max_length: int = None
labels_padding: bool | str | PaddingType = 'longest'
max_audio_array_length: int = None
name: str = 'speech_recognition'
path: str = None
sampling_rate: int = 16000
task: TaskType | List[TaskType] = 'speech_recognition'
transcript_column: str = 'sentence'