@@ -70,38 +70,54 @@ class AutomaticSpeechRecognitionPipeline(ChunkPipeline):
    The input can be either a raw waveform or an audio file. In the case of an audio file, ffmpeg should be installed
    to support multiple audio formats.
+
+    Arguments:
+        model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
+            The model that will be used by the pipeline to make predictions. This needs to be a model inheriting from
+            [`PreTrainedModel`] for PyTorch and [`TFPreTrainedModel`] for TensorFlow.
+        tokenizer ([`PreTrainedTokenizer`]):
+            The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
+            [`PreTrainedTokenizer`].
+        feature_extractor ([`SequenceFeatureExtractor`]):
+            The feature extractor that will be used by the pipeline to encode the waveform for the model.
+        chunk_length_s (`float`, *optional*, defaults to 0):
+            The input length for each chunk. If `chunk_length_s = 0` then chunking is disabled (default). Only
+            available for CTC models, e.g. [`Wav2Vec2ForCTC`].
+
+            <Tip>
+
+            For more information on how to effectively use `chunk_length_s`, please have a look at the [ASR chunking
+            blog post](https://huggingface.co/blog/asr-chunking).
+
+            </Tip>
+
+        stride_length_s (`float`, *optional*, defaults to `chunk_length_s / 6`):
+            The length of stride on the left and right of each chunk. Used only with `chunk_length_s > 0`. This enables
+            the model to *see* more context and infer letters better than without this context, but the pipeline
+            discards the stride bits at the end to make the final reconstitution as perfect as possible.
+
+            <Tip>
+
+            For more information on how to effectively use `stride_length_s`, please have a look at the [ASR chunking
+            blog post](https://huggingface.co/blog/asr-chunking).
+
+            </Tip>
+
+        framework (`str`, *optional*):
+            The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must be
+            installed. If no framework is specified, will default to the one currently installed. If no framework is
+            specified and both frameworks are installed, will default to the framework of the `model`, or to PyTorch if
+            no model is provided.
+        device (`int`, *optional*, defaults to -1):
+            Device ordinal for CPU/GPU support. Setting this to -1 will leverage the CPU, a positive value will run
+            the model on the associated CUDA device id.
+        decoder (`pyctcdecode.BeamSearchDecoderCTC`, *optional*):
+            [PyCTCDecode's
+            BeamSearchDecoderCTC](https://github.com/kensho-technologies/pyctcdecode/blob/2fd33dc37c4111417e08d89ccd23d28e9b308d19/pyctcdecode/decoder.py#L180)
+            can be passed for language model boosted decoding. See [`Wav2Vec2ProcessorWithLM`] for more information.
    """
    def __init__(self, feature_extractor: Union["SequenceFeatureExtractor", str], *args, **kwargs):
- """
77
- Arguments:
78
- model ([`PreTrainedModel`] or [`TFPreTrainedModel`]):
79
- The model that will be used by the pipeline to make predictions. This needs to be a model inheriting
80
- from [`PreTrainedModel`] for PyTorch and [`TFPreTrainedModel`] for TensorFlow.
81
- tokenizer ([`PreTrainedTokenizer`]):
82
- The tokenizer that will be used by the pipeline to encode data for the model. This object inherits from
83
- [`PreTrainedTokenizer`].
84
- feature_extractor ([`SequenceFeatureExtractor`]):
85
- The feature extractor that will be used by the pipeline to encode waveform for the model.
86
- chunk_length_s (`float`, *optional*, defaults to 0):
87
- The input length for in each chunk. If `0` then chunking is disabled (default). Only available for CTC
88
- models.
89
- stride_length_s (`float`, *optional*, defaults to `chunk_length_s / 6`):
90
- The length of stride on the left and right of each chunk. Used only with `chunk_length_s > 0`. This
91
- enables the model to *see* more context and infer letters better than without this context but the
92
- pipeline discards the stride bits at the end to make the final reconstitution as perfect as possible.
93
- framework (`str`, *optional*):
94
- The framework to use, either `"pt"` for PyTorch or `"tf"` for TensorFlow. The specified framework must
95
- be installed.
96
-
97
- If no framework is specified, will default to the one currently installed. If no framework is specified
98
- and both frameworks are installed, will default to the framework of the `model`, or to PyTorch if no
99
- model is provided.
100
- device (`int`, *optional*, defaults to -1):
101
- Device ordinal for CPU/GPU supports. Setting this to -1 will leverage CPU, a positive will run the
102
- model on the associated CUDA device id.
103
- """
104
-
105
        super().__init__(*args, **kwargs)
        self.feature_extractor = feature_extractor
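
To make the chunking arguments added above concrete, here is a minimal usage sketch (not part of the commit). The checkpoint name and audio path are illustrative placeholders, and `chunk_length_s`/`stride_length_s` are passed at call time, as in the linked ASR chunking blog post.

```python
# Minimal sketch of chunked inference, assuming a CTC checkpoint and a local
# audio file (both placeholders, not taken from this commit).
from transformers import pipeline

asr = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-base-960h",  # chunking is only available for CTC models
    device=-1,  # -1 = CPU; a CUDA device id (e.g. 0) would run on GPU
)

# Each 10s chunk also sees stride context on its left and right; the model uses
# that context for inference, but the pipeline drops it when stitching chunks.
# ffmpeg decodes the file because a path, not a raw waveform, is passed.
output = asr("long_recording.mp3", chunk_length_s=10, stride_length_s=2)
print(output["text"])
```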
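Similarly, a hedged sketch of the new `decoder` argument for language-model-boosted decoding. It assumes `pyctcdecode` and `kenlm` are installed and uses a Hub checkpoint that ships an n-gram LM; treat the wiring as illustrative and see [`Wav2Vec2ProcessorWithLM`] for the supported path.

```python
# Sketch of passing a pyctcdecode BeamSearchDecoderCTC to the pipeline,
# assuming pyctcdecode and kenlm are installed; names are illustrative.
from transformers import AutoModelForCTC, Wav2Vec2ProcessorWithLM, pipeline

repo = "patrickvonplaten/wav2vec2-base-100h-with-lm"
processor = Wav2Vec2ProcessorWithLM.from_pretrained(repo)
model = AutoModelForCTC.from_pretrained(repo)

asr = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,  # pyctcdecode.BeamSearchDecoderCTC for LM boosting
)
print(asr("long_recording.mp3", chunk_length_s=10)["text"])
```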