diff --git a/README.md b/README.md index 11409f1..6bd1ca7 100644 --- a/README.md +++ b/README.md @@ -59,3 +59,7 @@ The following line will put it on path for the current session. If you use a vir ``` export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`python3 -c 'import os; import nvidia.cublas.lib; import nvidia.cudnn.lib; print(os.path.dirname(nvidia.cublas.lib.__file__) + ":" + os.path.dirname(nvidia.cudnn.lib.__file__))'` ``` + +# Usage Guidance + +See [examples](./examples) for how to use the models. \ No newline at end of file diff --git a/examples/audio.opus b/examples/audio.opus new file mode 100644 index 0000000..767de24 Binary files /dev/null and b/examples/audio.opus differ diff --git a/examples/with_faster_whisper.py b/examples/with_faster_whisper.py new file mode 100644 index 0000000..dba9528 --- /dev/null +++ b/examples/with_faster_whisper.py @@ -0,0 +1,13 @@ +""" +Example using faster-whisper with ctranslate2 backend for fast audio transcription. +Run with: + pip install faster-whisper + python with_faster_whisper.py +""" + +import faster_whisper +model = faster_whisper.WhisperModel('ivrit-ai/whisper-large-v3-turbo-ct2') + +segs, _ = model.transcribe('audio.opus', language='he') +text = ' '.join(s.text for s in segs) +print(f'Transcribed text: {text}') \ No newline at end of file diff --git a/examples/with_stable_timestamps.py b/examples/with_stable_timestamps.py new file mode 100644 index 0000000..15e74d9 --- /dev/null +++ b/examples/with_stable_timestamps.py @@ -0,0 +1,13 @@ +""" +Example of Stable-ts with faster-whisper for fast and accurate transcription. 
+Run with: + pip install -U 'stable-ts[fw]' + python with_stable_timestamps.py +""" + +import stable_whisper + +model = stable_whisper.load_faster_whisper('ivrit-ai/whisper-large-v3-turbo-ct2') +segs = model.transcribe('audio.opus', language='he') # Word level timestamps enabled by default +for s in segs: + print(f'{s.start:.2f} - {s.end:.2f}: {s.text}') \ No newline at end of file diff --git a/examples/with_whisper_cpp.py b/examples/with_whisper_cpp.py new file mode 100644 index 0000000..beaab0a --- /dev/null +++ b/examples/with_whisper_cpp.py @@ -0,0 +1,21 @@ +""" +Example of using whispercpp for fast and lightweight transcription. +The ggml model is downloaded automatically from Hugging Face + (ivrit-ai/whisper-large-v3-turbo-ggml) via huggingface_hub. +Run with: + pip install pywhispercpp huggingface-hub + python with_whisper_cpp.py +""" + +from pywhispercpp.model import Model +from huggingface_hub import hf_hub_download + + +model_path = hf_hub_download( + repo_id="ivrit-ai/whisper-large-v3-turbo-ggml", + filename="ggml-model.bin" +) +model = Model(model_path) +segs = model.transcribe('audio.opus', language='he') +text = ' '.join(segment.text for segment in segs) +print(f'Transcribed text: {text}') \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 85c3b07..4fa57da 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,11 @@ +[project] +name = "asr-training" +version = "0.1.0" +description = "ASR training recipes created for ivrit.ai" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [] + [tool.black] line-length = 120