Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,11 @@ class ImageCaptioningInput(BaseModel):

class WhisperInputs(BaseModel):
audio: str
task: typing.Literal["translate", "transcribe"] = "transcribe"
language: str = None
task: typing.Literal["translate", "transcribe"] | None = None
language: str | None = None
return_timestamps: bool = False
decoder_kwargs: dict = None
max_length: int | None = None
decoder_kwargs: dict | None = None

chunk_length_s: float = 30
stride_length_s: typing.Tuple[float, float] = (6, 0)
Expand Down
2 changes: 2 additions & 0 deletions common/whisper.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def whisper(pipeline: PipelineInfo, inputs: WhisperInputs) -> AsrOutput:
generate_kwargs["language"] = inputs.language
if inputs.task:
generate_kwargs["task"] = inputs.task
if inputs.max_length:
generate_kwargs["max_length"] = inputs.max_length
if generate_kwargs:
kwargs["generate_kwargs"] = generate_kwargs

Expand Down
61 changes: 3 additions & 58 deletions scripts/run-dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,64 +19,9 @@ docker build . -f $VARIANT/Dockerfile -t $IMG
docker rm -f $IMG || true
docker run \
-e IMPORTS=$IMPORTS \
-e WAV2LIP_MODEL_IDS="
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep these around? They make it easy to test.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, will revert this one. I did have to add WHISPER_TOKENIZER_FROM for this model to work; we don't add that for any other Whisper model.

wav2lip_gan.pth
" \
-e DEFORUM_MODEL_IDS="
Protogen_V2.2.ckpt
"\
-e EMBEDDING_MODEL_IDS="
intfloat/e5-large-v2
intfloat/e5-base-v2
intfloat/multilingual-e5-base
"\
-e MMS_MODEL_IDS="
facebook/mms-1b-all
"\
-e WHISPER_MODEL_IDS="
dmatekenya/whisper-large-v3-chichewa
" \
-e WHISPER_TOKENIZER_FROM="
openai/whisper-large-v3
"\
-e SD_MODEL_IDS="
stabilityai/stable-diffusion-2-inpainting
runwayml/stable-diffusion-inpainting
" \
-e CONTROLNET_MODEL_IDS="
lllyasviel/sd-controlnet-canny
lllyasviel/sd-controlnet-depth
lllyasviel/sd-controlnet-hed
lllyasviel/sd-controlnet-mlsd
lllyasviel/sd-controlnet-normal
lllyasviel/sd-controlnet-openpose
lllyasviel/sd-controlnet-scribble
lllyasviel/sd-controlnet-seg
lllyasviel/control_v11f1e_sd15_tile
ioclab/control_v1p_sd15_brightness
monster-labs/control_v1p_sd15_qrcode_monster/v2
" \
-e DIS_MODEL_IDS="
isnet-general-use.pth
"\
-e U2NET_MODEL_IDS="
u2net
"\
-e SEAMLESS_MODEL_IDS="
facebook/seamless-m4t-v2-large
"\
-e SADTALKER_MODEL_IDS="
SadTalker_V0.0.2_512.safetensors
"\
-e GFPGAN_MODEL_IDS="
GFPGANv1.4
"\
-e ESRGAN_MODEL_IDS="
RealESRGAN_x2plus
"\
-e LLM_MODEL_IDS="
aisingapore/llama3-8b-cpt-sea-lionv2-instruct
"\
-e QUEUE_PREFIX="gooey-gpu/short" \
-e WHISPER_MODEL_IDS=akera/whisper-large-v3-kik-full_v2 \
-e WHISPER_TOKENIZER_FROM=akera/whisper-large-v3-kik-full_v2 \
-e C_FORCE_ROOT=1 \
-e BROKER_URL=${BROKER_URL:-"amqp://"} \
-e RESULT_BACKEND=${RESULT_BACKEND:-"redis://"} \
Expand Down
Loading