temp: hardcode custom maxTokens

gary149 · gary149 · commit 48b511881ce2 · 2024-09-23T11:45:37.000+02:00
diff --git a/src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte b/src/lib/components/InferencePlayground/InferencePlaygroundGenerationConfig.svelte
@@ -6,7 +6,54 @@
 	export let conversation: Conversation;
 	export let classNames = "";
 
-	$: modelMaxLength = conversation.model.tokenizerConfig.model_max_length;
+	const customMaxTokens: Record<string, number> = { // Temporary hard-coded max-token overrides keyed by model id; looked up before falling back to the tokenizer config.
+		"01-ai/Yi-1.5-34B-Chat": 2048,
+		"HuggingFaceM4/idefics-9b-instruct": 2048,
+		"deepseek-ai/DeepSeek-Coder-V2-Instruct": 16384,
+		"bigcode/starcoder": 8192,
+		"bigcode/starcoderplus": 8192,
+		"HuggingFaceH4/starcoderbase-finetuned-oasst1": 8192,
+		"google/gemma-7b": 8192,
+		"google/gemma-1.1-7b-it": 8192,
+		"google/gemma-2b": 8192,
+		"google/gemma-1.1-2b-it": 8192,
+		"google/gemma-2-27b-it": 8192,
+		"google/gemma-2-9b-it": 4096,
+		"google/gemma-2-2b-it": 8192,
+		"tiiuae/falcon-7b": 8192,
+		"tiiuae/falcon-7b-instruct": 8192,
+		"timdettmers/guanaco-33b-merged": 2048,
+		"mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+		"Qwen/Qwen2.5-72B-Instruct": 32768,
+		"meta-llama/Meta-Llama-3-70B-Instruct": 8192,
+		"CohereForAI/c4ai-command-r-plus-08-2024": 32768,
+		"NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO": 32768,
+		"meta-llama/Llama-2-70b-chat-hf": 8192,
+		"HuggingFaceH4/zephyr-7b-alpha": 17432, // NOTE(review): the only value in this table that is not a power of two — confirm it is intended.
+		"HuggingFaceH4/zephyr-7b-beta": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.1": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.2": 32768,
+		"mistralai/Mistral-7B-Instruct-v0.3": 32768,
+		"mistralai/Mistral-Nemo-Instruct-2407": 32768,
+		"meta-llama/Meta-Llama-3-8B-Instruct": 8192,
+		"mistralai/Mistral-7B-v0.1": 32768,
+		"bigcode/starcoder2-3b": 16384,
+		"bigcode/starcoder2-15b": 16384,
+		"HuggingFaceH4/starchat2-15b-v0.1": 16384,
+		"codellama/CodeLlama-7b-hf": 8192,
+		"codellama/CodeLlama-13b-hf": 8192,
+		"codellama/CodeLlama-34b-Instruct-hf": 8192,
+		"meta-llama/Llama-2-7b-chat-hf": 8192,
+		"meta-llama/Llama-2-13b-chat-hf": 8192,
+		"OpenAssistant/oasst-sft-6-llama-30b": 2048,
+		"TheBloke/vicuna-7B-v1.5-GPTQ": 2048,
+		"HuggingFaceH4/starchat-beta": 8192,
+		"bigcode/octocoder": 8192,
+		"vwxyzjn/starcoderbase-triviaqa": 8192,
+		"lvwerra/starcoderbase-gsm8k": 8192,
+	};
+
+	$: modelMaxLength = customMaxTokens[conversation.model.id] ?? conversation.model.tokenizerConfig.model_max_length; // A hard-coded override for this model id wins; otherwise use the tokenizer config's model_max_length.
 	$: maxTokens = Math.min(modelMaxLength ?? GENERATION_CONFIG_SETTINGS["max_tokens"].max, 64_000);
 </script>