diff --git a/README.md b/README.md index 9181969c3..4aad5ff1c 100644 --- a/README.md +++ b/README.md @@ -451,3 +451,5 @@ To find compatible models on the Hub, select the "transformers.js" library tag i 1. **[XLM-RoBERTa](https://huggingface.co/docs/transformers/model_doc/xlm-roberta)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://huggingface.co/papers/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. 1. **[YOLOS](https://huggingface.co/docs/transformers/model_doc/yolos)** (from Huazhong University of Science & Technology) released with the paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://huggingface.co/papers/2106.00666) by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu. 1. **[Youtu-LLM](https://huggingface.co/docs/transformers/model_doc/youtu)** (from the Tencent Youtu Team) released with the paper [Youtu-LLM: Unlocking the Native Agentic Potential for Lightweight Large Language Models](https://huggingface.co/papers/2512.24618) by Junru Lu, Jiarui Qin, Lingfeng Qiao, Yinghui Li, Xinyi Dai, Bo Ke, Jianfeng He, Ruizhi Qiao, Di Yin, Xing Sun, Yunsheng Wu, Yinsong Liu, Shuangyin Liu, Mingkong Tang, Haodong Lin, Jiayi Kuang, Fanxu Meng, Xiaojuan Tang, Yunjia Xi, Junjie Huang, Haotong Yang, Zhenyi Shen, Yangning Li, Qianwen Zhang, Yifei Yu, Siyu An, Junnan Dong, Qiufeng Wang, Jie Wang, Keyu Chen, Wei Wen, Taian Guo, Zhifeng Shen, Daohai Yu, Jiahao Li, Ke Li, Zongyi Li, Xiaoyu Tan. +1. 
**Zaya** (from Zyphra) released with the paper [ZAYA1-8B Technical Report](https://huggingface.co/papers/2605.05365) by Robert Washbourne, Rishi Iyer, Tomas Figliolia, Henry Zheng, Ryan Lorig-Roach, Sungyeon Yang, Pritish Yuvraj, Quentin Anthony, Yury Tokpanov, Xiao Yang, Ganesh Nanduru, Stephen Ebert, Praneeth Medepalli, Skyler Szot, Srivatsan Rajagopal, Alex Ong, Bhavana Mehta, Beren Millidge. + diff --git a/packages/transformers/docs/snippets/5_supported-models.snippet b/packages/transformers/docs/snippets/5_supported-models.snippet index a265141ba..a296a1abd 100644 --- a/packages/transformers/docs/snippets/5_supported-models.snippet +++ b/packages/transformers/docs/snippets/5_supported-models.snippet @@ -211,4 +211,5 @@ 1. **[XLM](https://huggingface.co/docs/transformers/model_doc/xlm)** (from Facebook) released together with the paper [Cross-lingual Language Model Pretraining](https://huggingface.co/papers/1901.07291) by Guillaume Lample and Alexis Conneau. 1. **[XLM-RoBERTa](https://huggingface.co/docs/transformers/model_doc/xlm-roberta)** (from Facebook AI), released together with the paper [Unsupervised Cross-lingual Representation Learning at Scale](https://huggingface.co/papers/1911.02116) by Alexis Conneau*, Kartikay Khandelwal*, Naman Goyal, Vishrav Chaudhary, Guillaume Wenzek, Francisco Guzmán, Edouard Grave, Myle Ott, Luke Zettlemoyer and Veselin Stoyanov. 1. **[YOLOS](https://huggingface.co/docs/transformers/model_doc/yolos)** (from Huazhong University of Science & Technology) released with the paper [You Only Look at One Sequence: Rethinking Transformer in Vision through Object Detection](https://huggingface.co/papers/2106.00666) by Yuxin Fang, Bencheng Liao, Xinggang Wang, Jiemin Fang, Jiyang Qi, Rui Wu, Jianwei Niu, Wenyu Liu. -1. 
**[Youtu-LLM](https://huggingface.co/docs/transformers/model_doc/youtu)** (from the Tencent Youtu Team) released with the paper [Youtu-LLM: Unlocking the Native Agentic Potential for Lightweight Large Language Models](https://huggingface.co/papers/2512.24618) by Junru Lu, Jiarui Qin, Lingfeng Qiao, Yinghui Li, Xinyi Dai, Bo Ke, Jianfeng He, Ruizhi Qiao, Di Yin, Xing Sun, Yunsheng Wu, Yinsong Liu, Shuangyin Liu, Mingkong Tang, Haodong Lin, Jiayi Kuang, Fanxu Meng, Xiaojuan Tang, Yunjia Xi, Junjie Huang, Haotong Yang, Zhenyi Shen, Yangning Li, Qianwen Zhang, Yifei Yu, Siyu An, Junnan Dong, Qiufeng Wang, Jie Wang, Keyu Chen, Wei Wen, Taian Guo, Zhifeng Shen, Daohai Yu, Jiahao Li, Ke Li, Zongyi Li, Xiaoyu Tan. \ No newline at end of file +1. **[Youtu-LLM](https://huggingface.co/docs/transformers/model_doc/youtu)** (from the Tencent Youtu Team) released with the paper [Youtu-LLM: Unlocking the Native Agentic Potential for Lightweight Large Language Models](https://huggingface.co/papers/2512.24618) by Junru Lu, Jiarui Qin, Lingfeng Qiao, Yinghui Li, Xinyi Dai, Bo Ke, Jianfeng He, Ruizhi Qiao, Di Yin, Xing Sun, Yunsheng Wu, Yinsong Liu, Shuangyin Liu, Mingkong Tang, Haodong Lin, Jiayi Kuang, Fanxu Meng, Xiaojuan Tang, Yunjia Xi, Junjie Huang, Haotong Yang, Zhenyi Shen, Yangning Li, Qianwen Zhang, Yifei Yu, Siyu An, Junnan Dong, Qiufeng Wang, Jie Wang, Keyu Chen, Wei Wen, Taian Guo, Zhifeng Shen, Daohai Yu, Jiahao Li, Ke Li, Zongyi Li, Xiaoyu Tan. +1. **Zaya** (from Zyphra) released with the paper [ZAYA1-8B Technical Report](https://huggingface.co/papers/2605.05365) by Robert Washbourne, Rishi Iyer, Tomas Figliolia, Henry Zheng, Ryan Lorig-Roach, Sungyeon Yang, Pritish Yuvraj, Quentin Anthony, Yury Tokpanov, Xiao Yang, Ganesh Nanduru, Stephen Ebert, Praneeth Medepalli, Skyler Szot, Srivatsan Rajagopal, Alex Ong, Bhavana Mehta, Beren Millidge. 
diff --git a/packages/transformers/src/configs.js b/packages/transformers/src/configs.js
index f6df6c664..911691165 100644
--- a/packages/transformers/src/configs.js
+++ b/packages/transformers/src/configs.js
@@ -216,6 +216,13 @@ function getNormalizedConfig(config) {
             mapping['dim_kv'] = config.model_type === 'deepseek_v4' ? 'head_dim' : 'qk_head_dim';
             mapping['num_attention_heads'] = 'num_attention_heads';
             break;
+        case 'zaya':
+            mapping['num_heads'] = 'num_key_value_heads';
+            mapping['num_layers'] = 'num_hidden_layers';
+            mapping['hidden_size'] = 'hidden_size';
+            mapping['dim_kv'] = 'head_dim';
+            mapping['num_attention_heads'] = 'num_attention_heads';
+            break;
 
         // Encoder-decoder models
         case 't5':
@@ -420,6 +427,22 @@ export function getCacheNames(config, options) {
             }
         }
         return names;
+    } else if (config.model_type === 'zaya') {
+        // Cache entries are registered for every `stride`-th layer only, four tensors per layer.
+        const { num_hidden_layers, cca_time1 } = /** @type {any} */ (config);
+        const stride = cca_time1 ?? 1;
+        if (!Number.isInteger(stride) || stride < 1) {
+            // A zero or negative stride never advances the loop below (it would spin forever),
+            // and a fractional one would produce names for non-existent layer indices.
+            throw new Error(`Invalid "cca_time1" in zaya config: expected a positive integer, got ${stride}.`);
+        }
+        for (let i = 0; i < num_hidden_layers; i += stride) {
+            names.add(`${pkv_prefix}.${i}.key`);
+            names.add(`${pkv_prefix}.${i}.value`);
+            names.add(`${pkv_prefix}.${i}.conv_state`);
+            names.add(`${pkv_prefix}.${i}.shift_state`);
+        }
+        return names;
     } else if (['lfm2_vl', 'qwen3_5', 'qwen3_5_moe', 'voxtral_realtime'].includes(config.model_type)) {
         let subConfig;
         if (config.model_type === 'voxtral_realtime' && options?.session_name === 'audio_encoder') {
diff --git a/packages/transformers/src/models/models.js b/packages/transformers/src/models/models.js
index 6bdd55338..4d5fc01e3 100644
--- a/packages/transformers/src/models/models.js
+++ b/packages/transformers/src/models/models.js
@@ -195,5 +195,6 @@ export * from './xlm/modeling_xlm.js';
 export * from './xlm_roberta/modeling_xlm_roberta.js';
 export * from './yolos/modeling_yolos.js';
 export * from './youtu/modeling_youtu.js';
+export * from './zaya/modeling_zaya.js';
 
 export { PreTrainedModel } from './modeling_utils.js';
diff --git a/packages/transformers/src/models/registry.js
b/packages/transformers/src/models/registry.js index 06c1bc24d..4e1bb8ef8 100644 --- a/packages/transformers/src/models/registry.js +++ b/packages/transformers/src/models/registry.js @@ -181,6 +181,7 @@ const MODEL_MAPPING_NAMES_DECODER_ONLY = new Map([ ['modernbert-decoder', 'ModernBertDecoderModel'], ['hunyuan_v1_dense', 'HunYuanDenseV1Model'], ['youtu', 'YoutuModel'], + ['zaya', 'ZayaModel'], ]); export const MODEL_FOR_SPEECH_SEQ_2_SEQ_MAPPING_NAMES = new Map([ @@ -329,6 +330,7 @@ export const MODEL_FOR_CAUSAL_LM_MAPPING_NAMES = new Map([ ['modernbert-decoder', 'ModernBertDecoderForCausalLM'], ['hunyuan_v1_dense', 'HunYuanDenseV1ForCausalLM'], ['youtu', 'YoutuForCausalLM'], + ['zaya', 'ZayaForCausalLM'], // Also image-text-to-text ['phi3_v', 'Phi3VForCausalLM'], diff --git a/packages/transformers/src/models/zaya/modeling_zaya.js b/packages/transformers/src/models/zaya/modeling_zaya.js new file mode 100644 index 000000000..501229fe1 --- /dev/null +++ b/packages/transformers/src/models/zaya/modeling_zaya.js @@ -0,0 +1,5 @@ +import { PreTrainedModel } from '../modeling_utils.js'; + +export class ZayaPreTrainedModel extends PreTrainedModel {} +export class ZayaModel extends ZayaPreTrainedModel {} +export class ZayaForCausalLM extends ZayaPreTrainedModel {} diff --git a/packages/transformers/src/utils/hub.js b/packages/transformers/src/utils/hub.js index 808a32010..09f42e168 100755 --- a/packages/transformers/src/utils/hub.js +++ b/packages/transformers/src/utils/hub.js @@ -8,7 +8,14 @@ import { apis, env } from '../env.js'; import { DefaultProgressCallback, dispatchCallback } from './core.js'; import { FileResponse } from './hub/FileResponse.js'; import { FileCache } from './cache/FileCache.js'; -import { handleError, isValidUrl, pathJoin, isValidHfModelId, makePretrainedOptionsKey, readResponse } from './hub/utils.js'; +import { + handleError, + isValidUrl, + pathJoin, + isValidHfModelId, + makePretrainedOptionsKey, + readResponse, +} from './hub/utils.js'; import 
{ getCache, tryCache } from './cache.js'; import { get_file_metadata } from './model_registry/get_file_metadata.js'; import { logger } from './logger.js'; diff --git a/packages/transformers/tests/models/zaya/test_modeling_zaya.js b/packages/transformers/tests/models/zaya/test_modeling_zaya.js new file mode 100644 index 000000000..fcda4b9e1 --- /dev/null +++ b/packages/transformers/tests/models/zaya/test_modeling_zaya.js @@ -0,0 +1,51 @@ +import { PreTrainedTokenizer, ZayaForCausalLM } from "../../../src/transformers.js"; + +import { MAX_MODEL_LOAD_TIME, MAX_TEST_EXECUTION_TIME, MAX_MODEL_DISPOSE_TIME, DEFAULT_MODEL_OPTIONS } from "../../init.js"; + +export default () => { + describe("ZayaForCausalLM", () => { + const model_id = "onnx-internal-testing/tiny-random-ZayaForCausalLM"; + /** @type {ZayaForCausalLM} */ + let model; + /** @type {PreTrainedTokenizer} */ + let tokenizer; + beforeAll(async () => { + model = await ZayaForCausalLM.from_pretrained(model_id, DEFAULT_MODEL_OPTIONS); + tokenizer = await PreTrainedTokenizer.from_pretrained(model_id); + tokenizer.padding_side = "left"; + }, MAX_MODEL_LOAD_TIME); + + it( + "batch_size=1", + async () => { + const inputs = tokenizer("hello"); + const outputs = await model.generate({ + ...inputs, + max_length: 10, + }); + expect(outputs.tolist()).toEqual([[2n, 23391n, 106n, 155331n, 155331n, 155331n, 155331n, 155331n, 155331n, 155331n]]); + }, + MAX_TEST_EXECUTION_TIME, + ); + + it( + "batch_size>1", + async () => { + const inputs = tokenizer(["hello", "hello world"], { padding: true }); + const outputs = await model.generate({ + ...inputs, + max_length: 10, + }); + expect(outputs.tolist()).toEqual([ + [0n, 2n, 23391n, 106n, 155331n, 155331n, 155331n, 155331n, 155331n, 155331n], + [2n, 23391n, 1902n, 106n, 155331n, 155331n, 155331n, 155331n, 155331n, 155331n], + ]); + }, + MAX_TEST_EXECUTION_TIME, + ); + + afterAll(async () => { + await model?.dispose(); + }, MAX_MODEL_DISPOSE_TIME); + }); +};