@@ -336,8 +336,8 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
  <tbody >
  <tr>
  <td >BPE</td>
- <td >96.57 </td>
- <td >4991 </td>
+ <td >94.45 </td>
+ <td >5535 </td>
  </tr>
  <tr>
  <td >SentencePiece</td>
@@ -346,13 +346,13 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
  </tr>
  <tr>
  <td >Tiktoken</td>
- <td >98.17 </td>
- <td >218 </td>
+ <td >93.98 </td>
+ <td >266 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
- <td >94.97 </td>
- <td >1053 </td>
+ <td >91.31 </td>
+ <td >1301 </td>
  </tr>
  </tbody >
 </table >
@@ -372,140 +372,140 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
  <tr>
  <td >BPE</td>
  <td >EleutherAI/gpt-j-6b</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >EleutherAI/gpt-neo-125m</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >EleutherAI/gpt-neox-20b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >EleutherAI/pythia-12b-deduped</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >KoboldAI/fairseq-dense-13B</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >NousResearch/Meta-Llama-3-8B-Instruct</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >Salesforce/codegen-16B-multi</td>
- <td >99.08 </td>
- <td >217 </td>
+ <td >95.98 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >Xenova/gpt-4o</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >94.38 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >ai-forever/rugpt3large_based_on_gpt2</td>
- <td >96.31 </td>
- <td >217 </td>
+ <td >90.36 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >bigscience/bloom</td>
- <td >99.08 </td>
- <td >217 </td>
+ <td >97.42 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >databricks/dolly-v2-3b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >facebook/bart-large-mnli</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >facebook/galactica-120b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >facebook/opt-66b</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >gpt2</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >laion/CLIP-ViT-bigG-14-laion2B-39B-b160k</td>
- <td >70.97 </td>
- <td >217 </td>
+ <td >74.70 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >microsoft/deberta-base</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >96.57 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >roberta-base</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >sentence-transformers/all-roberta-large-v1</td>
- <td >98.16 </td>
- <td >217 </td>
+ <td >95.18 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >stabilityai/stablecode-completion-alpha-3b-4k</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >stabilityai/stablelm-2-1_6b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >stabilityai/stablelm-tuned-alpha-7b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >95.71 </td>
+ <td >233 </td>
  </tr>
  <tr>
  <td >BPE</td>
  <td >tiiuae/falcon-7b</td>
- <td >97.24 </td>
- <td >217 </td>
+ <td >94.38 </td>
+ <td >249 </td>
  </tr>
  <tr>
  <td >SentencePiece</td>
@@ -630,92 +630,92 @@ This report is autogenerated and includes tokenizers and detokenizers tests. The
  <tr>
  <td >Tiktoken</td>
  <td >Qwen/Qwen-14B-Chat</td>
- <td >98.17 </td>
- <td >109 </td>
+ <td >92.91 </td>
+ <td >141 </td>
  </tr>
  <tr>
  <td >Tiktoken</td>
  <td >Salesforce/xgen-7b-8k-base</td>
- <td >98.17 </td>
- <td >109 </td>
+ <td >95.20 </td>
+ <td >125 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >ProsusAI/finbert</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >bert-base-multilingual-cased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >bert-base-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >cointegrated/rubert-tiny2</td>
- <td >91.36 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >distilbert-base-uncased-finetuned-sst-2-english</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >google/electra-base-discriminator</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >google/mobilebert-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >jhgan/ko-sbert-sts</td>
- <td >87.65 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >prajjwal1/bert-mini</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >rajiv003/ernie-finetuned-qqp</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >rasa/LaBSE</td>
- <td >90.12 </td>
- <td >81 </td>
+ <td >80.00 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >sentence-transformers/all-MiniLM-L6-v2</td>
- <td >87.65 </td>
- <td >81 </td>
+ <td >91.43 </td>
+ <td >105 </td>
  </tr>
  <tr>
  <td >WordPiece</td>
  <td >squeezebert/squeezebert-uncased</td>
- <td >97.53 </td>
- <td >81 </td>
+ <td >94.38 </td>
+ <td >89 </td>
  </tr>
  </tbody >
 </table >
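
The per-tokenizer summary rows at the top of the report can be cross-checked against the detailed per-model rows: the updated aggregate BPE figures are consistent with a test-count-weighted mean of the per-model pass rates. The sketch below is not part of the autogenerated report; the weighting assumption is mine, and the numbers are simply copied from the new (`+`) values in the diff above.

```python
# Cross-check sketch (assumption: the aggregate "BPE" row equals the
# test-count-weighted mean of the per-model BPE rows in the report).

# (pass rate %, number of tests) per BPE model, in table order
bpe_rows = [
    (95.18, 249),  # EleutherAI/gpt-j-6b
    (95.18, 249),  # EleutherAI/gpt-neo-125m
    (95.71, 233),  # EleutherAI/gpt-neox-20b
    (95.71, 233),  # EleutherAI/pythia-12b-deduped
    (96.57, 233),  # KoboldAI/fairseq-dense-13B
    (95.71, 233),  # NousResearch/Meta-Llama-3-8B-Instruct
    (95.98, 249),  # Salesforce/codegen-16B-multi
    (94.38, 249),  # Xenova/gpt-4o
    (90.36, 249),  # ai-forever/rugpt3large_based_on_gpt2
    (97.42, 233),  # bigscience/bloom
    (95.71, 233),  # databricks/dolly-v2-3b
    (95.18, 249),  # facebook/bart-large-mnli
    (95.71, 233),  # facebook/galactica-120b
    (96.57, 233),  # facebook/opt-66b
    (95.18, 249),  # gpt2
    (74.70, 249),  # laion/CLIP-ViT-bigG-14-laion2B-39B-b160k
    (96.57, 233),  # microsoft/deberta-base
    (95.18, 249),  # roberta-base
    (95.18, 249),  # sentence-transformers/all-roberta-large-v1
    (95.71, 233),  # stabilityai/stablecode-completion-alpha-3b-4k
    (95.71, 233),  # stabilityai/stablelm-2-1_6b
    (95.71, 233),  # stabilityai/stablelm-tuned-alpha-7b
    (94.38, 249),  # tiiuae/falcon-7b
]

total_tests = sum(n for _, n in bpe_rows)
weighted_rate = sum(rate * n for rate, n in bpe_rows) / total_tests

print(total_tests)            # -> 5535, matches the aggregate BPE row
print(f"{weighted_rate:.2f}")  # -> 94.45, matches the aggregate BPE row
```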