From 68e3230b07965235785956d843ab8415990f7c42 Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Sun, 16 Jun 2024 22:18:03 +0000
Subject: [PATCH 1/6] Add existing project files to Git

---
 __pycache__/openai_perf.cpython-310.pyc |   Bin 0 -> 2718 bytes
 __pycache__/vllm_perf.cpython-310.pyc   |   Bin 0 -> 3883 bytes
 input_examples/llama3/128_tokens        |     1 +
 input_examples/llama3/2048_tokens       |     1 +
 input_examples/token_counter.py         |     6 +
 llmperf.py                              |   116 ++++-----
 openai_perf.py                          |    35 ++---
 requirements.txt                        |     8 +-
 tgi_perf.py                             |    44 ------
 triton_perf.py                          |   184 ------------------
 vllm_perf.py                            |   137 ------------------
 11 files changed, 55 insertions(+), 477 deletions(-)
 create mode 100644 __pycache__/openai_perf.cpython-310.pyc
 create mode 100644 __pycache__/vllm_perf.cpython-310.pyc
 create mode 100644 input_examples/llama3/128_tokens
 create mode 100644 input_examples/llama3/2048_tokens
 create mode 100644 input_examples/token_counter.py
 delete mode 100644 tgi_perf.py
 delete mode 100644 triton_perf.py
 delete mode 100644 vllm_perf.py

diff --git a/__pycache__/openai_perf.cpython-310.pyc b/__pycache__/openai_perf.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9bce24455c7a5ddc96862ae32acafed7175cd05e
GIT binary patch
literal 2718
[base85-encoded compiled bytecode omitted]

diff --git a/__pycache__/vllm_perf.cpython-310.pyc b/__pycache__/vllm_perf.cpython-310.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..243a7aad0c01877ff222b95707d2f4e0a14cb7fb
GIT binary patch
literal 3883
[base85-encoded compiled bytecode omitted]

diff --git a/input_examples/llama3/128_tokens b/input_examples/llama3/128_tokens
new file mode 100644
index 0000000..4ac9f08
--- /dev/null
+++ b/input_examples/llama3/128_tokens
@@ -0,0 +1 @@
+Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. Mr. Dursley made drills. He was a big, beefy man with hardly any neck, although he did have a very large moustache. Mrs. Dursley was thin and blonde and had twice the usual amount of neck, which came in very useful as she spent so much of her time spying on the neighbours. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. The Dursleys had everything they
diff --git a/input_examples/llama3/2048_tokens b/input_examples/llama3/2048_tokens
new file mode 100644
index 0000000..dc64a4e
--- /dev/null
+++ b/input_examples/llama3/2048_tokens
@@ -0,0 +1 @@
+Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursley pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street. The Dursleys knew that the Potters had a small son, too, but they had never even seen him. This boy was another good reason for keeping the Potters away; they didn't want Dudley mixing with a child like that. When Mr. and Mrs. Dursley woke up on the dull, gray Tuesday our story starts, there was nothing about the cloudy sky outside to suggest that strange and mysterious things would soon be happening all over the country. Mr. Dursley hummed as he picked out his most boring tie for work, and Mrs.
Dursley gossiped away happily as she wrestled a screaming Dudley into his high chair. None of them noticed a large, tawny owl flutter past the window. At half past eight, Mr. Dursley picked up his briefcase, pecked Mrs. Dursley on the cheek, and tried to kiss Dudley good-bye but missed, because Dudley was now having a tantrum and throwing his cereal at the walls. Little tyke,' chortled Mr. Dursley as he left the house. He got into his car and backed out of number four's drive. It was on the corner of the street that he noticed the first sign of something peculiar a cat reading a map. For a second, Mr. Dursley didn't realize what he had seen then he jerked his head around to look again. There was a tabby cat standing on the corner of Privet Drive, but there wasn't a map in sight. What could he have been thinking of? It must have been a trick of the light. Mr. Dursley blinked and stared at the cat. It stared back. As Mr. Dursley drove around the corner and up the road, he watched the cat in his mirror. It was now reading the sign that said Privet Drive no, looking at the sign; cats couldn't read maps or signs. Mr. Dursley gave himself a little shake and put the cat out of his mind. As he drove toward town he thought of nothing except a large order of drills he was hoping to get that day. But on the edge of town, drills were driven out of his mind by something else. As he sat in the usual morning traffic jam, he couldn't help noticing that there seemed to be a lot of strangely dressed people about. People in cloaks. Mr. Dursley couldn't bear people who dressed in funny clothes the getups you saw on young people! He supposed this was some stupid new fashion. He drummed his fingers on the steering wheel and his eyes fell on a huddle of these weirdos standing quite close by. They were whispering excitedly together. Mr. Dursley was enraged to see that a couple of them weren't young at all; why, that man had to be older than he was, and wearing an emerald-green cloak! The nerve of him! But then it struck Mr. Dursley that this was probably some silly stunt these people were obviously collecting for something . . . yes, that would be it. The traffic moved on and a few minutes later, Mr. Dursley arrived in the Grunnings parking lot, his mind back on drills. Mr. Dursley always sat with his back to the window in his office on the ninth floor. If he hadn't, he might have found it harder to concentrate on drills that morning. He didn't see the owls swoop- ing past in broad daylight, though people down in the street did; they pointed and gazed open-mouthed as owl after owl sped overhead. Most of them had never seen an owl even at nighttime. Mr. Dursley, however, had a perfectly normal, owl-free morning. He yelled at five different people. He made several important telephone calls and shouted a bit more. He was in a very good mood until lunchtime, when he thought he'd stretch his legs and walk across the road to buy himself a bun from the bakery. He'd forgotten all about the people in cloaks until he passed a group of them next to the baker's. He eyed them angrily as he passed. He didn't know why, but they made him uneasy. This bunch were whispering excitedly, too, and he couldn't see a single collecting tin. It was on his way back past them, clutching a large doughnut in a bag, that he caught a few words of what they were saying. 'The Potters, that's right, that's what I heard ' ' yes, their son, Harry ' Mr. Dursley stopped dead. Fear flooded him. 
He looked back at the whisperers as if he wanted to say something to them, but thought better of it. He dashed back across the road, hurried up to his office,snapped at his secretary not to disturb him, seized his telephone,and had almost finished dialing his home number when hechanged his mind. He put the receiver back down and stroked hismustache, thinking . . . no, he was being stupid. Potter wasn't suchan unusual name. He was sure there were lots of people called Potter who had a son called Harry. Come to think of it, he wasn't evensure his nephew was called Harry. He'd never even seen the boy. It might have been Harvey. Or Harold. There was no point in worrying Mrs. Dursley; she always got so upset at any mention of hersister. He didn't blame her if he'd had a sister like that . . . but allthe same, those people in cloaks . . .He found it a lot harder to concentrate on drills that afternoonand when he left the building at five o'clock, he was still so worriedthat he walked straight into someone just outside the door.'Sorry,' he grunted, as the tiny old man stumbled and almostfell. It was a few seconds before Mr. Dursley realized that the manwas wearing a violet cloak. He didn't seem at all upset at being almost knocked to the ground. On the contrary, his face split into awide smile and he said in a squeaky voice that made passersby stare,'Don't be sorry, my dear sir, for nothing could upset me today! Rejoice, for You-Know-Who has gone at last! Even Muggles like yourself should be celebrating, this happy, happy day!'And the old man hugged Mr. Dursley around the middle andwalked off.Mr. Dursley stood rooted to the spot. He had been hugged by acomplete stranger. He also thought he had been called a Muggle,whatever that was. He was rattled. He hurried to his car and set offfor home, hoping he was imagining things, which he had neverhoped before, because he didn't approve of imagination.As he pulled into the driveway of number four, the first thing hesaw and it didn't improve his mood was the tabby cat he'dspotted that morning. It was now sitting on his garden wall. Hewas sure it was the same one; it had the same markings around itseyes.'Shoo!' said Mr. Dursley loudly. The cat didn't move. It just gave him a stern look. Was this normal cat behavior? Mr. Dursley wondered. Trying to pull himself together, he let himself into the house. He was still determined not tomention anything to his wife.Mrs. Dursley had had a nice, normal day. She told him over dinner all about Mrs. Next Door's problems with her daughter and how Dudley had learnt a new word (‘Shan’t!’). Mr Dursley tried to act normally. When Dudley had been put to bed, he went into the living-room in time to catch the last report on the evening news: ‘And finally, bird-watchers everywhere have reported that the nation’s owls have been behaving very unusually today. Although owls normally hunt at night and are hardly ever seen in daylight, there have been hundreds of sightings of these birds flying in every direction since sunrise. Experts are unable to explain why the owls have suddenly changed their sleeping pattern.’ The news reader allowed himself a grin. ‘Most mysterious. And now, over to Jim McGuffin with the weather. 
Going to be any more showers of owls tonight, Jim?’ ‘Well, Ted,’ said the weatherman, ‘I don’t know about that, but it’s diff --git a/input_examples/token_counter.py b/input_examples/token_counter.py new file mode 100644 index 0000000..587c34c --- /dev/null +++ b/input_examples/token_counter.py @@ -0,0 +1,6 @@ +from transformers import AutoTokenizer +tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B") +text = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursley pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street. The Dursleys knew that the Potters had a small son, too, but they had never even seen him. This boy was another good reason for keeping the Potters away; they didn't want Dudley mixing with a child like that. When Mr. and Mrs. Dursley woke up on the dull, gray Tuesday our story starts, there was nothing about the cloudy sky outside to suggest that strange and mysterious things would soon be happening all over the country. Mr. Dursley hummed as he picked out his most boring tie for work, and Mrs. Dursley gossiped away happily as she wrestled a screaming Dudley into his high chair. None of them noticed a large, tawny owl flutter past the window. At half past eight, Mr. Dursley picked up his briefcase, pecked Mrs. Dursley on the cheek, and tried to kiss Dudley good-bye but missed, because Dudley was now having a tantrum and throwing his cereal at the walls. Little tyke,' chortled Mr. Dursley as he left the house. He got into his car and backed out of number four's drive. It was on the corner of the street that he noticed the first sign of something peculiar a cat reading a map. For a second, Mr. Dursley didn't realize what he had seen then he jerked his head around to look again. There was a tabby cat standing on the corner of Privet Drive, but there wasn't a map in sight. What could he have been thinking of? It must have been a trick of the light. Mr. Dursley blinked and stared at the cat. It stared back. As Mr. Dursley drove around the corner and up the road, he watched the cat in his mirror. It was now reading the sign that said Privet Drive no, looking at the sign; cats couldn't read maps or signs. Mr. Dursley gave himself a little shake and put the cat out of his mind. As he drove toward town he thought of nothing except a large order of drills he was hoping to get that day. 
But on the edge of town, drills were driven out of his mind by something else. As he sat in the usual morning traffic jam, he couldn't help noticing that there seemed to be a lot of strangely dressed people about. People in cloaks. Mr. Dursley couldn't bear people who dressed in funny clothes the getups you saw on young people! He supposed this was some stupid new fashion. He drummed his fingers on the steering wheel and his eyes fell on a huddle of these weirdos standing quite close by. They were whispering excitedly together. Mr. Dursley was enraged to see that a couple of them weren't young at all; why, that man had to be older than he was, and wearing an emerald-green cloak! The nerve of him! But then it struck Mr. Dursley that this was probably some silly stunt these people were obviously collecting for something . . . yes, that would be it. The traffic moved on and a few minutes later, Mr. Dursley arrived in the Grunnings parking lot, his mind back on drills. Mr. Dursley always sat with his back to the window in his office on the ninth floor. If he hadn't, he might have found it harder to concentrate on drills that morning. He didn't see the owls swoop- ing past in broad daylight, though people down in the street did; they pointed and gazed open-mouthed as owl after owl sped overhead. Most of them had never seen an owl even at nighttime. Mr. Dursley, however, had a perfectly normal, owl-free morning. He yelled at five different people. He made several important telephone calls and shouted a bit more. He was in a very good mood until lunchtime, when he thought he'd stretch his legs and walk across the road to buy himself a bun from the bakery. He'd forgotten all about the people in cloaks until he passed a group of them next to the baker's. He eyed them angrily as he passed. He didn't know why, but they made him uneasy. This bunch were whispering excitedly, too, and he couldn't see a single collecting tin. It was on his way back past them, clutching a large doughnut in a bag, that he caught a few words of what they were saying. 'The Potters, that's right, that's what I heard ' ' yes, their son, Harry ' Mr. Dursley stopped dead. Fear flooded him. He looked back at the whisperers as if he wanted to say something to them, but thought better of it. He dashed back across the road, hurried up to his office,snapped at his secretary not to disturb him, seized his telephone,and had almost finished dialing his home number when hechanged his mind. He put the receiver back down and stroked hismustache, thinking . . . no, he was being stupid. Potter wasn't suchan unusual name. He was sure there were lots of people called Potter who had a son called Harry. Come to think of it, he wasn't evensure his nephew was called Harry. He'd never even seen the boy. It might have been Harvey. Or Harold. There was no point in worrying Mrs. Dursley; she always got so upset at any mention of hersister. He didn't blame her if he'd had a sister like that . . . but allthe same, those people in cloaks . . .He found it a lot harder to concentrate on drills that afternoonand when he left the building at five o'clock, he was still so worriedthat he walked straight into someone just outside the door.'Sorry,' he grunted, as the tiny old man stumbled and almostfell. It was a few seconds before Mr. Dursley realized that the manwas wearing a violet cloak. He didn't seem at all upset at being almost knocked to the ground. 
On the contrary, his face split into awide smile and he said in a squeaky voice that made passersby stare,'Don't be sorry, my dear sir, for nothing could upset me today! Rejoice, for You-Know-Who has gone at last! Even Muggles like yourself should be celebrating, this happy, happy day!'And the old man hugged Mr. Dursley around the middle andwalked off.Mr. Dursley stood rooted to the spot. He had been hugged by acomplete stranger. He also thought he had been called a Muggle,whatever that was. He was rattled. He hurried to his car and set offfor home, hoping he was imagining things, which he had neverhoped before, because he didn't approve of imagination.As he pulled into the driveway of number four, the first thing hesaw and it didn't improve his mood was the tabby cat he'dspotted that morning. It was now sitting on his garden wall. Hewas sure it was the same one; it had the same markings around itseyes.'Shoo!' said Mr. Dursley loudly. The cat didn't move. It just gave him a stern look. Was this normal cat behavior? Mr. Dursley wondered. Trying to pull himself together, he let himself into the house. He was still determined not tomention anything to his wife.Mrs. Dursley had had a nice, normal day. She told him over dinner all about Mrs. Next Door's problems with her daughter and how Dudley had learnt a new word (‘Shan’t!’). Mr Dursley tried to act normally. When Dudley had been put to bed, he went into the living-room in time to catch the last report on the evening news: ‘And finally, bird-watchers everywhere have reported that the nation’s owls have been behaving very unusually today. Although owls normally hunt at night and are hardly ever seen in daylight, there have been hundreds of sightings of these birds flying in every direction since sunrise. Experts are unable to explain why the owls have suddenly changed their sleeping pattern.’ The news reader allowed himself a grin. ‘Most mysterious. And now, over to Jim McGuffin with the weather. 
Going to be any more showers of owls tonight, Jim?’ ‘Well, Ted,’ said the weatherman, ‘I don’t know about that, but it’s" +tokens = tokenizer.tokenize(text) +num_tokens = len(tokens) +print(f"Number of tokens in your text: {num_tokens}") diff --git a/llmperf.py b/llmperf.py index 28205a4..4d79cf3 100644 --- a/llmperf.py +++ b/llmperf.py @@ -1,8 +1,5 @@ import argparse import openai_perf -import tgi_perf -import vllm_perf -import triton_perf import asyncio import math import json @@ -60,59 +57,24 @@ async def send_sampled_request_periodically(request, samples, qps, t, total): def run_ttft(args): prompt = read_prompt_from_file(args.prompt_file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.ttft_measurer(prompt, args) - elif args.engine == "openai": - measurer = openai_perf.ttft_measurer(prompt, args) - elif args.engine == "tgi": - measurer = tgi_perf.ttft_measurer(prompt, args) - elif args.engine == "triton": - measurer = triton_perf.ttft_measurer(prompt, args) - else: - print(f"TTFT test not implemented for {args.engine}") - return + measurer = openai_perf.ttft_measurer(prompt, args) run_test_n_times(measurer, args.iterations) def run_tpot(args): prompt = read_prompt_from_file(args.prompt_file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.tpot_measurer(prompt, args) - elif args.engine == "openai": - measurer = openai_perf.tpot_measurer(prompt, args) - elif args.engine == "tgi": - measurer = tgi_perf.tpot_measurer(prompt, args) - elif args.engine == "triton": - measurer = triton_perf.tpot_measurer(prompt, args) - else: - print(f"TPOT test not implemented for {args.engine}") - return + measurer = openai_perf.tpot_measurer(prompt, args) asyncio.run(async_run_test_n_times(measurer, args.iterations)) def run_static_batch(args): prompt = read_prompt_from_file(args.prompt_file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.static_batch_measurer(prompt, args) - else: - print(f"Static batch test not implemented for {args.engine}") - return run_test_n_times(measurer, args.iterations) def run_rate_throughput(args): prompt = read_prompt_from_file(args.prompt_file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.rate_throughput_measurer(prompt, args) - elif args.engine == "openai": - measurer = openai_perf.rate_throughput_measurer(prompt, args) - elif args.engine == "tgi": - measurer = tgi_perf.rate_throughput_measurer(prompt, args) - elif args.engine == "triton": - measurer = triton_perf.rate_throughput_measurer(prompt, args) - else: - print(f"Rate throughput test not implemented for {args.engine}") - return + measurer = openai_perf.rate_throughput_measurer(prompt, args) async def wrapper(): return await send_request_periodically(measurer, args.qps, args.t, args.total_requests) @@ -122,17 +84,7 @@ def run_rate_sampled_throughput(args): with open(args.dataset, 'r') as file: samples = json.load(file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.sample_rate_throughput_measurer(args) - elif args.engine == "openai": - measurer = openai_perf.sample_rate_throughput_measurer(args) - elif args.engine == "tgi": - measurer = tgi_perf.sample_rate_throughput_measurer(args) - elif args.engine == "triton": - measurer = triton_perf.sample_rate_throughput_measurer(args) - else: - print(f"Rate sampled throughput test not implemented for {args.engine}") - return + measurer = openai_perf.sample_rate_throughput_measurer(args) async def wrapper(): return await send_sampled_request_periodically(measurer, samples, 
args.qps, args.t, args.total_requests) @@ -142,43 +94,23 @@ def run_rate_sampled_output_throughput(args): with open(args.dataset, 'r') as file: samples = json.load(file) measurer = None - if args.engine == "vllm": - measurer = vllm_perf.sample_output_rate_throughput_measurer(args) - elif args.engine == "tgi": - measurer = tgi_perf.sample_output_rate_throughput_measurer(args) - elif args.engine == "openai": - measurer = openai_perf.sample_output_rate_throughput_measurer(args) - elif args.engine == "triton": - measurer = triton_perf.sample_output_rate_throughput_measurer(args) - else: - print(f"Rate sampled throughput test not implemented for {args.engine}") - return + measurer = openai_perf.sample_output_rate_throughput_measurer(args) async def wrapper(): return await send_sampled_request_periodically(measurer, samples, args.qps, args.t, args.total_requests) asyncio.run(async_run_test_n_times(wrapper, args.iterations)) -def add_engines_parser(base_parser, vllm_batch_size = False): +def add_parser(base_parser, vllm_batch_size = False): engine_parser = base_parser.add_subparsers(title="Engine", dest="engine", required=True) - vllm_parser = engine_parser.add_parser("vllm", help="vLLM Engine") - vllm_parser.add_argument("--model", type=str, default="", help="The model.") - vllm_parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") - vllm_parser.add_argument("--gpu_memory_utilization", type=float, default=0.9, help="GPU Memory fraction") - if vllm_batch_size: - vllm_parser.add_argument("--batch_size", type=int, default=128, help="The batch size.") - - openai_parser = engine_parser.add_parser("openai", help="OpenAI Engine") - openai_parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") - openai_parser.add_argument("--api_base", type=str, default="http://localhost:8000/v1", help="The OpenAI Server URL") - - triton_parser = engine_parser.add_parser("triton", help="Triton Engine") - triton_parser.add_argument("--model", type=str, default="ensemble", help="The model.") - triton_parser.add_argument("--http_server", type=str, default="http://localhost:8000", help="The Triton Server URL") - triton_parser.add_argument("--grpc_server", type=str, default="localhost:8001", help="The Triton gRPC Server URL") - - tgi_parser = engine_parser.add_parser("tgi", help="Text-generation-inference Engine") - tgi_parser.add_argument("--server", type=str, default="http://127.0.0.1:80/", help="The TGI Server URL") + parser = engine_parser.add_parser("openai", help="OpenAI API") + parser.add_argument("--model", type=str, default="", help="The model.") + parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") + parser.add_argument("--gpu_memory_utilization", type=float, default=0.9, help="GPU Memory fraction") + #if vllm_batch_size: + # parser.add_argument("--batch_size", type=int, default=128, help="The batch size.") + parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") + parser.add_argument("--api_base", type=str, default="http://localhost:8080/v1", help="The OpenAI Server URL") if __name__ == "__main__": parser = argparse.ArgumentParser(description="LLMPerf tools to measure LLM performance") @@ -188,23 +120,23 @@ def add_engines_parser(base_parser, vllm_batch_size = False): ttft_parser = test_parser.add_parser("ttft", help="Measure Time To First Token (TTFT)") ttft_parser.add_argument("--prompt_file", type=str, help="Path to a file containing the prompt.") ttft_parser.add_argument("--iterations", 
type=int, default=10, help="The iterations parameter.") - add_engines_parser(ttft_parser) + add_parser(ttft_parser) tpot_parser = test_parser.add_parser("tpot", help="Measure Time Per Output Token (TPOT)") tpot_parser.add_argument("--prompt_file", type=str, help="Path to a file containing the prompt.") tpot_parser.add_argument("--iterations", type=int, default=10, help="The iterations parameter.") tpot_parser.add_argument("--output_tokens", type=int, default=128, help="Number of tokens to retrieve") - add_engines_parser(tpot_parser) + add_parser(tpot_parser) stb_parser = test_parser.add_parser("static_batch_throughput", help="Measure throughput for static batch") stb_parser.add_argument("--prompt_file", type=str, help="Path to a file containing the prompt.") stb_parser.add_argument("--iterations", type=int, default=10, help="The iterations parameter.") stb_parser.add_argument("--output_tokens", type=int, default=128, help="Number of tokens to retrieve") stb_parser.add_argument("--batch_size", type=int, default=128, help="Number of sequences to batch") - stb_engine_parser = stb_parser.add_subparsers(title="Engine", dest="engine", required=True) - stb_vllm_parser = stb_engine_parser.add_parser("vllm", help="vLLM Engine") - stb_vllm_parser.add_argument("--model", type=str, default="", help="The model.") - stb_vllm_parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") + #stb_engine_parser = stb_parser.add_subparsers(title="Engine", dest="engine", required=True) + #stb_vllm_parser = stb_engine_parser.add_parser("vllm", help="vLLM Engine") + #stb_vllm_parser.add_argument("--model", type=str, default="", help="The model.") + #stb_vllm_parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") rth_parser = test_parser.add_parser("rate_throughput", help="Measure throughput with sending requests at constant rate") rth_parser.add_argument("--prompt_file", type=str, help="Path to a file containing the prompt.") @@ -213,7 +145,7 @@ def add_engines_parser(base_parser, vllm_batch_size = False): rth_parser.add_argument("--qps", type=int, default=4, help="Number of queries to send per second") rth_parser.add_argument("--t", type=int, default=1, help="Time frame to send the QPS amount requests") rth_parser.add_argument("--total_requests", type=int, default=5000, help="Number of requests to send in total") - add_engines_parser(rth_parser, True) + add_parser(rth_parser, True) rst_parser = test_parser.add_parser("rate_sampled_throughput", help="Measure throughput with sending requests at constant rate") rst_parser.add_argument("--dataset", type=str, help="Path to a file containing the dataset.") @@ -221,7 +153,7 @@ def add_engines_parser(base_parser, vllm_batch_size = False): rst_parser.add_argument("--qps", type=int, default=4, help="Number of queries to send per second (Per t)") rst_parser.add_argument("--t", type=int, default=1, help="Time frame to send the QPS amount requests") rst_parser.add_argument("--total_requests", type=int, default=5000, help="Number of requests to send in total") - add_engines_parser(rst_parser, True) + add_parser(rst_parser, True) rsot_parser = test_parser.add_parser("rate_sampled_output_throughput", help="Measure throughput with sending requests at constant rate") rsot_parser.add_argument("--dataset", type=str, help="Path to a file containing the dataset.") @@ -231,7 +163,7 @@ def add_engines_parser(base_parser, vllm_batch_size = False): rsot_parser.add_argument("--total_requests", type=int, default=5000, help="Number of requests to 
send in total") rsot_parser.add_argument("--temperature", type=float, default=1, help="Temperature in sampling phase") rsot_parser.add_argument("--top_k", type=int, default=15, help="Tok K in sampling phase") - add_engines_parser(rsot_parser, True) + add_parser(rsot_parser, True) args = parser.parse_args() @@ -247,4 +179,4 @@ def add_engines_parser(base_parser, vllm_batch_size = False): elif args.test == "rate_sampled_throughput": run_rate_sampled_throughput(args) elif args.test == "rate_sampled_output_throughput": - run_rate_sampled_output_throughput(args) \ No newline at end of file + run_rate_sampled_output_throughput(args) diff --git a/openai_perf.py b/openai_perf.py index 5d10ab3..ac70006 100644 --- a/openai_perf.py +++ b/openai_perf.py @@ -2,10 +2,10 @@ from timeit import default_timer as timer def ttft_measurer(prompt, args): - model = get_model(args) + client, model = get_model(args) def single_request(): start = timer() - completion = openai.Completion.create( + completion = client.completions.create( model=model, echo=False, prompt=prompt, @@ -20,15 +20,15 @@ def single_request(): return single_request def tpot_measurer(prompt, args): - model = get_model(args) + client, model = get_model(args) async def single_request(): start = timer() - completion = openai.Completion.create( + completion = client.completions.create( model=model, echo=False, prompt=prompt, max_tokens=args.output_tokens, - temperature=0, + temperature=0.01, n=1, stream=True, ) @@ -41,9 +41,9 @@ async def single_request(): return single_request def rate_throughput_measurer(prompt, args): - model = get_model(args) + client, model = get_model(args, async_client = True) async def single_request(): - completion = await openai.Completion.acreate( + completion = await client.completions.create( model=model, echo=False, prompt=prompt, @@ -58,9 +58,9 @@ async def single_request(): return single_request def sample_rate_throughput_measurer(args): - model = get_model(args) + client, model = get_model(args, async_client = True) async def single_request(sample): - completion = await openai.Completion.acreate( + completion = await client.completions.create( model=model, echo=False, prompt=sample["prompt"], @@ -75,9 +75,9 @@ async def single_request(sample): return single_request def sample_output_rate_throughput_measurer(args): - model = get_model(args) + client, model = get_model(args, async_client = True) async def single_request(sample): - completion = await openai.Completion.acreate( + completion = await client.completions.create( model=model, echo=False, prompt=sample["prompt"], @@ -90,8 +90,11 @@ async def single_request(sample): return completion.usage.completion_tokens return single_request -def get_model(args): - openai.api_key = args.api_key - openai.api_base = args.api_base - models = openai.Model.list() - return models["data"][0]["id"] +def get_model(args, async_client=False): + client = (openai.Client if not async_client else openai.AsyncClient) ( + api_key = args.api_key, + base_url = args.api_base + ) + + model = args.model + return client, model diff --git a/requirements.txt b/requirements.txt index 93cd049..83bae8f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -openai==0.28.1 -vllm==0.2.1 -text-generation==0.6.1 -tritonclient==2.39.0 \ No newline at end of file +openai==1.34.0 +vllm==0.4.3 +text-generation==2.0.4 +tritonclient==2.39.0 diff --git a/tgi_perf.py b/tgi_perf.py deleted file mode 100644 index cf22c9d..0000000 --- a/tgi_perf.py +++ /dev/null @@ -1,44 +0,0 @@ -from 
diff --git a/tgi_perf.py b/tgi_perf.py
deleted file mode 100644
index cf22c9d..0000000
--- a/tgi_perf.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from text_generation import Client, AsyncClient
-from timeit import default_timer as timer
-
-TIMEOUT_24_HOURS = 1440
-
-def ttft_measurer(prompt, args):
-    client = Client(args.server)
-    def single_request():
-        start = timer()
-        _ = client.generate(prompt, max_new_tokens=1)
-        return timer() - start
-    return single_request
-
-def tpot_measurer(prompt, args):
-    client = Client(args.server)
-    async def single_request():
-        i = 0
-        for _ in client.generate_stream(prompt, max_new_tokens=args.output_tokens):
-            if i == 0:
-                start = timer()
-            i += 1
-        return (timer() - start) / (i - 1)
-    return single_request
-
-def rate_throughput_measurer(prompt, args):
-    client = AsyncClient(args.server, timeout=TIMEOUT_24_HOURS)
-    async def single_request():
-        _ = await client.generate(prompt, max_new_tokens=args.output_tokens)
-        return args.output_tokens
-    return single_request
-
-def sample_rate_throughput_measurer(args):
-    client = AsyncClient(args.server, timeout=TIMEOUT_24_HOURS)
-    async def single_request(sample):
-        _ = await client.generate(sample["prompt"], max_new_tokens=sample["output_len"])
-        return sample["output_len"]
-    return single_request
-
-def sample_output_rate_throughput_measurer(args):
-    client = AsyncClient(args.server, timeout=TIMEOUT_24_HOURS)
-    async def single_request(sample):
-        response = await client.generate(sample["prompt"], max_new_tokens=2048, temperature=args.temperature, top_k=args.top_k)
-        return response.details.generated_tokens
-    return single_request
diff --git a/triton_perf.py b/triton_perf.py
deleted file mode 100644
index ae1ce9b..0000000
--- a/triton_perf.py
+++ /dev/null
@@ -1,184 +0,0 @@
-import requests
-import aiohttp
-import tritonclient.grpc as grpcclient
-from tritonclient.utils import InferenceServerException, np_to_triton_dtype
-from timeit import default_timer as timer
-import numpy as np
-from functools import partial
-import queue
-
-class UserData:
-    def __init__(self):
-        self._completed_requests = queue.Queue()
-
-def prepare_tensor(name, input):
-    t = grpcclient.InferInput(name, input.shape,
-                              np_to_triton_dtype(input.dtype))
-    t.set_data_from_numpy(input)
-    return t
-
-def ttft_measurer(prompt, args):
-    server = args.http_server
-    model = args.model
-    def single_request():
-        req = {
-            "text_input": prompt,
-            "max_tokens": 1,
-            "bad_words": "",
-            "stop_words": ""
-        }
-        start = timer()
-        res = requests.post(f"{server}/v2/models/{model}/generate", json=req)
-        return timer() - start
-    return single_request
-
-def tpot_measurer(prompt, args):
-    client = grpcclient.InferenceServerClient(url=args.grpc_server)
-    input0 = [[prompt]]
-    input0_data = np.array(input0).astype(object)
-    output0_len = np.ones_like(input0).astype(np.uint32) * args.output_tokens
-    bad_words_list = np.array([[""]], dtype=object)
-    stop_words_list = np.array([[""]], dtype=object)
-    streaming = [[True]]
-    streaming_data = np.array(streaming, dtype=bool)
-    beam_width = [[1]]
-    beam_width_data = np.array(beam_width, dtype=np.uint32)
-    inputs = [
-        prepare_tensor("text_input", input0_data),
-        prepare_tensor("max_tokens", output0_len),
-        prepare_tensor("bad_words", bad_words_list),
-        prepare_tensor("stop_words", stop_words_list),
-        prepare_tensor("stream", streaming_data),
-        prepare_tensor("beam_width", beam_width_data),
-    ]
-
-    async def single_request():
-        user_data = UserData()
-        i = 0
-        start = timer()
-        def callback(user_data, result, error):
-            nonlocal start
-            nonlocal i
-            if error:
-                user_data._completed_requests.put(error)
-            else:
-                i += 1
-                if i == 1:
-                    start = timer()
-
user_data._completed_requests.put(result) - client.start_stream(callback=partial(callback, user_data)) - client.async_stream_infer(args.model, inputs, request_id=str(1)) - client.stop_stream() - while True: - try: - result = user_data._completed_requests.get(block=False) - except Exception: - break - - if type(result) == InferenceServerException: - print("Received an error from server:") - print(result) - else: - result.as_numpy('text_output') - return (timer() - start) / (i - 1) - return single_request - -def rate_throughput_measurer(prompt, args): - server = args.http_server - model = args.model - async def single_request(): - conn = aiohttp.TCPConnector(limit=None, ttl_dns_cache=300) - session = aiohttp.ClientSession(connector=conn) - req = { - "text_input": prompt, - "max_tokens": args.output_tokens, - "bad_words": "", - "stop_words": "" - } - async with session.post(f"{server}/v2/models/{model}/generate", json=req) as response: - _ = await response.text() - await session.close() - await conn.close() - return args.output_tokens - return single_request - -def sample_rate_throughput_measurer(args): - server = args.http_server - model = args.model - async def single_request(sample): - conn = aiohttp.TCPConnector(limit=None, ttl_dns_cache=300) - session = aiohttp.ClientSession(connector=conn) - req = { - "text_input": sample["prompt"], - "max_tokens": sample["output_len"], - "bad_words": "", - "stop_words": "" - } - async with session.post(f"{server}/v2/models/{model}/generate", json=req) as response: - _ = await response.text() - await session.close() - await conn.close() - return sample["output_len"] - return single_request - -def sample_output_rate_throughput_measurer(args): - client = grpcclient.InferenceServerClient(url=args.grpc_server) - bad_words_list = np.array([[""]], dtype=object) - stop_words_list = np.array([[""]], dtype=object) - streaming = [[True]] - streaming_data = np.array(streaming, dtype=bool) - beam_width = [[1]] - beam_width_data = np.array(beam_width, dtype=np.uint32) - temperature = [[args.temperature]] - temperature_data = np.array(temperature, dtype=np.float32) - top_k = [[args.top_k]] - top_k_data = np.array(top_k, dtype=np.uint32) - eos = [[2]] - eos_data = np.array(eos, dtype=np.uint32) - inputs = [ - prepare_tensor("bad_words", bad_words_list), - prepare_tensor("stop_words", stop_words_list), - prepare_tensor("stream", streaming_data), - prepare_tensor("beam_width", beam_width_data), - prepare_tensor("temperature", temperature_data), - prepare_tensor("top_k", top_k_data), - prepare_tensor("end_id", eos_data), - ] - global_id = 0 - async def single_request(sample): - nonlocal global_id - user_data = UserData() - - n_inputs = inputs.copy() - input0 = [[sample["prompt"]]] - input0_data = np.array(input0).astype(object) - output0_len = np.ones_like(input0).astype(np.uint32) * 2048 - n_inputs.append(prepare_tensor("text_input", input0_data)) - n_inputs.append(prepare_tensor("max_tokens", output0_len)) - - i = 0 - def callback(user_data, result, error): - nonlocal i - if error: - user_data._completed_requests.put(error) - else: - i += 1 - user_data._completed_requests.put(result) - client.start_stream(callback=partial(callback, user_data)) - client.async_stream_infer(args.model, n_inputs, request_id=str(global_id)) - global_id += 1 - client.stop_stream() - while True: - try: - result = user_data._completed_requests.get(block=False) - except Exception: - break - - if type(result) == InferenceServerException: - print("Received an error from server:") - print(result) 
- else: - result.as_numpy('text_output') - print(i) - return i - return single_request diff --git a/vllm_perf.py b/vllm_perf.py deleted file mode 100644 index 1601c4d..0000000 --- a/vllm_perf.py +++ /dev/null @@ -1,137 +0,0 @@ -from vllm import LLM, SamplingParams -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.utils import random_uuid -from timeit import default_timer as timer - -def ttft_measurer(prompt, args): - llm = LLM( - model=args.model, - trust_remote_code=True, - dtype=args.dtype, - ) - tokenizer = llm.get_tokenizer() - def single_request(): - sampling_params = SamplingParams( - temperature=0.0, - ignore_eos=True, - max_tokens=1, - ) - prompt_token_ids = tokenizer.encode(prompt) - llm._add_request( - prompt=None, - prompt_token_ids=prompt_token_ids, - sampling_params=sampling_params, - ) - start = timer() - llm._run_engine(use_tqdm=False) - return timer() - start - return single_request - -def tpot_measurer(prompt, args): - engineArgs = AsyncEngineArgs(args.model) - engineArgs.trust_remote_code = True - engineArgs.dtype = args.dtype - engineArgs.disable_log_stats = True - engineArgs.disable_log_requests = True - llm = AsyncLLMEngine.from_engine_args(engineArgs) - - async def single_request(): - sampling_params = SamplingParams( - temperature=0.0, - ignore_eos=True, - max_tokens=args.output_tokens, - ) - request_id = random_uuid() - results_generator = llm.generate(prompt, sampling_params, request_id) - i = 0 - async for _ in results_generator: - if i == 0: - start = timer() - i += 1 - return (timer() - start) / (i - 1) - return single_request - -def static_batch_measurer(prompt, args): - llm = LLM( - model=args.model, - trust_remote_code=True, - dtype=args.dtype, - ) - tokenizer = llm.get_tokenizer() - def single_request(): - sampling_params = SamplingParams( - temperature=0.0, - ignore_eos=True, - max_tokens=args.output_tokens, - ) - prompt_token_ids = tokenizer.encode(prompt) - for _ in range(args.batch_size): - llm._add_request( - prompt=None, - prompt_token_ids=prompt_token_ids, - sampling_params=sampling_params, - ) - start = timer() - llm._run_engine(use_tqdm=True) - total_time = timer() - start - tokens_count = args.batch_size * args.output_tokens - return tokens_count / total_time - return single_request - -def rate_throughput_measurer(prompt, args): - llm = init_async_llm(args) - - async def single_request(): - sampling_params = SamplingParams( - temperature=0.0, - ignore_eos=True, - max_tokens=args.output_tokens, - ) - request_id = random_uuid() - results_generator = llm.generate(prompt, sampling_params, request_id) - async for _ in results_generator: - pass - return args.output_tokens - return single_request - -def sample_rate_throughput_measurer(args): - llm = init_async_llm(args) - async def single_request(sample): - sampling_params = SamplingParams( - temperature=0.0, - ignore_eos=True, - max_tokens=sample["output_len"], - ) - request_id = random_uuid() - results_generator = llm.generate(sample["prompt"], sampling_params, request_id) - async for _ in results_generator: - pass - return sample["output_len"] - return single_request - -def sample_output_rate_throughput_measurer(args): - llm = init_async_llm(args) - async def single_request(sample): - sampling_params = SamplingParams( - top_k=args.top_k, - temperature=args.temperature, - max_tokens=4096, - ) - request_id = random_uuid() - results_generator = llm.generate(sample["prompt"], sampling_params, request_id) - i = 0 - async for _ in 
results_generator: - i += 1 - return i - return single_request - -def init_async_llm(args): - engineArgs = AsyncEngineArgs(args.model) - engineArgs.trust_remote_code = True - engineArgs.dtype = args.dtype - engineArgs.max_num_seqs = args.batch_size - engineArgs.gpu_memory_utilization = args.gpu_memory_utilization - engineArgs.disable_log_stats = True - engineArgs.disable_log_requests = True - return AsyncLLMEngine.from_engine_args(engineArgs) From 38f864bb45fbcae6d2a76978422ab7d8daaa9933 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 17 Jun 2024 10:21:28 +0000 Subject: [PATCH 2/6] Created engine args again with respective ports that are exposed for their Docker images --- llmperf.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/llmperf.py b/llmperf.py index 4d79cf3..e093673 100644 --- a/llmperf.py +++ b/llmperf.py @@ -102,15 +102,20 @@ async def wrapper(): def add_parser(base_parser, vllm_batch_size = False): engine_parser = base_parser.add_subparsers(title="Engine", dest="engine", required=True) - parser = engine_parser.add_parser("openai", help="OpenAI API") - parser.add_argument("--model", type=str, default="", help="The model.") - parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") - parser.add_argument("--gpu_memory_utilization", type=float, default=0.9, help="GPU Memory fraction") - #if vllm_batch_size: - # parser.add_argument("--batch_size", type=int, default=128, help="The batch size.") - - parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") - parser.add_argument("--api_base", type=str, default="http://localhost:8080/v1", help="The OpenAI Server URL") + vllm_parser = engine_parser.add_parser("vllm", help="vLLM Engine") + vllm_parser.add_argument("--model", type=str, default="", help="The model.") + vllm_parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") + vllm_parser.add_argument("--api_base", type=str, default="http://localhost:8000/v1", help="The OpenAI Server URL") + + nim_parser = engine_parser.add_parser("nim", help="NVIDIA NIM (TRT-LLM engine with Triton)") + nim_parser.add_argument("--model", type=str, default="", help="The model.") + nim_parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") + nim_parser.add_argument("--api_base", type=str, default="http://localhost:8000/v1", help="The OpenAI Server URL") + + tgi_parser = engine_parser.add_parser("tgi", help="Text-generation-inference Engine by HuggingFace") + tgi_parser.add_argument("--model", type=str, default="", help="The model.") + tgi_parser.add_argument("--api_key", type=str, default="API_KEY", help="The OpenAI API Key") + tgi_parser.add_argument("--api_base", type=str, default="http://localhost:8080/v1", help="The OpenAI Server URL") if __name__ == "__main__": parser = argparse.ArgumentParser(description="LLMPerf tools to measure LLM performance") From f207cf0a62f2d4c564ce4432bd2a244d5140c7e3 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 18 Jun 2024 08:54:47 +0000 Subject: [PATCH 3/6] Cleaned the requirements file --- llmperf.py | 8 ++++---- requirements.txt | 3 --- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/llmperf.py b/llmperf.py index e093673..70a185f 100644 --- a/llmperf.py +++ b/llmperf.py @@ -138,10 +138,10 @@ def add_parser(base_parser, vllm_batch_size = False): stb_parser.add_argument("--iterations", type=int, default=10, help="The iterations parameter.") stb_parser.add_argument("--output_tokens", type=int, 
default=128, help="Number of tokens to retrieve") stb_parser.add_argument("--batch_size", type=int, default=128, help="Number of sequences to batch") - #stb_engine_parser = stb_parser.add_subparsers(title="Engine", dest="engine", required=True) - #stb_vllm_parser = stb_engine_parser.add_parser("vllm", help="vLLM Engine") - #stb_vllm_parser.add_argument("--model", type=str, default="", help="The model.") - #stb_vllm_parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") + stb_engine_parser = stb_parser.add_subparsers(title="Engine", dest="engine", required=True) + stb_vllm_parser = stb_engine_parser.add_parser("vllm", help="vLLM Engine") + stb_vllm_parser.add_argument("--model", type=str, default="", help="The model.") + stb_vllm_parser.add_argument("--dtype", type=str, default="float16", help="The dtype.") rth_parser = test_parser.add_parser("rate_throughput", help="Measure throughput with sending requests at constant rate") rth_parser.add_argument("--prompt_file", type=str, help="Path to a file containing the prompt.") diff --git a/requirements.txt b/requirements.txt index 83bae8f..9ff727d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1 @@ openai==1.34.0 -vllm==0.4.3 -text-generation==2.0.4 -tritonclient==2.39.0 From c8dd654e5ef58a1e62fbc42ff73b55ac86a8bb6e Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 9 Jul 2024 09:18:39 +0000 Subject: [PATCH 4/6] Add command-line arguments to script for tokenizer and text input; count and display number of tokens --- input_examples/token_counter.py | 26 +++++++++++++++++++++----- 1 file changed, 21 insertions(+), 5 deletions(-) diff --git a/input_examples/token_counter.py b/input_examples/token_counter.py index 587c34c..9ba97c8 100644 --- a/input_examples/token_counter.py +++ b/input_examples/token_counter.py @@ -1,6 +1,22 @@ +import argparse from transformers import AutoTokenizer -tokenizer = AutoTokenizer.from_pretrained("NousResearch/Meta-Llama-3-8B") -text = "Mr. and Mrs. Dursley, of number four, Privet Drive, were proud to say that they were perfectly normal, thank you very much. They were the last people you'd expect to be involved in anything strange or mysterious, because they just didn't hold with such nonsense. Mr. Dursley was the director of a firm called Grunnings, which made drills. He was a big, beefy man with hardly any neck, although he did have a very large mustache. Mrs. Dursley was thin and blonde and had nearly twice the usual amount of neck, which came in very useful as she spent so much of her time craning over garden fences, spying on the neighbors. The Dursleys had a small son called Dudley and in their opinion there was no finer boy anywhere. The Dursleys had everything they wanted, but they also had a secret, and their greatest fear was that somebody would discover it. They didn't think they could bear it if anyone found out about the Potters. Mrs. Potter was Mrs. Dursley's sister, but they hadn't met for several years; in fact, Mrs. Dursley pretended she didn't have a sister, because her sister and her good-for-nothing husband were as unDursleyish as it was possible to be. The Dursleys shuddered to think what the neighbors would say if the Potters arrived in the street. The Dursleys knew that the Potters had a small son, too, but they had never even seen him. This boy was another good reason for keeping the Potters away; they didn't want Dudley mixing with a child like that. When Mr. and Mrs. 
Dursley woke up on the dull, gray Tuesday our story starts, there was nothing about the cloudy sky outside to suggest that strange and mysterious things would soon be happening all over the country. Mr. Dursley hummed as he picked out his most boring tie for work, and Mrs. Dursley gossiped away happily as she wrestled a screaming Dudley into his high chair. None of them noticed a large, tawny owl flutter past the window. At half past eight, Mr. Dursley picked up his briefcase, pecked Mrs. Dursley on the cheek, and tried to kiss Dudley good-bye but missed, because Dudley was now having a tantrum and throwing his cereal at the walls. Little tyke,' chortled Mr. Dursley as he left the house. He got into his car and backed out of number four's drive. It was on the corner of the street that he noticed the first sign of something peculiar a cat reading a map. For a second, Mr. Dursley didn't realize what he had seen then he jerked his head around to look again. There was a tabby cat standing on the corner of Privet Drive, but there wasn't a map in sight. What could he have been thinking of? It must have been a trick of the light. Mr. Dursley blinked and stared at the cat. It stared back. As Mr. Dursley drove around the corner and up the road, he watched the cat in his mirror. It was now reading the sign that said Privet Drive no, looking at the sign; cats couldn't read maps or signs. Mr. Dursley gave himself a little shake and put the cat out of his mind. As he drove toward town he thought of nothing except a large order of drills he was hoping to get that day. But on the edge of town, drills were driven out of his mind by something else. As he sat in the usual morning traffic jam, he couldn't help noticing that there seemed to be a lot of strangely dressed people about. People in cloaks. Mr. Dursley couldn't bear people who dressed in funny clothes the getups you saw on young people! He supposed this was some stupid new fashion. He drummed his fingers on the steering wheel and his eyes fell on a huddle of these weirdos standing quite close by. They were whispering excitedly together. Mr. Dursley was enraged to see that a couple of them weren't young at all; why, that man had to be older than he was, and wearing an emerald-green cloak! The nerve of him! But then it struck Mr. Dursley that this was probably some silly stunt these people were obviously collecting for something . . . yes, that would be it. The traffic moved on and a few minutes later, Mr. Dursley arrived in the Grunnings parking lot, his mind back on drills. Mr. Dursley always sat with his back to the window in his office on the ninth floor. If he hadn't, he might have found it harder to concentrate on drills that morning. He didn't see the owls swoop- ing past in broad daylight, though people down in the street did; they pointed and gazed open-mouthed as owl after owl sped overhead. Most of them had never seen an owl even at nighttime. Mr. Dursley, however, had a perfectly normal, owl-free morning. He yelled at five different people. He made several important telephone calls and shouted a bit more. He was in a very good mood until lunchtime, when he thought he'd stretch his legs and walk across the road to buy himself a bun from the bakery. He'd forgotten all about the people in cloaks until he passed a group of them next to the baker's. He eyed them angrily as he passed. He didn't know why, but they made him uneasy. This bunch were whispering excitedly, too, and he couldn't see a single collecting tin. 
It was on his way back past them, clutching a large doughnut in a bag, that he caught a few words of what they were saying. 'The Potters, that's right, that's what I heard ' ' yes, their son, Harry ' Mr. Dursley stopped dead. Fear flooded him. He looked back at the whisperers as if he wanted to say something to them, but thought better of it. He dashed back across the road, hurried up to his office,snapped at his secretary not to disturb him, seized his telephone,and had almost finished dialing his home number when hechanged his mind. He put the receiver back down and stroked hismustache, thinking . . . no, he was being stupid. Potter wasn't suchan unusual name. He was sure there were lots of people called Potter who had a son called Harry. Come to think of it, he wasn't evensure his nephew was called Harry. He'd never even seen the boy. It might have been Harvey. Or Harold. There was no point in worrying Mrs. Dursley; she always got so upset at any mention of hersister. He didn't blame her if he'd had a sister like that . . . but allthe same, those people in cloaks . . .He found it a lot harder to concentrate on drills that afternoonand when he left the building at five o'clock, he was still so worriedthat he walked straight into someone just outside the door.'Sorry,' he grunted, as the tiny old man stumbled and almostfell. It was a few seconds before Mr. Dursley realized that the manwas wearing a violet cloak. He didn't seem at all upset at being almost knocked to the ground. On the contrary, his face split into awide smile and he said in a squeaky voice that made passersby stare,'Don't be sorry, my dear sir, for nothing could upset me today! Rejoice, for You-Know-Who has gone at last! Even Muggles like yourself should be celebrating, this happy, happy day!'And the old man hugged Mr. Dursley around the middle andwalked off.Mr. Dursley stood rooted to the spot. He had been hugged by acomplete stranger. He also thought he had been called a Muggle,whatever that was. He was rattled. He hurried to his car and set offfor home, hoping he was imagining things, which he had neverhoped before, because he didn't approve of imagination.As he pulled into the driveway of number four, the first thing hesaw and it didn't improve his mood was the tabby cat he'dspotted that morning. It was now sitting on his garden wall. Hewas sure it was the same one; it had the same markings around itseyes.'Shoo!' said Mr. Dursley loudly. The cat didn't move. It just gave him a stern look. Was this normal cat behavior? Mr. Dursley wondered. Trying to pull himself together, he let himself into the house. He was still determined not tomention anything to his wife.Mrs. Dursley had had a nice, normal day. She told him over dinner all about Mrs. Next Door's problems with her daughter and how Dudley had learnt a new word (‘Shan’t!’). Mr Dursley tried to act normally. When Dudley had been put to bed, he went into the living-room in time to catch the last report on the evening news: ‘And finally, bird-watchers everywhere have reported that the nation’s owls have been behaving very unusually today. Although owls normally hunt at night and are hardly ever seen in daylight, there have been hundreds of sightings of these birds flying in every direction since sunrise. Experts are unable to explain why the owls have suddenly changed their sleeping pattern.’ The news reader allowed himself a grin. ‘Most mysterious. And now, over to Jim McGuffin with the weather. 
Going to be any more showers of owls tonight, Jim?’ ‘Well, Ted,’ said the weatherman, ‘I don’t know about that, but it’s"
-tokens = tokenizer.tokenize(text)
-num_tokens = len(tokens)
-print(f"Number of tokens in your text: {num_tokens}")
+
+def count_tokens(tokenizer_name, text):
+    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+    tokens = tokenizer.tokenize(text)
+    return len(tokens)
+
+def main():
+    parser = argparse.ArgumentParser(description="Count the number of tokens in a given text using a specified tokenizer.")
+    parser.add_argument('--tokenizer', type=str, required=True, help="The name of the tokenizer to use.")
+    parser.add_argument('--text', type=str, required=True, help="The text to tokenize.")
+
+    args = parser.parse_args()
+
+    num_tokens = count_tokens(args.tokenizer, args.text)
+
+    print(f"Number of tokens: {num_tokens}")
+
+if __name__ == "__main__":
+    main()
+
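With the excerpt no longer hard-coded, the counter can now be pointed at any Hugging Face tokenizer from the command line. A typical invocation (the model name below simply reuses the default that was hard-coded before this patch):

    python input_examples/token_counter.py \
        --tokenizer NousResearch/Meta-Llama-3-8B \
        --text "Mr. and Mrs. Dursley, of number four, Privet Drive"

Note that count_tokens() relies on tokenizer.tokenize(), which adds no special tokens, so the reported count can be slightly lower than len(tokenizer.encode(text)); for Llama 3 tokenizers, encode() typically prepends a begin-of-text token.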
From 821e17dd20569addbb6bdb98310f692d97f3ad2f Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Jul 2024 09:47:43 +0000
Subject: [PATCH 5/6] chore: add .gitignore and ignore __pycache__

---
 .gitignore                              | 1 +
 __pycache__/openai_perf.cpython-310.pyc | Bin 2718 -> 0 bytes
 __pycache__/vllm_perf.cpython-310.pyc   | Bin 3883 -> 0 bytes
 3 files changed, 1 insertion(+)
 create mode 100644 .gitignore
 delete mode 100644 __pycache__/openai_perf.cpython-310.pyc
 delete mode 100644 __pycache__/vllm_perf.cpython-310.pyc

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c18dd8d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+__pycache__/
diff --git a/__pycache__/openai_perf.cpython-310.pyc b/__pycache__/openai_perf.cpython-310.pyc
deleted file mode 100644
index 9bce24455c7a5ddc96862ae32acafed7175cd05e..0000000000000000000000000000000000000000
GIT binary patch
[binary delta omitted: 2718 bytes of compiled bytecode removed]

diff --git a/__pycache__/vllm_perf.cpython-310.pyc b/__pycache__/vllm_perf.cpython-310.pyc
deleted file mode 100644
index 243a7aad0c01877ff222b95707d2f4e0a14cb7fb..0000000000000000000000000000000000000000
GIT binary patch
[binary delta omitted: 3883 bytes of compiled bytecode removed]

From 2ab07fca335ea32418311f6b993604d75ee77f8b Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Tue, 9 Jul 2024 10:43:46 +0000
Subject: [PATCH 6/6] Rename 'get_model' to 'get_client_model'; add
 transformers to requirements

---
 llmperf.py       |  5 -----
 openai_perf.py   | 14 +++++++-------
 requirements.txt |  1 +
 3 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/llmperf.py b/llmperf.py
index 70a185f..da28fa3 100644
--- a/llmperf.py
+++ b/llmperf.py
@@ -66,11 +66,6 @@ def run_tpot(args):
     measurer = openai_perf.tpot_measurer(prompt, args)
     asyncio.run(async_run_test_n_times(measurer, args.iterations))
 
-def run_static_batch(args):
-    prompt = read_prompt_from_file(args.prompt_file)
-    measurer = None
-    run_test_n_times(measurer, args.iterations)
-
 def run_rate_throughput(args):
     prompt = read_prompt_from_file(args.prompt_file)
     measurer = None
diff --git a/openai_perf.py b/openai_perf.py
index ac70006..c762283 100644
--- a/openai_perf.py
+++ b/openai_perf.py
@@ -2,7 +2,7 @@
 from timeit import default_timer as timer
 
 def ttft_measurer(prompt, args):
-    client, model = get_model(args)
+    client, model = get_client_model(args)
     def single_request():
         start = timer()
         completion = client.completions.create(
@@ -20,7 +20,7 @@ def single_request():
     return single_request
 
 def tpot_measurer(prompt, args):
-    client, model = get_model(args)
+    client, model = get_client_model(args)
     async def single_request():
         start = timer()
         completion = client.completions.create(
@@ -41,7 +41,7 @@ async def single_request():
     return single_request
 
 def rate_throughput_measurer(prompt, args):
-    client, model = get_model(args, async_client = True)
+    client, model = get_client_model(args, async_client = True)
    async def single_request():
         completion = await client.completions.create(
             model=model,
@@ -58,7 +58,7 @@ async def single_request():
     return single_request
 
 def sample_rate_throughput_measurer(args):
-    client, model = get_model(args, async_client = True)
+    client, model = get_client_model(args, async_client = True)
     async def single_request(sample):
         completion = await client.completions.create(
             model=model,
@@ -75,7 +75,7 @@ async def single_request(sample):
     return single_request
 
 def sample_output_rate_throughput_measurer(args):
-    client, model = get_model(args, async_client = True)
+    client, model = get_client_model(args, async_client = True)
     async def single_request(sample):
         completion = await client.completions.create(
             model=model,
@@ -83,14 +83,14 @@ async def single_request(sample):
             prompt=sample["prompt"],
             temperature=1,
             max_tokens=2048,
-            top_k=15,
+            #top_k=15,
             n=1,
             stream=False,
         )
         return completion.usage.completion_tokens
     return single_request
 
-def get_model(args, async_client=False):
+def get_client_model(args, async_client=False):
     client = (openai.Client if not async_client else openai.AsyncClient) (
         api_key = args.api_key,
         base_url = args.api_base
diff --git a/requirements.txt b/requirements.txt
index 9ff727d..ebf483b 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
 openai==1.34.0
+transformers==4.41.2
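Two reading aids for the patch above. First, get_client_model() returns a (client, model) pair rather than a model object, which the new name makes explicit; the latency measurers take the synchronous client, while every throughput measurer requests the async one, since those tests keep many completions in flight concurrently:

    client, model = get_client_model(args)                      # openai.Client: ttft/tpot measurers
    client, model = get_client_model(args, async_client=True)   # openai.AsyncClient: throughput measurers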
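Second, top_k is commented out rather than deleted, presumably because the openai v1 SDK's completions.create() has a fixed signature and rejects keyword arguments the OpenAI API itself does not define. If top_k sampling against an OpenAI-compatible server such as vLLM is still wanted, one workaround is the SDK's extra_body pass-through; a sketch only, not part of this series, and whether the parameter is honored depends entirely on the server:

    completion = await client.completions.create(
        model=model,
        prompt=sample["prompt"],
        temperature=1,
        max_tokens=2048,
        n=1,
        stream=False,
        extra_body={"top_k": 15},  # forwarded verbatim in the JSON request body
    )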