# Source code for easydel.inference.vinference.api_server_test
import asyncio
import json
import typing as tp
import aiohttp
class ChatCompletionClient:
    """Small async client for a ``/v1/chat/completions`` HTTP endpoint.

    The client POSTs a JSON payload and reads the response body line by
    line, yielding the decoded JSON of every server-sent-event style line
    that starts with ``data: ``.

    Args:
        base_url: Scheme, host and port of the server, e.g.
            ``"http://127.0.0.1:7680"``. A trailing slash is tolerated.
    """

    # Prefix that marks a payload-carrying line in the response body.
    _DATA_PREFIX = "data: "
    # End-of-stream marker used by OpenAI-style streaming APIs. It is not
    # JSON, so it must be recognised before the payload is decoded.
    _DONE_SENTINEL = "[DONE]"

    def __init__(self, base_url: str):
        self.base_url = base_url

    def _completions_url(self) -> str:
        """Return the chat-completions endpoint URL for ``self.base_url``."""
        # Strip trailing slashes so "http://host/" does not become
        # "http://host//v1/chat/completions".
        return f"{self.base_url.rstrip('/')}/v1/chat/completions"

    @classmethod
    def _extract_data(cls, raw_line: bytes) -> tp.Optional[str]:
        """Return the text after ``data: `` in a raw body line.

        Args:
            raw_line: One undecoded line of the response body.

        Returns:
            The payload text, or ``None`` when the line (after stripping
            surrounding whitespace) does not start with ``data: `` — for
            example blank separator lines.
        """
        line = raw_line.decode("utf-8").strip()
        if not line.startswith(cls._DATA_PREFIX):
            return None
        return line[len(cls._DATA_PREFIX):]

    async def create_chat_completion(
        self,
        messages: tp.List[tp.Dict[str, str]],
        model: str,
        stream: bool = True,
        **kwargs,
    ) -> tp.AsyncGenerator[tp.Dict[str, tp.Any], None]:
        """POST a chat-completion request and yield each decoded data event.

        Args:
            messages: Chat messages, sent under the ``"messages"`` key.
            model: Model identifier, sent under the ``"model"`` key.
            stream: Value sent under the ``"stream"`` key.
            **kwargs: Extra fields merged into the request payload.

        Yields:
            The JSON-decoded payload of every ``data: `` line in the
            response body. Lines without that prefix are ignored.

        Raises:
            Exception: If the server responds with a status other than 200.
            json.JSONDecodeError: If a ``data: `` payload other than the
                ``[DONE]`` marker is not valid JSON.
        """
        payload = {"messages": messages, "model": model, "stream": stream, **kwargs}
        async with aiohttp.ClientSession() as session:
            async with session.post(self._completions_url(), json=payload) as response:
                if response.status != 200:
                    raise Exception(f"Error: {response.status}")
                async for raw_line in response.content:
                    data = self._extract_data(raw_line)
                    if data is None:
                        continue
                    if data.strip() == self._DONE_SENTINEL:
                        # Terminal marker: stop instead of feeding it to
                        # json.loads, which would raise.
                        break
                    yield json.loads(data)
async def main():
    """Stream one chat completion from a local server and print it.

    Text from each chunk is printed as it arrives; when a chunk carries a
    finish reason, that reason and the chunk's usage entry are printed
    instead.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "write a neural network in c++ and rust and compare them",
        },
    ]
    client = ChatCompletionClient("http://127.0.0.1:7680")
    chunks = client.create_chat_completion(
        conversation, model="llama-1.53B-20241013"
    )
    async for chunk in chunks:
        if chunk["choices"][0]["finish_reason"] is not None:
            # A non-None finish reason marks the closing chunk.
            print("\nFinish reason:", chunk["choices"][0]["finish_reason"])
            print("Usage:", chunk["usage"])
            continue
        # Intermediate chunk: emit the text without a trailing newline.
        print(chunk["choices"][0]["response"], end="", flush=True)
# Script entry point: drive the demo coroutine only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())