# Source code for easydel.inference.vinference.api_server_test

import asyncio
import json
import typing as tp

import aiohttp


class ChatCompletionClient:
    """Small async client for a ``/v1/chat/completions`` HTTP endpoint.

    The client keeps only ``base_url`` (the server root that request URLs are
    built from) and opens a fresh ``aiohttp`` session for every request.
    """

    # Field name that carries the payload in a server-sent-events line.
    _SSE_DATA_FIELD = "data:"
    # Terminator used by OpenAI-style streams; it is not JSON and must not be
    # handed to ``json.loads``. Skipped in case this server emits it.
    _SSE_DONE_SENTINEL = "[DONE]"

    def __init__(self, base_url: str):
        """Remember the server root URL.

        Args:
            base_url: Root URL of the API server, e.g. ``http://127.0.0.1:7680``.
        """
        self.base_url = base_url

    @staticmethod
    def _parse_sse_line(line: bytes) -> tp.Optional[tp.Dict[str, tp.Any]]:
        """Decode one raw response line into an event payload.

        Args:
            line: One line of the response body, as raw bytes.

        Returns:
            The JSON-decoded payload of a ``data:`` line, or ``None`` when the
            line carries nothing to yield (blank separator, comment, other
            field, empty payload, or the ``[DONE]`` terminator).

        Raises:
            json.JSONDecodeError: If a ``data:`` payload is not valid JSON.
        """
        text = line.decode("utf-8").strip()
        field = ChatCompletionClient._SSE_DATA_FIELD
        if not text.startswith(field):
            return None
        # The space after the colon is optional in the event-stream format, so
        # strip it instead of slicing a fixed number of characters.
        payload = text[len(field):].strip()
        if not payload or payload == ChatCompletionClient._SSE_DONE_SENTINEL:
            return None
        return json.loads(payload)

    async def create_chat_completion(
        self,
        messages: tp.List[tp.Dict[str, str]],
        model: str,
        stream: bool = True,
        **kwargs,
    ) -> tp.AsyncGenerator[tp.Dict[str, tp.Any], None]:
        """POST a chat-completion request and yield the decoded response(s).

        Args:
            messages: Conversation turns, sent as the ``messages`` field.
            model: Model identifier, sent as the ``model`` field.
            stream: Sent as the ``stream`` field. When true, the body is read
                line by line and every ``data:`` event is yielded. When false,
                the whole body is decoded as one JSON document and yielded once.
            **kwargs: Extra fields merged into the request payload; they
                override the named fields on key collision.

        Yields:
            One decoded JSON object per streamed event, or a single object for
            a non-streaming request.

        Raises:
            RuntimeError: If the server answers with a status other than 200.
            json.JSONDecodeError: If a streamed ``data:`` payload is not JSON.
        """
        # Tolerate a trailing slash on the configured root to avoid "//v1/...".
        root = self.base_url.rstrip("/")
        url = f"{root}/v1/chat/completions"
        payload = {"messages": messages, "model": model, "stream": stream, **kwargs}

        async with aiohttp.ClientSession() as session:
            async with session.post(url, json=payload) as response:
                if response.status != 200:
                    raise RuntimeError(f"Error: {response.status}")

                if not stream:
                    # A non-streaming reply has no "data:" lines to iterate, so
                    # decode the body as a whole. content_type=None skips
                    # aiohttp's MIME check in case the server labels the body
                    # differently.
                    yield await response.json(content_type=None)
                    return

                async for line in response.content:
                    event = self._parse_sse_line(line)
                    if event is not None:
                        yield event
async def main():
    """Stream one chat completion from the local server and print it.

    Sends a fixed system/user conversation to ``http://127.0.0.1:7680``, echoes
    each chunk's ``response`` text as it arrives, and prints the finish reason
    and usage once a chunk reports a non-null ``finish_reason``.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": "write a neural network in c++ and rust and compare them",
        },
    ]
    api = ChatCompletionClient("http://127.0.0.1:7680")
    events = api.create_chat_completion(conversation, model="llama-1.53B-20241013")

    async for event in events:
        choice = event["choices"][0]
        reason = choice["finish_reason"]
        if reason is not None:
            # A chunk carrying a finish reason also carries the usage report.
            print("\nFinish reason:", reason)
            print("Usage:", event["usage"])
            continue
        # Flush on every chunk so the text appears as it is generated.
        print(choice["response"], end="", flush=True)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())