102 lines
3.9 KiB
Python
102 lines
3.9 KiB
Python
import requests
|
|
from typing import List, Dict, Any, Optional, Union
|
|
from .exceptions import UnauthorizedError
|
|
|
|
|
|
class ChatClient:
    """HTTP client for the ``/chat/completions`` endpoint of a LiteLLM proxy server."""

    def __init__(self, base_url: str, api_key: Optional[str] = None):
        """
        Initialize the ChatClient.

        Args:
            base_url (str): The base URL of the LiteLLM proxy server (e.g., "http://localhost:8000")
            api_key (Optional[str]): API key for authentication. If provided, it will be sent as a Bearer token.
        """
        # Strip trailing slashes so endpoint paths can be appended with a single "/".
        self._base_url = base_url.rstrip("/")
        self._api_key = api_key

    def _get_headers(self) -> Dict[str, str]:
        """
        Get the headers for API requests, including authorization if api_key is set.

        Returns:
            Dict[str, str]: Headers to use for API requests
        """
        headers = {"Content-Type": "application/json"}
        # A missing or empty API key means no Authorization header is sent at all.
        if self._api_key:
            headers["Authorization"] = f"Bearer {self._api_key}"
        return headers

    @staticmethod
    def _build_payload(
        model: str,
        messages: List[Dict[str, str]],
        **optional: Any,
    ) -> Dict[str, Any]:
        """
        Build the JSON body for a chat completion request.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            **optional: Optional request parameters; entries whose value is None are omitted

        Returns:
            Dict[str, Any]: The required fields followed by every optional parameter that
                was explicitly set, in the order they were passed
        """
        payload: Dict[str, Any] = {"model": model, "messages": messages}
        # Compare against None (not truthiness) so valid falsy values such as
        # temperature=0.0 or an empty user string are still sent.
        payload.update(
            {key: value for key, value in optional.items() if value is not None}
        )
        return payload

    def completions(
        self,
        model: str,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        n: Optional[int] = None,
        max_tokens: Optional[int] = None,
        presence_penalty: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        user: Optional[str] = None,
        return_request: bool = False,
    ) -> Union[Dict[str, Any], requests.Request]:
        """
        Create a chat completion.

        Args:
            model (str): The model to use for completion
            messages (List[Dict[str, str]]): The messages to generate a completion for
            temperature (Optional[float]): Sampling temperature between 0 and 2
            top_p (Optional[float]): Nucleus sampling parameter between 0 and 1
            n (Optional[int]): Number of completions to generate
            max_tokens (Optional[int]): Maximum number of tokens to generate
            presence_penalty (Optional[float]): Presence penalty between -2.0 and 2.0
            frequency_penalty (Optional[float]): Frequency penalty between -2.0 and 2.0
            user (Optional[str]): Unique identifier for the end user
            return_request (bool): If True, returns the request object instead of executing it

        Returns:
            Union[Dict[str, Any], requests.Request]: Either the decoded JSON response from the
                server, or the unsent (not yet prepared) ``requests.Request`` if
                return_request is True

        Raises:
            UnauthorizedError: If the request fails with a 401 status code
            requests.exceptions.RequestException: If the request fails with any other error
        """
        url = f"{self._base_url}/chat/completions"

        data = self._build_payload(
            model,
            messages,
            temperature=temperature,
            top_p=top_p,
            n=n,
            max_tokens=max_tokens,
            presence_penalty=presence_penalty,
            frequency_penalty=frequency_penalty,
            user=user,
        )

        request = requests.Request("POST", url, headers=self._get_headers(), json=data)

        if return_request:
            return request

        # The context manager closes the session (and its pooled connections)
        # on every exit path, including when an exception propagates.
        with requests.Session() as session:
            try:
                # NOTE(review): no timeout is passed to send(); consider whether
                # callers need one to avoid waiting indefinitely on a stalled server.
                response = session.send(request.prepare())
                response.raise_for_status()
            except requests.exceptions.HTTPError as e:
                # Translate only 401 into the package-specific error; every
                # other HTTP error is re-raised unchanged.
                if e.response is not None and e.response.status_code == 401:
                    raise UnauthorizedError(e) from e
                raise
            return response.json()