from contextlib import contextmanager
from typing import Optional

from opentelemetry import context
from opentelemetry.semconv._incubating.attributes import (
    gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv_ai import SpanAttributes
from opentelemetry.trace import Span, set_span_in_context
from pydantic import BaseModel

from traceloop.sdk.tracing.context_manager import get_tracer


class LLMMessage(BaseModel):
    """A single chat message sent to or returned by the LLM."""

    role: str
    content: str


class LLMUsage(BaseModel):
    """Token usage for a single LLM call. The cache_* fields are for
    vendors that report prompt caching (e.g. Anthropic) and stay None
    when unavailable."""

    prompt_tokens: int
    completion_tokens: int
    total_tokens: int
    cache_creation_input_tokens: Optional[int] = None
    cache_read_input_tokens: Optional[int] = None


class LLMSpan:
    """Wrapper around an OpenTelemetry span that reports LLM request,
    response, and usage data as span attributes."""

    _span: Optional[Span] = None

    def __init__(self, span: Span):
        self._span = span

    def report_request(self, model: str, messages: list[LLMMessage]):
        """Record the requested model and each prompt message as indexed
        gen_ai.prompt.{idx}.role / .content attributes."""
        self._span.set_attribute(GenAIAttributes.GEN_AI_REQUEST_MODEL, model)
        for idx, message in enumerate(messages):
            self._span.set_attribute(
                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.role", message.role
            )
            self._span.set_attribute(
                f"{GenAIAttributes.GEN_AI_PROMPT}.{idx}.content", message.content
            )

    def report_response(self, model: str, completions: list[str]):
        """Record the responding model and each completion as indexed
        gen_ai.completion.{idx}.* attributes; the role is always
        "assistant"."""
        self._span.set_attribute(GenAIAttributes.GEN_AI_RESPONSE_MODEL, model)
        for idx, completion in enumerate(completions):
            self._span.set_attribute(
                f"{GenAIAttributes.GEN_AI_COMPLETION}.{idx}.role", "assistant"
            )
            self._span.set_attribute(
                f"{GenAIAttributes.GEN_AI_COMPLETION}.{idx}.content", completion
            )

    def report_usage(self, usage: LLMUsage):
        """Record token counts; cache-related counts are only set when
        the vendor reported them."""
        self._span.set_attribute(
            GenAIAttributes.GEN_AI_USAGE_INPUT_TOKENS, usage.prompt_tokens
        )
        self._span.set_attribute(
            GenAIAttributes.GEN_AI_USAGE_OUTPUT_TOKENS, usage.completion_tokens
        )
        self._span.set_attribute(
            SpanAttributes.LLM_USAGE_TOTAL_TOKENS, usage.total_tokens
        )
        if usage.cache_creation_input_tokens is not None:
            self._span.set_attribute(
                SpanAttributes.LLM_USAGE_CACHE_CREATION_INPUT_TOKENS,
                usage.cache_creation_input_tokens,
            )
        if usage.cache_read_input_tokens is not None:
            self._span.set_attribute(
                SpanAttributes.LLM_USAGE_CACHE_READ_INPUT_TOKENS,
                usage.cache_read_input_tokens,
            )


@contextmanager
def track_llm_call(vendor: str, type: str):
    """Context manager for manually tracing an LLM call. Starts a
    "{vendor}.{type}" span, attaches it to the current OTel context,
    and yields an LLMSpan for reporting request/response/usage data.
    The span is always detached and ended on exit."""
    with get_tracer() as tracer:
        span = tracer.start_span(name=f"{vendor}.{type}")
        span.set_attribute(GenAIAttributes.GEN_AI_SYSTEM, vendor)
        span.set_attribute(SpanAttributes.LLM_REQUEST_TYPE, type)
        # Make the span current so any nested spans parent to it.
        ctx = set_span_in_context(span)
        token = context.attach(ctx)
        try:
            yield LLMSpan(span)
        finally:
            context.detach(token)
            span.end()
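

# Example usage (a minimal sketch, not part of the original file): it
# assumes the Traceloop SDK has been initialized elsewhere (typically via
# Traceloop.init()); the vendor/type strings, model name, and token counts
# below are illustrative placeholders.
#
#     with track_llm_call(vendor="openai", type="chat") as span:
#         span.report_request(
#             model="gpt-4o",
#             messages=[LLMMessage(role="user", content="Tell me a joke")],
#         )
#         # ... perform the actual LLM call here ...
#         span.report_response(model="gpt-4o", completions=["Why did..."])
#         span.report_usage(
#             LLMUsage(prompt_tokens=11, completion_tokens=7, total_tokens=18)
#         )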