# ai-station/.venv/lib/python3.12/site-packages/opentelemetry/instrumentation/cohere/span_utils.py
#
# 315 lines
# 12 KiB
# Python

from opentelemetry.instrumentation.cohere.utils import (
dont_throw,
dump_object,
should_send_prompts,
to_dict,
should_emit_events,
)
from opentelemetry.semconv._incubating.attributes import (
gen_ai_attributes as GenAIAttributes,
)
from opentelemetry.semconv_ai import (
LLMRequestTypeValues,
SpanAttributes,
)
from opentelemetry.trace.status import Status, StatusCode
def _set_span_attribute(span, name, value):
if value is not None:
if value != "":
span.set_attribute(name, value)
return
@dont_throw
def set_input_content_attributes(span, llm_request_type, kwargs):
    """Record the request's input content as prompt attributes on ``span``.

    Nothing is written unless the span is recording, ``should_send_prompts()``
    is true and ``should_emit_events()`` is false.

    Args:
        span: Span that receives the attributes.
        llm_request_type: ``LLMRequestTypeValues`` member selecting which keys
            of ``kwargs`` are read.
        kwargs: Keyword arguments of the instrumented client call.
    """
    if not span.is_recording():
        return
    if not should_send_prompts() or should_emit_events():
        return

    if llm_request_type == LLMRequestTypeValues.COMPLETION:
        _set_completion_prompt_attributes(span, kwargs)
    # client V1
    elif llm_request_type == LLMRequestTypeValues.CHAT and kwargs.get("message"):
        _set_chat_v1_prompt_attributes(span, kwargs)
    # client V2
    elif llm_request_type == LLMRequestTypeValues.CHAT and kwargs.get("messages"):
        _set_chat_v2_prompt_attributes(span, kwargs)
    elif llm_request_type == LLMRequestTypeValues.RERANK:
        _set_rerank_prompt_attributes(span, kwargs)
    elif llm_request_type == LLMRequestTypeValues.EMBEDDING:
        _set_embedding_prompt_attributes(span, kwargs)


def _set_completion_prompt_attributes(span, kwargs):
    """Record ``kwargs["prompt"]`` as a single user prompt at index 0."""
    prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.0"
    _set_span_attribute(span, f"{prefix}.role", "user")
    _set_span_attribute(span, f"{prefix}.content", kwargs.get("prompt"))


def _set_chat_v1_prompt_attributes(span, kwargs):
    """Record a V1 chat call: optional ``preamble`` (system), then ``message`` (user)."""
    user_message_index = 0
    if system_message := kwargs.get("preamble"):
        _set_span_attribute(span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.role", "system")
        _set_span_attribute(
            span, f"{GenAIAttributes.GEN_AI_PROMPT}.0.content", system_message
        )
        # The user message follows the preamble.
        user_message_index = 1

    prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{user_message_index}"
    _set_span_attribute(span, f"{prefix}.role", "user")
    _set_span_attribute(span, f"{prefix}.content", kwargs.get("message"))


def _set_chat_v2_prompt_attributes(span, kwargs):
    """Record a V2 chat call: every entry of ``messages`` plus any function tools."""
    for index, message in enumerate(kwargs.get("messages")):
        message_dict = to_dict(message)
        prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{index}"
        _set_span_attribute(span, f"{prefix}.role", message_dict.get("role"))
        _set_span_attribute(span, f"{prefix}.content", message_dict.get("content"))

    for index, tool in enumerate(kwargs.get("tools") or []):
        function = tool.get("function")
        if not function:
            # Only tools carrying a "function" entry are recorded; the index
            # still reflects the tool's position in the request.
            continue
        prefix = f"{SpanAttributes.LLM_REQUEST_FUNCTIONS}.{index}"
        _set_span_attribute(span, f"{prefix}.name", function.get("name"))
        _set_span_attribute(span, f"{prefix}.description", function.get("description"))
        _set_span_attribute(
            span, f"{prefix}.parameters", dump_object(function.get("parameters"))
        )


def _set_rerank_prompt_attributes(span, kwargs):
    """Record a rerank call: each document as a system prompt, then the query as user."""
    # ``documents`` may be missing or None; treat both as "no documents" so
    # the query is still recorded (at index 0) instead of failing on len(None).
    documents = kwargs.get("documents") or []
    for index, document in enumerate(documents):
        prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{index}"
        _set_span_attribute(span, f"{prefix}.role", "system")
        _set_span_attribute(span, f"{prefix}.content", document)

    # The query takes the slot right after the last document.
    query_prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.{len(documents)}"
    _set_span_attribute(span, f"{query_prefix}.role", "user")
    _set_span_attribute(span, f"{query_prefix}.content", kwargs.get("query"))


def _set_embedding_prompt_attributes(span, kwargs):
    """Record an embedding call: ``inputs``, or ``texts`` wrapped as text inputs."""
    prefix = f"{GenAIAttributes.GEN_AI_PROMPT}.0"
    _set_span_attribute(span, f"{prefix}.role", "user")

    inputs = kwargs.get("inputs")
    if not inputs:
        # Fall back to ``texts``; a missing or None value yields an empty list
        # rather than a TypeError from iterating None.
        texts = kwargs.get("texts") or []
        inputs = [{"type": "text", "text": text} for text in texts]
    _set_span_attribute(span, f"{prefix}.content", dump_object(inputs))
@dont_throw
def set_response_content_attributes(span, llm_request_type, response):
    """Record response content on ``span`` and mark the span status as OK.

    Does nothing when the span is not recording. Content is written only when
    ``should_send_prompts()`` is true, by the handler matching
    ``llm_request_type`` (chat, completion or rerank).
    """
    if not span.is_recording():
        return

    if should_send_prompts():
        handlers = (
            (LLMRequestTypeValues.CHAT, _set_span_chat_response),
            (LLMRequestTypeValues.COMPLETION, _set_span_generations_response),
            (LLMRequestTypeValues.RERANK, _set_span_rerank_response),
        )
        for request_type, handler in handlers:
            if llm_request_type == request_type:
                handler(span, response)
                break

    span.set_status(Status(StatusCode.OK))
@dont_throw
def set_span_request_attributes(span, kwargs):
    """Copy the request's model and sampling parameters from ``kwargs`` to ``span``.

    Does nothing when the span is not recording. Parameters that are absent
    (``None``) or empty strings are skipped by ``_set_span_attribute``.

    Args:
        span: Span that receives the attributes.
        kwargs: Keyword arguments of the instrumented client call.
    """
    if not span.is_recording():
        return

    # Cohere's request parameter is ``max_tokens``; ``max_tokens_to_sample``
    # is kept as a fallback so callers relying on the old key still work.
    max_tokens = kwargs.get("max_tokens")
    if max_tokens is None:
        max_tokens = kwargs.get("max_tokens_to_sample")

    _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MODEL, kwargs.get("model"))
    _set_span_attribute(span, GenAIAttributes.GEN_AI_REQUEST_MAX_TOKENS, max_tokens)
    _set_span_attribute(
        span, GenAIAttributes.GEN_AI_REQUEST_TEMPERATURE, kwargs.get("temperature")
    )
    # Both the short ("p"/"k") and long ("top_p"/"top_k") spellings are accepted.
    _set_span_attribute(
        span, GenAIAttributes.GEN_AI_REQUEST_TOP_P, kwargs.get("p", kwargs.get("top_p"))
    )
    _set_span_attribute(
        span, GenAIAttributes.GEN_AI_REQUEST_TOP_K, kwargs.get("k", kwargs.get("top_k"))
    )
    if stop_sequences := kwargs.get("stop_sequences", []):
        _set_span_attribute(
            span,
            GenAIAttributes.GEN_AI_REQUEST_STOP_SEQUENCES,
            dump_object(stop_sequences),
        )
    # TODO: Migrate to GEN_AI_REQUEST_FREQUENCY_PENALTY and GEN_AI_REQUEST_PRESENCE_PENALTY
    _set_span_attribute(
        span, SpanAttributes.LLM_FREQUENCY_PENALTY, kwargs.get("frequency_penalty")
    )
    _set_span_attribute(
        span, SpanAttributes.LLM_PRESENCE_PENALTY, kwargs.get("presence_penalty")
    )
@dont_throw
def set_span_response_attributes(span, response):
    """Record the response id and token usage from ``response`` on ``span``.

    Does nothing when the span is not recording. Usage is read from whichever
    of ``token_count``, ``meta`` and ``usage`` the response carries; when more
    than one is present, later ones overwrite the attributes set by earlier
    ones.

    Args:
        span: Span that receives the attributes.
        response: Client response; converted with ``to_dict`` before reading.
    """
    if not span.is_recording():
        return

    response_dict = to_dict(response)

    # Cohere API v1 exposes the identifier as "response_id", API v2 as "id".
    response_id = response_dict.get("response_id") or response_dict.get("id")
    if response_id:
        _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, response_id)

    # Cohere v4
    if token_count := response_dict.get("token_count"):
        token_count_dict = to_dict(token_count)
        _set_token_usage_attributes(
            span,
            token_count_dict.get("prompt_tokens"),
            token_count_dict.get("response_tokens"),
        )

    # Cohere v5
    if meta := response_dict.get("meta"):
        # ``or {}`` also covers a "billed_units" key that is present but None,
        # which a ``.get(key, {})`` default would let through.
        billed_units_dict = to_dict(to_dict(meta).get("billed_units") or {})
        _set_token_usage_attributes(
            span,
            billed_units_dict.get("input_tokens"),
            billed_units_dict.get("output_tokens"),
        )

    # Cohere API v2
    if usage := response_dict.get("usage"):
        # usage also has usage.tokens of type UsageTokens. This usually has
        # the same number of output tokens, but many more input tokens
        # (possibly pre-prompted).
        billed_units_dict = to_dict(to_dict(usage).get("billed_units") or {})
        _set_token_usage_attributes(
            span,
            billed_units_dict.get("input_tokens"),
            billed_units_dict.get("output_tokens"),
        )


def _set_token_usage_attributes(span, input_tokens, output_tokens):
    """Write total, completion and prompt token counts; a missing count is 0."""
    # A count that is present but None would otherwise break the addition.
    input_tokens = input_tokens or 0
    output_tokens = output_tokens or 0
    _set_span_attribute(
        span, SpanAttributes.LLM_USAGE_TOTAL_TOKENS, input_tokens + output_tokens
    )
    _set_span_attribute(span, SpanAttributes.LLM_USAGE_COMPLETION_TOKENS, output_tokens)
    _set_span_attribute(span, SpanAttributes.LLM_USAGE_PROMPT_TOKENS, input_tokens)
def _set_span_chat_response(span, response):
    """Record a chat response as completion 0 on ``span``.

    Writes the assistant role, the finish reason (as a one-element list) and
    then either the v1 ``text`` or the v2 ``message`` content, followed by the
    v2 message's function tool calls.

    Args:
        span: Span that receives the attributes.
        response: Chat response; converted with ``to_dict`` before reading.
    """
    index = 0
    prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
    _set_span_attribute(span, f"{prefix}.role", "assistant")

    response_dict = to_dict(response)
    if finish_reason := response_dict.get("finish_reason"):
        _set_span_attribute(
            span, GenAIAttributes.GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason]
        )

    # Cohere API v1
    if text := response_dict.get("text"):
        _set_span_attribute(span, f"{prefix}.content", text)
    # Cohere API v2
    elif message := response_dict.get("message"):
        message_dict = to_dict(message)
        content = message_dict.get("content") or []
        if tool_plan := message_dict.get("tool_plan"):
            # Build a new list instead of appending in place: ``content`` may
            # be the very list held by the response, which the
            # instrumentation must not modify.
            content = content + [{"type": "text", "text": tool_plan}]
        # TODO: Add citations, similarly to tool_plan
        _set_span_attribute(span, f"{prefix}.content", dump_object(content))

        # Only tool calls with a "function" entry are recorded, numbered
        # consecutively from 0.
        tool_call_index = 0
        for tool_call in message_dict.get("tool_calls") or []:
            function = tool_call.get("function")
            if not function:
                continue
            tool_call_prefix = f"{prefix}.tool_calls.{tool_call_index}"
            if tool_call_id := tool_call.get("id"):
                _set_span_attribute(span, f"{tool_call_prefix}.id", tool_call_id)
            if function_name := function.get("name"):
                _set_span_attribute(span, f"{tool_call_prefix}.name", function_name)
            if arguments := function.get("arguments"):
                # no dump_object here, since it's already a string (OpenAI-like)
                _set_span_attribute(span, f"{tool_call_prefix}.arguments", arguments)
            tool_call_index += 1
def _set_span_generations_response(span, response):
    """Record each generation's text and id from a completion response.

    Args:
        span: Span that receives the attributes.
        response: Either an object with a ``generations`` attribute or an
            iterable of generation objects.
    """
    # The fallback branch below treats ``response`` as a plain iterable of
    # generations, so ``id`` is not required here. NOTE(review): whether that
    # iterable exposes ``id`` is not visible from this file; a missing one
    # previously aborted the function before any completion was recorded.
    _set_span_attribute(
        span, GenAIAttributes.GEN_AI_RESPONSE_ID, getattr(response, "id", None)
    )
    if hasattr(response, "generations"):
        generations = response.generations  # Cohere v5
    else:
        generations = response  # Cohere v4

    for index, generation in enumerate(generations):
        prefix = f"{GenAIAttributes.GEN_AI_COMPLETION}.{index}"
        _set_span_attribute(span, f"{prefix}.content", generation.text)
        _set_span_attribute(span, f"gen_ai.response.{index}.id", generation.id)
def _set_span_rerank_response(span, response):
    """Record a rerank response: its id plus one completion per result.

    Each result becomes an assistant completion whose content is
    ``"Doc <index>, Score: <relevance_score>"``. When the result carries a
    truthy ``document``, a newline and the document's text follow; the text is
    read as an attribute when available and via ``.get("text")`` otherwise.
    """
    _set_span_attribute(span, GenAIAttributes.GEN_AI_RESPONSE_ID, response.id)

    for position, result in enumerate(response.results):
        completion_key = f"{GenAIAttributes.GEN_AI_COMPLETION}.{position}"
        _set_span_attribute(span, f"{completion_key}.role", "assistant")

        parts = [f"Doc {result.index}, Score: {result.relevance_score}"]
        document = getattr(result, "document", None)
        if document:
            if hasattr(document, "text"):
                document_text = document.text
            else:
                document_text = document.get("text")
            parts.append(f"{document_text}")

        _set_span_attribute(span, f"{completion_key}.content", "\n".join(parts))