# Source: ai-station/.venv/lib/python3.12/site-packages/traceloop/sdk/experiment/utils.py
"""
Shared utilities for running experiments with OpenTelemetry span capture.
"""
import json
from traceloop.sdk import Traceloop
from traceloop.sdk.utils.in_memory_span_exporter import InMemorySpanExporter
from traceloop.sdk.tracing.tracing import TracerWrapper
def extract_trajectory_from_spans(spans):
    """
    Extract prompt and completion trajectory from OpenTelemetry spans.

    Collects raw ``gen_ai.prompt.*`` and ``gen_ai.completion.*`` span
    attributes (keys are kept as-is) and summarizes tool-call activity.

    Args:
        spans: Iterable of ReadableSpan-like objects (anything exposing an
            ``attributes`` mapping), e.g. from InMemorySpanExporter.

    Returns:
        dict with keys:
            - "trajectory_prompts": dict of gen_ai.prompt.* attributes
            - "trajectory_completions": dict of gen_ai.completion.* attributes
            - "tool_calls": list of tool names invoked
            - "tool_inputs": list of tool inputs, index-aligned with tool_calls
            - "tool_outputs": list of tool outputs, index-aligned with tool_calls
    """
    trajectory_prompts_dict = {}
    trajectory_completions_dict = {}
    tool_calls = []
    tool_inputs = []
    tool_outputs = []

    for span in spans:
        # Tolerate objects that do not expose an attributes mapping.
        if not hasattr(span, "attributes"):
            continue
        attributes = span.attributes or {}

        for key, value in attributes.items():
            if key.startswith("gen_ai.prompt."):
                trajectory_prompts_dict[key] = value
            elif key.startswith("gen_ai.completion."):
                trajectory_completions_dict[key] = value

        # Extract tool-call summary. Inputs/outputs are recorded ONLY when a
        # non-empty tool name is present, so the three tool_* lists stay
        # index-aligned (previously inputs/outputs were appended even when
        # the name was falsy, desynchronizing the lists).
        tool_name = attributes.get("gen_ai.tool.name")
        if tool_name:
            tool_calls.append(tool_name)
            # Prefer the completion-side arguments attribute; fall back to
            # the tool-side input attribute, then to "".
            tool_input = (
                attributes.get("gen_ai.completion.tool.arguments", "")
                or attributes.get("gen_ai.tool.input", "")
            )
            tool_inputs.append(tool_input)
            tool_output = (
                attributes.get("gen_ai.tool.output", "")
                or attributes.get("gen_ai.completion.tool.result", "")
            )
            tool_outputs.append(tool_output)

    return {
        "trajectory_prompts": trajectory_prompts_dict,
        "trajectory_completions": trajectory_completions_dict,
        "tool_calls": tool_calls,
        "tool_inputs": tool_inputs,
        "tool_outputs": tool_outputs,
    }
async def run_with_span_capture(task_callable, *args, **kwargs):
"""
Run a task with OpenTelemetry span capture and extract trajectory data.
This function:
1. Initializes Traceloop with InMemorySpanExporter
2. Runs the provided async task callable
3. Captures all OpenTelemetry spans
4. Extracts prompt/completion trajectory from spans
5. Returns trajectory data in JSON format
Args:
task_callable: Async callable to execute (e.g., run_travel_query)
*args: Positional arguments to pass to the task callable
**kwargs: Keyword arguments to pass to the task callable
Returns:
Tuple of (trajectory_prompts, trajectory_completions, final_completion)
- trajectory_prompts: JSON string of prompt trajectory
- trajectory_completions: JSON string of completion trajectory
- final_completion: The final completion content string
"""
# Clear singleton if existed to reinitialize with in-memory exporter
if hasattr(TracerWrapper, "instance"):
del TracerWrapper.instance
# Create in-memory exporter to capture spans
exporter = InMemorySpanExporter()
# Initialize Traceloop with in-memory exporter
Traceloop.init(
app_name="internal-experiment-exporter",
disable_batch=True,
exporter=exporter,
)
try:
# Run the task callable
print(f"\n{'='*80}")
print(f"Running task: {task_callable.__name__}")
print(f"{'='*80}\n")
tool_calls_made = await task_callable(*args, **kwargs)
# Get all captured spans
spans = exporter.get_finished_spans()
print(f"\n{'='*80}")
print(f"Captured {len(spans)} spans from execution")
print(f"{'='*80}\n")
# Extract trajectory from spans
trajectory_data = extract_trajectory_from_spans(spans)
# Get the final completion from llm.completions dict
completions_dict = trajectory_data["trajectory_completions"]
final_completion = ""
if completions_dict:
# Find the highest index completion content
max_idx = -1
for key in completions_dict.keys():
if ".content" in key:
try:
parts = key.split(".")
idx = int(parts[2])
if idx > max_idx:
max_idx = idx
final_completion = completions_dict[key]
except (ValueError, IndexError):
pass
# trajectory_prompts and trajectory_completions are dicts with llm.prompts/completions.* keys
# If empty, use JSON string fallback to avoid validation errors
trajectory_prompts = trajectory_data["trajectory_prompts"]
trajectory_completions = trajectory_data["trajectory_completions"]
# Convert to JSON strings if empty (evaluators expect string when no data)
if not trajectory_prompts:
trajectory_prompts = json.dumps([])
if not trajectory_completions:
trajectory_completions = json.dumps([])
print("📊 Trajectory Summary:")
print(f" - Prompt attributes captured: {len(trajectory_prompts)}")
print(f" - Completion attributes captured: {len(trajectory_completions)}")
tools_called = ', '.join(trajectory_data['tool_calls']) if trajectory_data['tool_calls'] else 'None'
print(f" - Tools called: {tools_called}")
if tool_calls_made:
print(f" - Tools from run: {', '.join(tool_calls_made) if tool_calls_made else 'None'}\n")
json_trajectory_prompts = json.dumps(trajectory_prompts)
json_trajectory_completions = json.dumps(trajectory_completions)
return json_trajectory_prompts, json_trajectory_completions, final_completion
except Exception as e:
raise e