ai-station/.venv/lib/python3.12/site-packages/literalai/evaluation/dataset.py

193 lines
5.9 KiB
Python

from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Dict, List, Literal, Optional, cast
from typing_extensions import TypedDict
from literalai.my_types import Utils
if TYPE_CHECKING:
from literalai.api import LiteralAPI
from literalai.evaluation.dataset_experiment import DatasetExperiment
from literalai.evaluation.dataset_item import DatasetItem, DatasetItemDict
# Closed set of values accepted for a dataset's `type` field.
# `Dataset` defaults to "key_value"; `Dataset.add_step` refuses to run on a
# "generation" dataset.
DatasetType = Literal["key_value", "generation"]
class DatasetDict(TypedDict, total=False):
    """
    Dictionary form of a dataset, using camelCase keys.

    This is the shape read by `Dataset.from_dict` and mirrored by
    `Dataset.to_dict`. The TypedDict is declared with `total=False`, so every
    key may be absent.
    """

    # Identifier of the dataset.
    id: str
    # Creation timestamp, kept as a string.
    createdAt: str
    # Free-form metadata attached to the dataset.
    metadata: Dict
    # Optional human-readable name.
    name: Optional[str]
    # Optional description.
    description: Optional[str]
    # Items belonging to the dataset, in dictionary form; may be None.
    items: Optional[List[DatasetItemDict]]
    # Kind of dataset, see `DatasetType`.
    type: DatasetType
@dataclass(repr=False)
class Dataset(Utils):
    """
    A dataset of items, each item representing an ideal scenario to run experiments on.

    Every method that changes remote state goes through `self.api`; the local
    `items` list is kept in step with item creations and deletions made via
    this object.
    """

    # Client used for every remote call made by this object.
    api: "LiteralAPI"
    id: str
    created_at: str
    metadata: Dict
    name: Optional[str] = None
    description: Optional[str] = None
    # Each instance gets its own list (never a shared mutable default).
    items: List[DatasetItem] = field(default_factory=list)
    type: DatasetType = "key_value"

    def to_dict(self):
        """
        Serialize the dataset to a dictionary using the `DatasetDict` keys.

        Returns:
            A dict with the keys `id`, `createdAt`, `metadata`, `name`,
            `description`, `items` and `type`. `items` holds the result of
            `to_dict()` for each item.
        """
        return {
            "id": self.id,
            "createdAt": self.created_at,
            "metadata": self.metadata,
            "name": self.name,
            "description": self.description,
            # The mutating methods below allow for `self.items` being None,
            # so serialization tolerates it too instead of raising.
            "items": [item.to_dict() for item in self.items or []],
            "type": self.type,
        }

    @classmethod
    def from_dict(cls, api: "LiteralAPI", dataset: DatasetDict) -> "Dataset":
        """
        Build a `Dataset` from its dictionary form.

        Args:
            api: The API client the new instance will use.
            dataset: The dataset dictionary. Missing keys fall back to
                defaults: empty string for `id` and `createdAt`, `{}` for
                `metadata`, `None` for `name` and `description`, no items,
                and `"key_value"` for `type`.

        Returns:
            `Dataset`: The constructed instance.

        Raises:
            TypeError: If `items` is present, not None, and not a list.
        """
        items = dataset.get("items")
        # `DatasetDict.items` is Optional: an explicit None means "no items"
        # and must not be rejected as a malformed payload.
        if items is None:
            items = []
        if not isinstance(items, list):
            # TypeError is still caught by callers handling `Exception`.
            raise TypeError("Dataset items should be an array")

        return cls(
            api=api,
            id=dataset.get("id", ""),
            created_at=dataset.get("createdAt", ""),
            metadata=dataset.get("metadata", {}),
            name=dataset.get("name"),
            description=dataset.get("description"),
            items=[DatasetItem.from_dict(item) for item in items],
            type=dataset.get("type", cast(DatasetType, "key_value")),
        )

    def _append_item(self, dataset_item: DatasetItem) -> DatasetItem:
        """Append an item to the local `items` list, creating the list if it is None."""
        if self.items is None:
            self.items = []
        self.items.append(dataset_item)
        return dataset_item

    def update(
        self,
        name: Optional[str] = None,
        description: Optional[str] = None,
        metadata: Optional[Dict] = None,
    ):
        """
        Update the dataset with the given name, description and metadata.

        The arguments are forwarded unchanged to `api.update_dataset`; the
        local `name`, `description` and `metadata` are then overwritten with
        the values of the dataset returned by that call.

        Args:
            name: The new name (optional).
            description: The new description (optional).
            metadata: The new metadata (optional).
        """
        updated_dataset = self.api.update_dataset(
            self.id, name=name, description=description, metadata=metadata
        )
        self.name = updated_dataset.name
        self.description = updated_dataset.description
        self.metadata = updated_dataset.metadata

    def delete(self):
        """
        Deletes the dataset.
        """
        self.api.delete_dataset(self.id)

    def create_item(
        self,
        input: Dict,
        expected_output: Optional[Dict] = None,
        metadata: Optional[Dict] = None,
    ) -> DatasetItem:
        """
        Creates a new dataset item and adds it to this dataset.

        Args:
            input: The input data for the dataset item.
            expected_output: The output data for the dataset item (optional).
            metadata: Metadata for the dataset item (optional).

        Returns:
            `DatasetItem`: The created DatasetItem instance.
        """
        dataset_item = self.api.create_dataset_item(
            self.id, input, expected_output, metadata
        )
        return self._append_item(dataset_item)

    def create_experiment(
        self,
        name: str,
        prompt_variant_id: Optional[str] = None,
        params: Optional[Dict] = None,
    ) -> DatasetExperiment:
        """
        Creates a new dataset experiment based on this dataset.

        Args:
            name: The name of the experiment.
            prompt_variant_id: The Prompt variant ID to experiment on.
            params: The params used on the experiment.

        Returns:
            `DatasetExperiment`: The created DatasetExperiment instance.
        """
        return self.api.create_experiment(
            name=name,
            dataset_id=self.id,
            prompt_variant_id=prompt_variant_id,
            params=params,
        )

    def delete_item(self, item_id: str):
        """
        Deletes a dataset item from this dataset.

        The item is deleted through the API first, then dropped from the
        local `items` list.

        Args:
            item_id: The ID of the dataset item to delete.
        """
        self.api.delete_dataset_item(item_id)
        if self.items is not None:
            self.items = [item for item in self.items if item.id != item_id]

    def add_step(self, step_id: str, metadata: Optional[Dict] = None) -> DatasetItem:
        """
        Create a new dataset item based on a step and add it to this dataset.

        Args:
            step_id: The id of the step to add to the dataset.
            metadata: Metadata for the dataset item (optional).

        Returns:
            `DatasetItem`: The created DatasetItem instance.

        Raises:
            ValueError: If this dataset's type is "generation".
        """
        # Checked before any API call so nothing is created remotely.
        if self.type == "generation":
            raise ValueError("Cannot add a step to a generation dataset")
        dataset_item = self.api.add_step_to_dataset(self.id, step_id, metadata)
        return self._append_item(dataset_item)

    def add_generation(
        self, generation_id: str, metadata: Optional[Dict] = None
    ) -> DatasetItem:
        """
        Create a new dataset item based on a generation and add it to this dataset.

        Args:
            generation_id: The id of the generation to add to the dataset.
            metadata: Metadata for the dataset item (optional).

        Returns:
            `DatasetItem`: The created DatasetItem instance.
        """
        dataset_item = self.api.add_generation_to_dataset(
            self.id, generation_id, metadata
        )
        return self._append_item(dataset_item)