Compare commits

..

2 Commits

Author SHA1 Message Date
AI Station Server bffd9aa249 Fix: Smart chunking per Excel e Hybrid Search funzionante 2025-12-30 17:06:13 +01:00
AI Station Server 9cef64f9ea implementazione BGE-M3 Dense 2025-12-30 08:51:10 +01:00
14 changed files with 388 additions and 553 deletions

View File

@ -1,68 +1,153 @@
[project]
# List of environment variables to be provided by each user to use the app.
user_env = []
# Duration (in seconds) during which the session is saved when the connection is lost
session_timeout = 3600
user_session_timeout = 1296000
# Duration (in seconds) of the user session expiry
user_session_timeout = 1296000 # 15 days
# Enable third parties caching (e.g., LangChain cache)
cache = false
# Whether to persist user environment variables (API keys) to the database
# Set to true to store user env vars in DB, false to exclude them for security
persist_user_env = false
# Whether to mask user environment variables (API keys) in the UI with password type
# Set to true to show API keys as ***, false to show them as plain text
mask_user_env = false
# Authorized origins
allow_origins = ["*"]
[features]
unsafe_allow_html = true
# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript)
unsafe_allow_html = false
# Process and display mathematical expressions. This can clash with "$" characters in messages.
latex = false
# Autoscroll new user messages at the top of the window
user_message_autoscroll = true
# Automatically tag threads with the current chat profile (if a chat profile is used)
auto_tag_thread = true
# Allow users to edit their own messages
edit_message = true
# Allow users to share threads (backend + UI). Requires an app-defined on_shared_thread_view callback.
allow_thread_sharing = false
[features.slack]
# Add emoji reaction when message is received (requires reactions:write OAuth scope)
reaction_on_message_received = false
# Authorize users to spontaneously upload files with messages
[features.spontaneous_file_upload]
enabled = true
accept = ["*"]
max_files = 20
max_size_mb = 500
enabled = true
# Define accepted file types using MIME types
# Examples:
# 1. For specific file types:
# accept = ["image/jpeg", "image/png", "application/pdf"]
# 2. For all files of certain type:
# accept = ["image/*", "audio/*", "video/*"]
# 3. For specific file extensions:
# accept = { "application/octet-stream" = [".xyz", ".pdb"] }
# Note: Using "*/*" is not recommended as it may cause browser warnings
accept = ["*/*"]
max_files = 20
max_size_mb = 500
[features.audio]
enabled = false
sample_rate = 24000
# Enable audio features
enabled = false
# Sample rate of the audio
sample_rate = 24000
[features.mcp]
enabled = false
# Enable Model Context Protocol (MCP) features
enabled = false
[features.mcp.sse]
enabled = true
[features.mcp.streamable-http]
enabled = true
[features.mcp.stdio]
enabled = true
# Only the executables in the allow list can be used for MCP stdio server.
# Only need the base name of the executable, e.g. "npx", not "/usr/bin/npx".
# Please don't comment this line for now, we need it to parse the executable name.
allowed_executables = [ "npx", "uvx" ]
[UI]
name = "Ai Station DFFM"
default_theme = "dark"
layout = "wide"
# Name of the assistant.
name = "Assistant"
# default_theme = "dark"
# layout = "wide"
default_sidebar_state = "open"
# Più “SaaS”: evita di mostrare troppo ragionamento di default
cot = "tool_call"
# Description of the assistant. This is used for HTML tags.
# description = ""
custom_css = "/public/ui-s-tier.css"
# Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full".
cot = "full"
[UI.theme]
primary_color = "#0066CC"
background_color = "#1a1a1a"
alert_style = "modern"
# Specify a CSS file that can be used to customize the user interface.
# The CSS file can be served from the public directory or via an external link.
# custom_css = "/public/test.css"
# Brand assets (metti i file in /public/brand/)
logo_file_url = "/public/brand/logo-header.png"
default_avatar_file_url = "/public/brand/avatar.png"
login_page_image = "/public/brand/login.jpg"
login_page_image_filter = "brightness-50 grayscale"
# Specify additional attributes for a custom CSS file
# custom_css_attributes = "media=\"print\""
[[UI.header_links]]
name = "Docs"
display_name = "Docs"
icon_url = "/public/brand/icon-32.png"
url = "https://ai.dffm.it"
target = "_blank"
# Specify a JavaScript file that can be used to customize the user interface.
# The JavaScript file can be served from the public directory.
# custom_js = "/public/test.js"
[[UI.header_links]]
name = "Support"
display_name = "Support"
icon_url = "/public/brand/icon-32.png"
url = "mailto:support@dffm.it"
target = "_blank"
# The style of alert boxes. Can be "classic" or "modern".
alert_style = "classic"
# Specify additional attributes for custom JS file
# custom_js_attributes = "async type = \"module\""
# Custom login page image, relative to public directory or external URL
# login_page_image = "/public/custom-background.jpg"
# Custom login page image filter (Tailwind internal filters, no dark/light variants)
# login_page_image_filter = "brightness-50 grayscale"
# login_page_image_dark_filter = "contrast-200 blur-sm"
# Specify a custom meta URL (used for meta tags like og:url)
# custom_meta_url = "https://github.com/Chainlit/chainlit"
# Specify a custom meta image url.
# custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png"
# Load assistant logo directly from URL.
logo_file_url = ""
# Load assistant avatar image directly from URL.
default_avatar_file_url = ""
# Specify a custom build directory for the frontend.
# This can be used to customize the frontend code.
# Be careful: If this is a relative path, it should not start with a slash.
# custom_build = "./public/build"
# Specify optional one or more custom links in the header.
# [[UI.header_links]]
# name = "Issues"
# display_name = "Report Issue"
# icon_url = "https://avatars.githubusercontent.com/u/128686189?s=200&v=4"
# url = "https://github.com/Chainlit/chainlit/issues"
# target = "_blank" (default) # Optional: "_self", "_parent", "_top".
[meta]
generated_by = "2.8.3"

5
.gitignore vendored
View File

@ -3,7 +3,4 @@ __pycache__/
*.pyc
.aider*
workspaces/*
qdrant_storage/.files/
__pycache__/
.env
.files/
qdrant_storage/

770
app.py
View File

@ -2,29 +2,19 @@ import os
import re
import uuid
import shutil
import requests
import time
import pandas as pd # NUOVO: Gestione Excel
import httpx # NUOVO: Chiamate API Remote
from datetime import datetime
from typing import Optional, Dict, Any, List
from typing import Optional, Dict, List
import chainlit as cl
import ollama
from docling.document_converter import DocumentConverter
import fitz # PyMuPDF
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import (
PointStruct,
Distance,
VectorParams,
SparseVectorParams,
Prefetch,
)
from qdrant_client import models
from qdrant_client.models import PointStruct, Distance, VectorParams, SparseVectorParams, SparseIndexParams
from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
from chainlit.types import ThreadDict
from functools import lru_cache
# === FIX IMPORT ROBUSTO Storage Client ===
# === FIX IMPORT ===
try:
from chainlit.data.storage_clients import BaseStorageClient
except ImportError:
@ -33,591 +23,351 @@ except ImportError:
except ImportError:
from chainlit.data.storage_clients.base import BaseStorageClient
# =========================
# CONFIG
# =========================
DATABASE_URL = os.getenv(
"DATABASE_URL",
"postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station",
)
# === CONFIGURAZIONE ===
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station")
# PUNTANO AL SERVER .243 (Il "Cervello")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434")
BGE_API_URL = os.getenv("BGE_API_URL", "http://192.168.1.243:8001")
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333")
BGE_API_URL = os.getenv("BGE_API_URL", "http://192.168.1.243:8001/embed")
VISION_MODEL = "minicpm-v"
DEFAULT_TEXT_MODEL = "glm-4.6:cloud"
MINIMAX_MODEL = "minimax-m2.1:cloud"
MODEL_CHOICES = [
DEFAULT_TEXT_MODEL,
MINIMAX_MODEL,
"llama3.2",
"mistral",
"qwen2.5-coder:32b",
]
WORKSPACES_DIR = "./workspaces"
STORAGE_DIR = "./.files"
os.makedirs(STORAGE_DIR, exist_ok=True)
os.makedirs(WORKSPACES_DIR, exist_ok=True)
# =========================
# USER PROFILES
# =========================
# === MAPPING UTENTI E RUOLI ===
USER_PROFILES = {
"giuseppe@defranceschi.pro": {
"role": "admin",
"name": "Giuseppe",
"workspace": "admin_workspace",
"rag_collection": "admin_docs",
"capabilities": ["debug", "all"],
"show_code": True,
"capabilities": ["debug", "system_prompts", "user_management", "all_models"],
"show_code": True
},
"federica.tecchio@gmail.com": {
"role": "business",
"name": "Federica",
"workspace": "business_workspace",
"rag_collection": "contabilita",
"capabilities": ["basic_chat"],
"show_code": False,
"capabilities": ["pdf_upload", "basic_chat"],
"show_code": False
},
"riccardob545@gmail.com": {
"giuseppe.defranceschi@gmail.com": {
"role": "admin",
"name": "Giuseppe",
"workspace": "admin_workspace",
"rag_collection": "admin_docs",
"capabilities": ["debug", "system_prompts", "user_management", "all_models"],
"show_code": True
},
"riccardob545@gmail.com": {
"role": "engineering",
"name": "Riccardo",
"workspace": "engineering_workspace",
"rag_collection": "engineering_docs",
"capabilities": ["code"],
"show_code": True,
"capabilities": ["code_execution", "data_viz", "advanced_chat"],
"show_code": True
},
"giuliadefranceschi05@gmail.com": {
"role": "architecture",
"name": "Giulia",
"workspace": "architecture_workspace",
"rag_collection": "architecture_manuals",
"capabilities": ["visual"],
"show_code": False,
},
"giuseppe.defranceschi@gmail.com": {
"role": "architecture",
"name": "Giuseppe",
"workspace": "architecture_workspace",
"rag_collection": "architecture_manuals",
"capabilities": ["visual"],
"show_code": False,
},
"capabilities": ["visual_chat", "pdf_upload", "image_gen"],
"show_code": False
}
}
GUEST_PROFILE = {
"role": "guest",
"name": "Guest",
"workspace": "guest",
"rag_collection": "public",
"capabilities": ["basic_chat"],
"show_code": False,
}
# Sensible defaults per ruolo (S-Tier: thoughtful defaults) [file:3]
ROLE_DEFAULTS = {
"admin": {
"model": DEFAULT_TEXT_MODEL,
"top_k": 6,
"temperature": 0.3,
"rag_enabled": True,
"vision_detail": "high",
},
"engineering": {
"model": MINIMAX_MODEL,
"top_k": 5,
"temperature": 0.3,
"rag_enabled": True,
"vision_detail": "low",
},
"business": {
"model": DEFAULT_TEXT_MODEL,
"top_k": 4,
"temperature": 0.2,
"rag_enabled": True,
"vision_detail": "auto",
},
"architecture": {
"model": DEFAULT_TEXT_MODEL,
"top_k": 4,
"temperature": 0.3,
"rag_enabled": True,
"vision_detail": "high",
},
"guest": {
"model": DEFAULT_TEXT_MODEL,
"top_k": 3,
"temperature": 0.2,
"rag_enabled": False,
"vision_detail": "auto",
},
}
# =========================
# STORAGE
# =========================
# === STORAGE CLIENT ===
class LocalStorageClient(BaseStorageClient):
def __init__(self, storage_path: str):
self.storage_path = storage_path
os.makedirs(storage_path, exist_ok=True)
async def upload_file(
self,
object_key: str,
data: bytes,
mime: str = "application/octet-stream",
overwrite: bool = True,
) -> Dict[str, str]:
async def upload_file(self, object_key: str, data: bytes, mime: str = "application/octet-stream", overwrite: bool = True) -> Dict[str, str]:
file_path = os.path.join(self.storage_path, object_key)
os.makedirs(os.path.dirname(file_path), exist_ok=True)
with open(file_path, "wb") as f:
f.write(data)
with open(file_path, "wb") as f: f.write(data)
return {"object_key": object_key, "url": f"/files/{object_key}"}
async def get_read_url(self, object_key: str) -> str:
return f"/files/{object_key}"
async def get_read_url(self, object_key: str) -> str: return f"/files/{object_key}"
async def delete_file(self, object_key: str) -> bool:
path = os.path.join(self.storage_path, object_key)
if os.path.exists(path):
os.remove(path)
return True
if os.path.exists(path): os.remove(path); return True
return False
async def close(self):
pass
async def close(self): pass
@cl.data_layer
def get_data_layer():
return SQLAlchemyDataLayer(conninfo=DATABASE_URL, storage_provider=LocalStorageClient(STORAGE_DIR))
return SQLAlchemyDataLayer(conninfo=DATABASE_URL, user_thread_limit=1000, storage_provider=LocalStorageClient(STORAGE_DIR))
# =========================
# OAUTH
# =========================
# === OAUTH ===
@cl.oauth_callback
def oauth_callback(
provider_id: str,
token: str,
raw_user_data: Dict[str, str],
default_user: cl.User,
) -> Optional[cl.User]:
def oauth_callback(provider_id: str, token: str, raw_user_data: Dict[str, str], default_user: cl.User) -> Optional[cl.User]:
if provider_id == "google":
email = raw_user_data.get("email", "").lower()
profile = USER_PROFILES.get(email, GUEST_PROFILE)
default_user.metadata.update(
{
"role": profile["role"],
"workspace": profile["workspace"],
"rag_collection": profile["rag_collection"],
"show_code": profile["show_code"],
"display_name": profile["name"],
}
)
profile = USER_PROFILES.get(email, get_user_profile("guest"))
default_user.metadata.update({
"picture": raw_user_data.get("picture", ""),
"role": profile["role"], "workspace": profile["workspace"],
"rag_collection": profile["rag_collection"], "capabilities": profile["capabilities"],
"show_code": profile["show_code"], "display_name": profile["name"]
})
return default_user
return default_user
def get_user_profile(email: str) -> Dict:
return USER_PROFILES.get(email.lower(), {"role": "guest", "name": "Ospite", "workspace": "guest_workspace", "rag_collection": "documents", "show_code": False})
def create_workspace(workspace_name: str) -> str:
path = os.path.join(WORKSPACES_DIR, workspace_name)
def create_workspace(name: str) -> str:
path = os.path.join(WORKSPACES_DIR, name)
os.makedirs(path, exist_ok=True)
return path
def save_code_to_file(code: str, workspace: str) -> str:
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
path = os.path.join(WORKSPACES_DIR, workspace, f"code_{ts}.py")
with open(path, "w", encoding="utf-8") as f: f.write(code)
return path
# =========================
# CORE: DOCLING
# =========================
def process_file_with_docling(file_path: str) -> str:
# === PARSING DOCUMENTI ===
def extract_text_from_pdf(path: str) -> str:
try:
converter = DocumentConverter()
result = converter.convert(file_path)
return result.document.export_to_markdown()
doc = fitz.open(path)
return "\n".join([page.get_text() for page in doc])
except: return ""
def extract_text_from_excel(path: str) -> str:
"""Estrae testo da Excel convertendo i fogli in Markdown"""
try:
xl = pd.read_excel(path, sheet_name=None)
text_content = []
for sheet, df in xl.items():
text_content.append(f"\n--- Foglio Excel: {sheet} ---\n")
# Pulisce NaN e converte in stringa
clean_df = df.fillna("").astype(str)
text_content.append(clean_df.to_markdown(index=False))
return "\n".join(text_content)
except Exception as e:
print(f"❌ Docling Error: {e}")
print(f"Errore Excel: {e}")
return ""
# =========================
# CORE: BGE-M3 embeddings
# =========================
def get_bge_embeddings(text: str) -> Optional[Dict[str, Any]]:
# === AI & EMBEDDINGS (Remoto) ===
async def get_embeddings(text: str) -> dict:
"""Ritorna dict con keys 'dense' e 'sparse'"""
try:
payload = {"texts": [text[:8000]]}
response = requests.post(BGE_API_URL, json=payload, timeout=30)
response.raise_for_status()
data = response.json().get("data", [])
if data:
return data[0]
return None
async with httpx.AsyncClient(timeout=30.0) as client:
resp = await client.post(
f"{BGE_API_URL}/embed",
json={"texts": [text]}
)
if resp.status_code == 200:
data = resp.json()
# La nuova API ritorna {"data": [{"dense":..., "sparse":...}]}
return data["data"][0]
except Exception as e:
print(f"❌ BGE API Error: {e}")
return None
print(f"⚠️ Errore Embedding: {e}")
return {}
@lru_cache(maxsize=1000)
def get_bge_embeddings_cached(text: str):
return get_bge_embeddings(text)
# =========================
# CORE: QDRANT
# =========================
async def ensure_collection(collection_name: str):
async def ensure_collection(name: str):
client = AsyncQdrantClient(url=QDRANT_URL)
if not await client.collection_exists(collection_name):
if not await client.collection_exists(name):
await client.create_collection(
collection_name=collection_name,
vectors_config={"dense": VectorParams(size=1024, distance=Distance.COSINE)},
sparse_vectors_config={"sparse": SparseVectorParams()},
collection_name=name,
vectors_config={
"bge_dense": VectorParams(size=1024, distance=Distance.COSINE)
},
# ABILITIAMO LO SPARSE VECTOR
sparse_vectors_config={
"bge_sparse": SparseVectorParams(
index=SparseIndexParams(
on_disk=False, # True se hai poca RAM, ma hai 32GB quindi False è meglio
)
)
}
)
def chunk_text_by_lines(text: str, max_chars: int = 2500) -> List[str]:
"""Taglia il testo raggruppando linee intere senza spezzarle."""
lines = text.split('\n')
chunks = []
current_chunk = ""
for line in lines:
# Se la riga è troppo lunga da sola (caso raro), la tagliamo
if len(line) > max_chars:
if current_chunk: chunks.append(current_chunk)
chunks.append(line[:max_chars])
current_chunk = ""
continue
# Se aggiungere la riga supera il limite, salviamo il chunk attuale
if len(current_chunk) + len(line) > max_chars:
chunks.append(current_chunk)
current_chunk = line + "\n"
else:
current_chunk += line + "\n"
if current_chunk:
chunks.append(current_chunk)
return chunks
async def index_document(file_name: str, content: str, collection_name: str) -> int:
await ensure_collection(collection_name)
client = AsyncQdrantClient(url=QDRANT_URL)
async def index_document(filename: str, content: str, collection: str) -> bool:
try:
await ensure_collection(collection)
# --- MODIFICA QUI: Usiamo il chunking intelligente invece di quello brutale ---
# Vecchio: chunks = [content[i:i+3000] for i in range(0, len(content), 3000)]
chunks = chunk_text_by_lines(content, max_chars=2000)
# ---------------------------------------------------------------------------
chunk_size = 2000
overlap = 200
points: List[PointStruct] = []
for i in range(0, len(content), chunk_size - overlap):
chunk = content[i : i + chunk_size]
embedding_data = get_bge_embeddings(chunk)
if embedding_data:
points.append(
PointStruct(
qdrant = AsyncQdrantClient(url=QDRANT_URL)
points = []
for i, chunk in enumerate(chunks):
vectors = await get_embeddings(chunk)
if vectors:
points.append(PointStruct(
id=str(uuid.uuid4()),
vector={"dense": embedding_data["dense"], "sparse": embedding_data["sparse"]},
payload={
"file_name": file_name,
"content": chunk,
"indexed_at": datetime.now().isoformat(),
vector={
"bge_dense": vectors["dense"],
"bge_sparse": models.SparseVector(
indices=vectors["sparse"]["indices"],
values=vectors["sparse"]["values"]
)
},
)
)
payload={"file_name": filename, "content": chunk, "chunk_id": i}
))
if points:
await qdrant.upsert(collection_name=collection, points=points)
return True
except Exception as e:
print(f"Index Error: {e}")
return False
if points:
await client.upsert(collection_name=collection_name, points=points)
return len(points)
return 0
async def search_hybrid(query: str, collection_name: str, limit: int = 4) -> str:
client = AsyncQdrantClient(url=QDRANT_URL)
if not await client.collection_exists(collection_name):
async def search_qdrant(query: str, collection: str) -> str:
try:
client = AsyncQdrantClient(url=QDRANT_URL)
if not await client.collection_exists(collection): return ""
vectors = await get_embeddings(query)
if not vectors: return ""
# HYBRID QUERY (RRF FUSION)
res = await client.query_points(
collection_name=collection,
prefetch=[
models.Prefetch(
query=vectors["dense"],
using="bge_dense",
limit=10,
),
models.Prefetch(
query=models.SparseVector(
indices=vectors["sparse"]["indices"],
values=vectors["sparse"]["values"]
),
using="bge_sparse",
limit=20,
),
],
# --- CORREZIONE QUI SOTTO (da 'method' a 'fusion') ---
query=models.FusionQuery(fusion=models.Fusion.RRF),
limit=12
)
return "\n\n".join([f"📄 {hit.payload['file_name']}:\n{hit.payload['content']}" for hit in res.points if hit.payload])
except Exception as e:
print(f"Search Error: {e}") # Questo è quello che vedevi nei log
return ""
return "\n\n".join([f"📄 {hit.payload['file_name']}:\n{hit.payload['content']}" for hit in res.points if hit.payload])
except Exception as e:
print(f"Search Error: {e}")
return ""
query_emb = get_bge_embeddings(query)
if not query_emb:
return ""
results = await client.query_points(
collection_name=collection_name,
prefetch=[Prefetch(query=query_emb["sparse"], using="sparse", limit=limit * 2)],
query=query_emb["dense"],
using="dense",
limit=limit,
)
context = []
for hit in results.points:
context.append(f"--- DA {hit.payload['file_name']} ---\n{hit.payload['content']}")
return "\n\n".join(context)
# =========================
# UX HELPERS (S-Tier: clarity, consistency)
# =========================
def role_to_badge_class(role: str) -> str:
allowed = {"admin", "engineering", "business", "architecture", "guest"}
return f"dfm-badge--{role}" if role in allowed else "dfm-badge--guest"
def build_system_prompt(system_instruction: str, has_rag: bool, has_files: bool) -> str:
base = (
"Sei un assistente tecnico esperto.\n"
"Obiettivo: rispondere in modo chiaro, preciso e operativo.\n"
"- Se mancano dettagli, fai 1-2 domande mirate.\n"
"- Se scrivi codice, includi snippet piccoli e verificabili.\n"
)
if has_rag:
base += "- Usa il contesto RAG come fonte primaria quando presente.\n"
if has_files:
base += "- Se sono presenti file/immagini, sfrutta le informazioni estratte.\n"
if system_instruction.strip():
base += "\nIstruzione aggiuntiva (utente): " + system_instruction.strip() + "\n"
return base
def extract_code_blocks(text: str) -> List[str]:
return re.findall(r"```(?:\w+)?\n(.*?)```", text, re.DOTALL)
async def log_metrics(metrics: dict):
# Mantieni semplice: stdout (come nella tua versione) [file:6]
print("METRICS:", metrics)
# =========================
# CHAINLIT HANDLERS
# =========================
# === CHAT LOGIC ===
@cl.on_chat_start
async def start():
# 1) Profilo utente
async def on_chat_start():
user = cl.user_session.get("user")
email = user.identifier if user else "guest"
profile = USER_PROFILES.get(email, GUEST_PROFILE)
cl.user_session.set("profile", profile)
if not user:
email = "guest@local"
profile = get_user_profile(email)
else:
email = user.identifier
profile = USER_PROFILES.get(email, get_user_profile("guest"))
cl.user_session.set("email", email)
cl.user_session.set("role", profile["role"])
cl.user_session.set("workspace", profile["workspace"])
cl.user_session.set("rag_collection", profile["rag_collection"])
cl.user_session.set("show_code", profile["show_code"])
create_workspace(profile["workspace"])
role = profile.get("role", "guest")
defaults = ROLE_DEFAULTS.get(role, ROLE_DEFAULTS["guest"])
cl.user_session.set("role_defaults", defaults)
# 2) Badge (HTML controllato; stile via CSS)
badge_html = f"""
<div class="dfm-badge {role_to_badge_class(role)}">
<span><b>{profile['name']}</b></span>
<span style="opacity:.8">{role.upper()}</span>
<span style="opacity:.7">· {profile['workspace']}</span>
</div>
"""
await cl.Message(content=badge_html).send()
# 3) Settings UI (Clarity + sensible defaults)
settings = await cl.ChatSettings(
[
cl.input_widget.Switch(
id="rag_enabled",
label="📚 Usa Conoscenza Documenti",
initial=bool(defaults["rag_enabled"]),
description="Attiva la ricerca nei documenti caricati (consigliato).",
),
cl.input_widget.Slider(
id="top_k",
label="Profondità Ricerca (documenti)",
initial=int(defaults["top_k"]),
min=1,
max=10,
step=1,
description="Più documenti = risposta più completa ma più lenta.",
),
cl.input_widget.Select(
id="model",
label="🤖 Modello AI",
values=MODEL_CHOICES,
initial_value=str(defaults["model"]),
),
cl.input_widget.Slider(
id="temperature",
label="Creatività",
initial=float(defaults["temperature"]),
min=0,
max=1,
step=0.1,
description="Bassa = più precisione (consigliato per codice).",
),
cl.input_widget.Select(
id="vision_detail",
label="🔍 Dettaglio Analisi Immagini",
values=["auto", "low", "high"],
initial_value=str(defaults["vision_detail"]),
),
cl.input_widget.TextInput(
id="system_instruction",
label="✏️ Istruzione Sistema (opzionale)",
initial="",
placeholder="es: Rispondi con bullet points e includi esempi",
description="Personalizza stile/format delle risposte.",
),
]
).send()
cl.user_session.set("settings", settings)
await cl.Message(
content=(
"✅ Ai Station online.\n"
f"• Workspace: `{profile['workspace']}`\n"
f"• Default modello: `{defaults['model']}`\n"
f"• Vision: `{VISION_MODEL}`"
)
).send()
settings = [
cl.input_widget.Select(id="model", label="Modello", values=["glm-4.6:cloud", "llama3"], initial_value="glm-4.6:cloud"),
cl.input_widget.Slider(id="temp", label="Temperatura", initial=0.5, min=0, max=1, step=0.1)
]
if profile["role"] == "admin":
settings.append(cl.input_widget.Switch(id="rag", label="RAG Attivo", initial=True))
await cl.ChatSettings(settings).send()
await cl.Message(content=f"👋 Ciao **{profile['name']}**! Pronto per l'automazione.").send()
@cl.on_settings_update
async def setupagentsettings(settings):
cl.user_session.set("settings", settings)
await cl.Message(
content=(
"✅ Impostazioni aggiornate:\n"
f"• Modello: `{settings.get('model')}`\n"
f"• RAG: {'ON' if settings.get('rag_enabled') else 'OFF'} · top_k={settings.get('top_k')}\n"
f"• Creatività: {settings.get('temperature')}\n"
f"• Vision detail: `{settings.get('vision_detail')}`"
)
).send()
@cl.on_chat_resume
async def on_chat_resume(thread: ThreadDict):
user_identifier = thread.get("userIdentifier")
profile = USER_PROFILES.get(user_identifier, GUEST_PROFILE)
cl.user_session.set("profile", profile)
create_workspace(profile["workspace"])
await cl.Message(content="Bentornato! Riprendiamo da qui.").send()
async def on_settings_update(s): cl.user_session.set("settings", s)
@cl.on_message
async def main(message: cl.Message):
start_time = time.time()
profile = cl.user_session.get("profile", GUEST_PROFILE)
async def on_message(message: cl.Message):
workspace = cl.user_session.get("workspace")
rag_collection = cl.user_session.get("rag_collection")
role = cl.user_session.get("role")
settings = cl.user_session.get("settings", {})
selected_model = settings.get("model", DEFAULT_TEXT_MODEL)
temperature = float(settings.get("temperature", 0.3))
rag_enabled = bool(settings.get("rag_enabled", True))
top_k = int(settings.get("top_k", 4))
vision_detail = settings.get("vision_detail", "auto")
system_instruction = (settings.get("system_instruction", "") or "").strip()
workspace = create_workspace(profile["workspace"])
# 1) Gestione upload (immagini / pdf / docx)
images_for_vision: List[str] = []
doc_context = ""
# 1. FILE UPLOAD (PDF & EXCEL)
if message.elements:
for element in message.elements:
file_path = os.path.join(workspace, element.name)
shutil.copy(element.path, file_path)
for el in message.elements:
dest = os.path.join(WORKSPACES_DIR, workspace, el.name)
shutil.copy(el.path, dest)
content = ""
if el.name.endswith(".pdf"):
content = extract_text_from_pdf(dest)
elif el.name.endswith((".xlsx", ".xls")):
await cl.Message(content=f"📊 Analisi Excel **{el.name}**...").send()
content = extract_text_from_excel(dest)
if content:
ok = await index_document(el.name, content, rag_collection)
icon = "" if ok else ""
await cl.Message(content=f"{icon} **{el.name}** elaborato.").send()
# Immagini
if "image" in (element.mime or ""):
images_for_vision.append(file_path)
msg_img = cl.Message(content=f"🖼️ Analizzo immagine `{element.name}` con `{VISION_MODEL}`...")
await msg_img.send()
try:
with open(file_path, "rb") as imgfile:
imgbytes = imgfile.read()
client_sync = ollama.Client(host=OLLAMA_URL)
res = client_sync.chat(
model=VISION_MODEL,
messages=[
{
"role": "user",
"content": (
"Analizza questa immagine tecnica. "
"Trascrivi testi/codici e descrivi diagrammi o tabelle in dettaglio. "
f"Dettaglio richiesto: {vision_detail}."
),
"images": [imgbytes],
}
],
)
desc = res.get("message", {}).get("content", "")
doc_context += f"\n\n## DESCRIZIONE IMMAGINE: {element.name}\n{desc}\n"
msg_img.content = f"✅ Immagine analizzata: {desc[:300]}..."
await msg_img.update()
except Exception as e:
msg_img.content = f"❌ Errore analisi immagine: {e}"
await msg_img.update()
# Documenti (pdf/docx)
elif element.name.lower().endswith((".pdf", ".docx")):
msg_doc = cl.Message(content=f"📄 Leggo `{element.name}` con Docling (tabelle/formule)...")
await msg_doc.send()
markdown_content = process_file_with_docling(file_path)
if markdown_content:
chunks = await index_document(element.name, markdown_content, profile["rag_collection"])
doc_context += f"\n\n## CONTENUTO FILE: {element.name}\n{markdown_content[:2000]}\n"
msg_doc.content = f"✅ `{element.name}` convertito e indicizzato ({chunks} chunks)."
else:
msg_doc.content = f"❌ Errore lettura `{element.name}`."
await msg_doc.update()
# 2) RAG retrieval (solo se attivo e senza immagini-only flow)
rag_context = ""
if rag_enabled and not images_for_vision:
rag_context = await search_hybrid(message.content, profile["rag_collection"], limit=top_k)
has_rag = bool(rag_context.strip())
has_files = bool(doc_context.strip())
# 3) Prompt building
system_prompt = build_system_prompt(system_instruction, has_rag=has_rag, has_files=has_files)
final_context = ""
if has_rag:
final_context += "\n\n# CONTESTO RAG\n" + rag_context
if has_files:
final_context += "\n\n# CONTESTO FILE SESSIONE\n" + doc_context
# 4) Generazione (stream)
# 2. RAG & GENERATION
rag_active = settings.get("rag", True) if role == "admin" else True
context = await search_qdrant(message.content, rag_collection) if rag_active else ""
prompt = "Sei un esperto di automazione industriale."
if context: prompt += f"\n\nUSA QUESTO CONTESTO (Manuali/Excel):\n{context}"
msg = cl.Message(content="")
await msg.send()
error: Optional[str] = None
try:
client_async = ollama.AsyncClient(host=OLLAMA_URL)
stream = await client_async.chat(
model=selected_model,
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": f"Domanda: {message.content}\n{final_context}"},
],
options={"temperature": temperature},
stream=True,
client = ollama.AsyncClient(host=OLLAMA_URL)
stream = await client.chat(
model=settings.get("model", "glm-4.6:cloud"),
messages=[{"role": "system", "content": prompt}, {"role": "user", "content": message.content}],
options={"temperature": settings.get("temp", 0.5)},
stream=True
)
async for chunk in stream:
content = chunk.get("message", {}).get("content", "")
if content:
await msg.stream_token(content)
await msg.update()
await msg.stream_token(chunk['message']['content'])
except Exception as e:
error = str(e)
await msg.stream_token(f"\n\n❌ Errore AI: {error}")
await msg.update()
# 5) Salvataggio code blocks (solo per profili con show_code)
if profile.get("show_code", False) and msg.content:
codeblocks = extract_code_blocks(msg.content)
if codeblocks:
for i, code in enumerate(codeblocks):
fname = f"script_{datetime.now().strftime('%H%M%S')}_{i}.py"
try:
with open(os.path.join(workspace, fname), "w", encoding="utf-8") as f:
f.write(code.strip())
await cl.Message(content=f"💾 Script salvato: `{fname}`").send()
except Exception as e:
await cl.Message(content=f"❌ Errore salvataggio `{fname}`: {e}").send()
# 6) Metriche
elapsed = time.time() - start_time
metrics = {
"response_time": elapsed,
"rag_hits": rag_context.count("--- DA ") if rag_context else 0,
"model": selected_model,
"user_role": profile.get("role", "unknown"),
"error": error,
}
await log_metrics(metrics)
await msg.stream_token(f"Errore connessione AI: {e}")
await msg.update()

View File

@ -1,3 +1,5 @@
version: '3.8'
services:
chainlit-app:
build: .

View File

@ -27,6 +27,7 @@ sniffio
aiohttp
boto3>=1.28.0
azure-storage-file-datalake>=12.14.0
docling
pillow
requests
# NUOVI PER EXCEL
pandas
openpyxl
tabulate