ai-station/bck/app.py.broken-082810

586 lines
19 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import os
import re
import uuid
import shutil
from datetime import datetime
from typing import Optional, Dict, List
import chainlit as cl
import ollama
import fitz # PyMuPDF
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import PointStruct, Distance, VectorParams
from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
from chainlit.data.storage_clients import BaseStorageClient
# === CONFIGURATION ===
# Service endpoints come from the environment; the literals below are the
# fallbacks used when a variable is not set.
DATABASE_URL = os.getenv(
    "DATABASE_URL",
    "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station",
)
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434")
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333")

# Local directories, relative to the process working directory.
WORKSPACES_DIR = "./workspaces"
STORAGE_DIR = "./.files"

# Both directories are created at import time so later writes can rely on them.
for _required_dir in (STORAGE_DIR, WORKSPACES_DIR):
    os.makedirs(_required_dir, exist_ok=True)
# === USER AND ROLE MAPPING ===
def _make_profile(*, role, name, workspace, rag_collection, capabilities, show_code):
    """Build one profile record with the keys the rest of the module reads."""
    return {
        "role": role,
        "name": name,
        "workspace": workspace,
        "rag_collection": rag_collection,
        "capabilities": capabilities,
        "show_code": show_code,
    }


# Known accounts, keyed by lower-case e-mail address. Each entry names the
# user's role, workspace directory, RAG collection, capability flags and
# whether generated code is shown to them.
USER_PROFILES = {
    "giuseppe@defranceschi.pro": _make_profile(
        role="admin",
        name="Giuseppe",
        workspace="admin_workspace",
        rag_collection="admin_docs",
        capabilities=["debug", "system_prompts", "user_management", "all_models"],
        show_code=True,
    ),
    "giuseppe.defranceschi@gmail.com": _make_profile(
        role="admin",
        name="Giuseppe",
        workspace="admin_workspace",
        rag_collection="admin_docs",
        capabilities=["debug", "system_prompts", "user_management", "all_models"],
        show_code=True,
    ),
    "federica.tecchio@gmail.com": _make_profile(
        role="business",
        name="Federica",
        workspace="business_workspace",
        rag_collection="contabilita",
        capabilities=["pdf_upload", "basic_chat"],
        show_code=False,
    ),
    "riccardob545@gmail.com": _make_profile(
        role="engineering",
        name="Riccardo",
        workspace="engineering_workspace",
        rag_collection="engineering_docs",
        capabilities=["code_execution", "data_viz", "advanced_chat"],
        show_code=True,
    ),
    "giuliadefranceschi05@gmail.com": _make_profile(
        role="architecture",
        name="Giulia",
        workspace="architecture_workspace",
        rag_collection="architecture_manuals",
        capabilities=["visual_chat", "pdf_upload", "image_gen"],
        show_code=False,
    ),
}
# === CUSTOM LOCAL STORAGE CLIENT ===
class LocalStorageClient(BaseStorageClient):
    """Store uploaded files/elements on the local filesystem.

    Every object is written beneath ``storage_path``; the object key is used
    as the file's path relative to that directory.
    """

    def __init__(self, storage_path: str):
        """Remember the storage root and create it if it does not exist yet."""
        self.storage_path = storage_path
        os.makedirs(storage_path, exist_ok=True)

    async def upload_file(
        self,
        object_key: str,
        data: bytes,
        mime: str = "application/octet-stream",
        overwrite: bool = True,
    ) -> Dict[str, str]:
        """Write ``data`` to ``<storage_path>/<object_key>``.

        Args:
            object_key: Relative path of the object inside the storage root.
            data: Payload to store. A ``str`` is encoded as UTF-8 first.
            mime: Accepted for interface compatibility; not used locally.
            overwrite: When ``False`` an existing file is left untouched and
                ``FileExistsError`` is raised instead of replacing it.

        Returns:
            A dict with the ``object_key`` and the ``/files/<object_key>`` URL.

        Raises:
            ValueError: If ``object_key`` resolves outside the storage root.
            FileExistsError: If the file exists and ``overwrite`` is ``False``.
        """
        root = os.path.abspath(self.storage_path)
        file_path = os.path.abspath(os.path.join(root, object_key))
        # Reject keys such as "../x" or "/etc/x" that would land outside the
        # storage root (or on the root directory itself).
        if file_path == root or os.path.commonpath([root, file_path]) != root:
            raise ValueError(f"Invalid object key: {object_key!r}")
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        if isinstance(data, str):
            data = data.encode("utf-8")
        # "xb" makes the open fail when the file already exists, which is how
        # the overwrite=False contract is honoured.
        with open(file_path, "wb" if overwrite else "xb") as f:
            f.write(data)
        return {
            "object_key": object_key,
            "url": f"/files/{object_key}"
        }
# === DATA LAYER INITIALISATION ===
print("🔧 Inizializzazione database...")
storage_client = LocalStorageClient(storage_path=STORAGE_DIR)


async def init_data_layer():
    """Build the SQLAlchemy data layer and publish it as ``cl.data_layer``.

    The layer is stored in the module-level ``data_layer`` global, its tables
    are created, and it is then assigned to ``cl.data_layer``. Any exception
    raised along the way is printed and ``cl.data_layer`` is set to ``None``.
    """
    global data_layer
    try:
        layer_options = {
            "conninfo": DATABASE_URL,
            "storage_provider": storage_client,
            "user_thread_limit": 1000,
            "show_logger": False,
        }
        data_layer = SQLAlchemyDataLayer(**layer_options)
        # Create the tables if they do not exist.
        # NOTE(review): confirm the installed Chainlit version exposes
        # `create_tables()` on SQLAlchemyDataLayer; if it does not, the
        # AttributeError is swallowed below and persistence stays disabled.
        await data_layer.create_tables()
        cl.data_layer = data_layer
        print("✅ SQLAlchemyDataLayer + LocalStorage initialized successfully")
        print(f"✅ Data layer set: {cl.data_layer is not None}")
    except Exception as exc:
        print(f"❌ Failed to initialize data layer: {exc}")
        cl.data_layer = None
# === OAUTH CALLBACK WITH ROLES ===
@cl.oauth_callback
def oauth_callback(
    provider_id: str,
    token: str,
    raw_user_data: Dict[str, str],
    default_user: cl.User,
) -> Optional[cl.User]:
    """Validate the signed-in user and enrich its metadata with role data.

    For the ``google`` provider the e-mail address must be a key of
    ``USER_PROFILES``; otherwise ``None`` is returned. Accepted users get
    their picture, locale and profile fields copied into
    ``default_user.metadata``.
    """
    if provider_id != "google":
        # NOTE(review): users of any other provider are returned unchanged,
        # i.e. without the allow-list check applied below - confirm intended.
        return default_user

    email = raw_user_data.get("email", "").lower()
    profile = USER_PROFILES.get(email)
    if profile is None:
        print(f"❌ Utente non autorizzato: {email}")
        return None

    enriched_metadata = {
        "picture": raw_user_data.get("picture", ""),
        "locale": raw_user_data.get("locale", "en"),
        "role": profile["role"],
        "workspace": profile["workspace"],
        "rag_collection": profile["rag_collection"],
        "capabilities": profile["capabilities"],
        "show_code": profile["show_code"],
        "display_name": profile["name"],
    }
    default_user.metadata.update(enriched_metadata)
    print(f"✅ Utente autorizzato: {email} - Ruolo: {profile['role']}")
    return default_user
# === UTILITY FUNCTIONS ===
def get_user_profile(user_email: str) -> Dict:
    """Return the profile stored for ``user_email`` (case-insensitive).

    Addresses that are not in ``USER_PROFILES`` get a freshly built guest
    profile with no capabilities.
    """
    lookup_key = user_email.lower()
    if lookup_key in USER_PROFILES:
        return USER_PROFILES[lookup_key]
    return {
        "role": "guest",
        "name": "Ospite",
        "workspace": "guest_workspace",
        "rag_collection": "documents",
        "capabilities": [],
        "show_code": False,
    }
def create_workspace(workspace_name: str) -> str:
    """Ensure the directory for ``workspace_name`` exists and return its path.

    The directory lives directly under ``WORKSPACES_DIR``; calling this for
    an existing workspace is a no-op.
    """
    target_dir = os.path.join(WORKSPACES_DIR, workspace_name)
    os.makedirs(name=target_dir, exist_ok=True)
    return target_dir
def save_code_to_file(code: str, workspace: str) -> str:
    """Write ``code`` to a new ``code_<timestamp>.py`` file in the workspace.

    The timestamp only has one-second resolution, so several snippets saved
    in the same second would share a name. The file is therefore opened in
    exclusive-create mode and a numeric suffix (``_1``, ``_2``, ...) is added
    until an unused name is found; existing files are never overwritten.

    Args:
        code: Source text to store (written as UTF-8).
        workspace: Workspace directory name under ``WORKSPACES_DIR``.

    Returns:
        Path of the file that was written.
    """
    workspace_dir = os.path.join(WORKSPACES_DIR, workspace)
    # The workspace is normally created at chat start; make this call safe
    # even when it was not.
    os.makedirs(workspace_dir, exist_ok=True)
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    attempt = 0
    while True:
        suffix = f"_{attempt}" if attempt else ""
        file_path = os.path.join(workspace_dir, f"code_{timestamp}{suffix}.py")
        try:
            # "x" fails if the name is already taken instead of truncating it.
            with open(file_path, "x", encoding="utf-8") as f:
                f.write(code)
        except FileExistsError:
            attempt += 1
            continue
        return file_path
def extract_text_from_pdf(pdf_path: str) -> str:
    """Extract the text of every page of a PDF, with a header per page.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts, each preceded by a ``--- Pagina N ---`` header, joined
        by newlines. An empty string is returned if the file cannot be opened
        or read; the error is printed.
    """
    try:
        # The context manager closes the document even when a page fails to
        # parse, which the explicit close() call could not guarantee.
        with fitz.open(pdf_path) as doc:
            return "\n".join(
                f"--- Pagina {page_number} ---\n{page.get_text()}\n"
                for page_number, page in enumerate(doc, start=1)
            )
    except Exception as e:
        print(f"❌ Errore estrazione PDF: {e}")
        return ""
# === QDRANT FUNCTIONS ===
# Shared client, created on first use. Building a new AsyncQdrantClient on
# every call left each one (and its connections) unclosed.
_qdrant_client: Optional[AsyncQdrantClient] = None


async def get_qdrant_client() -> AsyncQdrantClient:
    """Return the process-wide Qdrant client for ``QDRANT_URL``.

    The client is instantiated on the first call and reused afterwards.
    """
    global _qdrant_client
    if _qdrant_client is None:
        _qdrant_client = AsyncQdrantClient(url=QDRANT_URL)
    return _qdrant_client
async def ensure_collection(collection_name: str):
    """Create the Qdrant collection ``collection_name`` if it is missing.

    New collections use 768-dimensional vectors compared by cosine distance.
    """
    client = await get_qdrant_client()
    already_present = await client.collection_exists(collection_name)
    if already_present:
        return
    vector_settings = VectorParams(size=768, distance=Distance.COSINE)
    await client.create_collection(
        collection_name=collection_name,
        vectors_config=vector_settings,
    )
async def get_embeddings(text: str) -> list:
    """Return the ``nomic-embed-text`` embedding vector for ``text``.

    Only the first 2000 characters of ``text`` are embedded. The request is
    made with Ollama's asynchronous client so the event loop stays free while
    the server computes the embedding (the synchronous client blocked it).

    Args:
        text: Text to embed.

    Returns:
        The embedding as a list of floats, or an empty list when the request
        fails or the response carries no embedding; the error is printed.
    """
    max_length = 2000
    text = text[:max_length]
    client = ollama.AsyncClient(host=OLLAMA_URL)
    try:
        response = await client.embed(model='nomic-embed-text', input=text)
        # `embed` answers with a list of vectors under 'embeddings'; the
        # singular 'embedding' key is kept as a fallback.
        if 'embeddings' in response:
            return response['embeddings'][0]
        return response.get('embedding', [])
    except Exception as e:
        print(f"❌ Errore Embedding: {e}")
        return []
def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list:
    """Split ``text`` into overlapping chunks of at most ``max_length`` chars.

    A chunk preferably ends just after the last ``.`` or newline inside its
    window; consecutive chunks share up to ``overlap`` characters. Chunks are
    stripped of surrounding whitespace and empty ones are dropped.

    Args:
        text: Text to split.
        max_length: Maximum chunk length; must be positive when splitting.
        overlap: Characters repeated at the start of the next chunk. Values
            outside ``0 .. max_length - 1`` are clamped into that range.

    Returns:
        ``[text]`` unchanged when it already fits in one chunk, otherwise the
        list of chunks in document order.

    Raises:
        ValueError: If ``text`` needs splitting and ``max_length`` is not
            positive.
    """
    if len(text) <= max_length:
        return [text]
    if max_length <= 0:
        raise ValueError("max_length must be positive")
    overlap = max(0, min(overlap, max_length - 1))

    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = min(start + max_length, text_length)
        if end < text_length:
            split_point = max(
                text.rfind('.', start, end),
                text.rfind('\n', start, end),
            )
            if split_point > start:
                end = split_point + 1
        piece = text[start:end].strip()
        if piece:
            chunks.append(piece)
        if end >= text_length:
            # The tail has been emitted; stepping back by `overlap` again
            # would only repeat text that the last chunk already contains.
            break
        next_start = end - overlap
        # A split point close to the window start can make `end - overlap`
        # fall at or before `start`; resume at `end` so the loop always
        # moves forward.
        start = next_start if next_start > start else end
    return chunks
async def index_document(file_name: str, content: str, collection_name: str) -> bool:
    """Chunk ``content``, embed each chunk and upsert the points into Qdrant.

    Chunks whose embedding comes back empty are skipped. Returns ``True`` when
    at least one point was upserted and ``False`` when nothing could be
    indexed or any step raised (the error is printed).
    """
    try:
        await ensure_collection(collection_name)
        pieces = chunk_text(content, max_length=1500)
        client = await get_qdrant_client()

        prepared_points = []
        for position, piece in enumerate(pieces):
            vector = await get_embeddings(piece)
            if vector:
                prepared_points.append(
                    PointStruct(
                        id=str(uuid.uuid4()),
                        vector=vector,
                        payload={
                            "file_name": file_name,
                            "content": piece,
                            "chunk_index": position,
                            "total_chunks": len(pieces),
                            "indexed_at": datetime.now().isoformat(),
                        },
                    )
                )

        if not prepared_points:
            return False
        await client.upsert(collection_name=collection_name, points=prepared_points)
        return True
    except Exception as err:
        print(f"❌ Errore indicizzazione: {err}")
        return False
async def search_qdrant(query_text: str, collection_name: str, limit: int = 5) -> str:
    """Search ``collection_name`` for chunks similar to ``query_text``.

    Args:
        query_text: Text whose embedding is used as the query vector.
        collection_name: Qdrant collection to search.
        limit: Maximum number of hits requested.

    Returns:
        A Markdown string with one section per distinct (file, chunk) hit: a
        header line with file name, chunk number and score, followed by the
        chunk text in a fenced block. Returns ``""`` when the collection does
        not exist, the query cannot be embedded, nothing is found, or an
        error occurs (the error is printed).
    """
    try:
        qdrant_client = await get_qdrant_client()
        if not await qdrant_client.collection_exists(collection_name):
            return ""
        query_embedding = await get_embeddings(query_text)
        if not query_embedding:
            return ""
        search_result = await qdrant_client.query_points(
            collection_name=collection_name,
            query=query_embedding,
            limit=limit
        )
        contexts = []
        seen_chunks = set()
        for hit in search_result.points:
            payload = hit.payload
            if not payload:
                continue
            file_name = payload.get('file_name', 'Unknown')
            content = payload.get('content', '')
            chunk_idx = payload.get('chunk_index', 0)
            score = getattr(hit, 'score', None) or 0
            chunk_key = (file_name, chunk_idx)
            if chunk_key in seen_chunks:
                continue
            seen_chunks.add(chunk_key)
            # The chunk text must be part of the context: previously only an
            # empty pair of fences was emitted, so the model never saw the
            # retrieved document content.
            contexts.append(
                f"📄 **{file_name}** (chunk {chunk_idx+1}, score: {score:.2f})\n"
                f"```\n{content}\n```"
            )
        return "\n\n".join(contexts)
    except Exception as e:
        print(f"❌ Errore ricerca Qdrant: {e}")
        return ""
# === CHAINLIT HANDLERS ===
@cl.on_chat_start
async def on_chat_start():
    """Prepare the session for a new chat and send the welcome message.

    Initialises the data layer if it is not set yet, resolves the profile of
    the signed-in user (or the guest profile when nobody is signed in),
    creates the workspace directory, stores the profile fields in the user
    session, publishes the chat settings widgets and greets the user.
    """
    # Initialise the database on first use.
    if not cl.data_layer:
        await init_data_layer()

    user = cl.user_session.get("user")
    user_email = user.identifier if user else "guest@local"
    # get_user_profile() falls back to the guest profile for unknown
    # addresses, so `profile` is bound for anonymous sessions too. The guest
    # branch used to leave it undefined and the "rag_collection" lookup below
    # raised NameError.
    profile = get_user_profile(user_email)
    user_name = profile["name"]
    user_role = profile["role"]
    workspace = profile["workspace"]
    show_code = profile["show_code"]
    capabilities = profile["capabilities"]
    user_picture = user.metadata.get("picture", "") if user else ""

    create_workspace(workspace)
    cl.user_session.set("email", user_email)
    cl.user_session.set("name", user_name)
    cl.user_session.set("role", user_role)
    cl.user_session.set("workspace", workspace)
    cl.user_session.set("show_code", show_code)
    cl.user_session.set("capabilities", capabilities)
    cl.user_session.set("rag_collection", profile.get("rag_collection", "documents"))

    settings_widgets = [
        cl.input_widget.Select(
            id="model",
            label="🤖 Modello AI",
            values=["glm-4.6:cloud", "llama3.2", "mistral", "qwen2.5-coder:32b"],
            initial_value="glm-4.6:cloud",
        ),
        cl.input_widget.Slider(
            id="temperature",
            label="🌡️ Temperatura",
            initial=0.7,
            min=0,
            max=2,
            step=0.1,
        ),
    ]
    # Only admins get a switch to turn retrieval off.
    if user_role == "admin":
        settings_widgets.append(
            cl.input_widget.Switch(
                id="rag_enabled",
                label="📚 Abilita RAG",
                initial=True,
            )
        )
    await cl.ChatSettings(settings_widgets).send()

    role_emoji = {
        "admin": "👑",
        "business": "💼",
        "engineering": "⚙️",
        "architecture": "🏛️",
        "guest": "👤"
    }
    persistence_status = "✅ Attiva" if cl.data_layer else "⚠️ Disattivata"
    welcome_msg = f"{role_emoji.get(user_role, '👋')} **Benvenuto, {user_name}!**\n\n"
    if user_picture:
        welcome_msg += f"![Avatar]({user_picture})\n\n"
    welcome_msg += (
        f"🎭 **Ruolo**: {user_role.upper()}\n"
        f"📁 **Workspace**: `{workspace}`\n"
        f"💾 **Persistenza**: {persistence_status}\n"
        f"🤖 **Modello**: `glm-4.6:cloud`\n\n"
    )
    if "debug" in capabilities:
        welcome_msg += "🔧 **Modalità Debug**: Attiva\n"
    if "user_management" in capabilities:
        welcome_msg += "👥 **Gestione Utenti**: Disponibile\n"
    if not show_code:
        welcome_msg += "🎨 **Modalità Visuale**: Codice nascosto\n"
    welcome_msg += "\n⚙ **Usa le Settings (icona ⚙️ in alto a destra) per personalizzare!**"
    await cl.Message(content=welcome_msg).send()
@cl.on_settings_update
async def on_settings_update(settings):
    """Store the new chat settings in the session and confirm them to the user.

    Missing keys are reported with their defaults: model ``glm-4.6:cloud``,
    temperature ``0.7`` and RAG enabled.
    """
    cl.user_session.set("settings", settings)

    chosen_model = settings.get("model", "glm-4.6:cloud")
    chosen_temperature = settings.get("temperature", 0.7)
    if settings.get("rag_enabled", True):
        rag_label = '✅ Attivo'
    else:
        rag_label = '❌ Disattivato'

    summary_lines = [
        "✅ **Settings aggiornati**:",
        f"- 🤖 Modello: `{chosen_model}`",
        f"- 🌡️ Temperatura: `{chosen_temperature}`",
        f"- 📚 RAG: {rag_label}",
    ]
    await cl.Message(content="\n".join(summary_lines)).send()
# One fenced Markdown block: group 1 is the (possibly empty) language tag,
# group 2 the body between the fences.
_CODE_FENCE_RE = re.compile(r"```([\w+#.-]*)[ \t]*\r?\n(.*?)```", re.DOTALL)
# Fence tags whose body is saved as a .py file (untagged blocks included).
_PYTHON_FENCE_TAGS = frozenset({"", "python", "py", "python3"})


def _extract_python_blocks(markdown: str) -> List[str]:
    """Return the stripped, non-empty bodies of Python or untagged fenced blocks."""
    bodies = []
    for match in _CODE_FENCE_RE.finditer(markdown):
        body = match.group(2).strip()
        if body and match.group(1).lower() in _PYTHON_FENCE_TAGS:
            bodies.append(body)
    return bodies


@cl.on_message
async def on_message(message: cl.Message):
    """Answer a user message, optionally grounded on the user's RAG collection.

    Steps: index any attached files, retrieve context from Qdrant when RAG is
    enabled (always for non-admin roles), stream the model's answer into a
    Chainlit message, then save generated Python code to the workspace. Users
    with ``show_code`` disabled get fenced blocks replaced by a placeholder;
    users with it enabled receive the saved files as attachments. Any error
    is reported to the user as a chat message.
    """
    user_role = cl.user_session.get("role", "guest")
    workspace = cl.user_session.get("workspace", "guest_workspace")
    show_code = cl.user_session.get("show_code", False)
    rag_collection = cl.user_session.get("rag_collection", "documents")
    settings = cl.user_session.get("settings", {})
    model = settings.get("model", "glm-4.6:cloud")
    temperature = settings.get("temperature", 0.7)
    rag_enabled = settings.get("rag_enabled", True) if user_role == "admin" else True
    try:
        if message.elements:
            await handle_file_uploads(message.elements, workspace, rag_collection)

        context_text = ""
        if rag_enabled:
            context_text = await search_qdrant(message.content, rag_collection, limit=5)

        if context_text:
            system_prompt = (
                "Sei un assistente AI esperto. "
                "Usa il seguente contesto per arricchire la tua risposta, "
                "ma puoi anche rispondere usando la tua conoscenza generale se il contesto non è sufficiente."
            )
            full_prompt = f"{system_prompt}\n\n**CONTESTO DOCUMENTI:**\n{context_text}\n\n**DOMANDA UTENTE:**\n{message.content}"
        else:
            system_prompt = "Sei un assistente AI esperto e disponibile. Rispondi in modo chiaro e utile."
            full_prompt = f"{system_prompt}\n\n**DOMANDA UTENTE:**\n{message.content}"

        # The async client lets other sessions keep running while tokens
        # arrive; iterating the synchronous stream blocked the event loop for
        # the whole generation.
        client = ollama.AsyncClient(host=OLLAMA_URL)
        msg = cl.Message(content="")
        await msg.send()
        messages = [{"role": "user", "content": full_prompt}]
        stream = await client.chat(
            model=model,
            messages=messages,
            stream=True,
            options={"temperature": temperature}
        )
        response_parts = []
        async for chunk in stream:
            token = chunk['message']['content']
            response_parts.append(token)
            await msg.stream_token(token)
        await msg.update()
        full_response = "".join(response_parts)

        # The previous pattern was six literal backticks with no capture
        # group, so it never extracted any code from the answer.
        if _CODE_FENCE_RE.search(full_response):
            if not show_code:
                # Assign the content and then update: this does not depend on
                # update() accepting a `content` argument.
                msg.content = _CODE_FENCE_RE.sub(
                    "[💻 Codice eseguito internamente]",
                    full_response,
                )
                await msg.update()

            elements = []
            for code in _extract_python_blocks(full_response):
                file_path = save_code_to_file(code, workspace)
                elements.append(
                    cl.File(
                        name=os.path.basename(file_path),
                        path=file_path,
                        display="inline" if show_code else "side"
                    )
                )
            if show_code and elements:
                await cl.Message(
                    content=f"💾 Codice salvato in workspace `{workspace}`",
                    elements=elements
                ).send()
    except Exception as e:
        await cl.Message(content=f"❌ **Errore:** {str(e)}").send()
async def handle_file_uploads(elements, workspace: str, collection_name: str):
    """Copy uploaded files into the workspace and index PDFs and text files.

    Each element is copied to ``WORKSPACES_DIR/<workspace>/``. ``.pdf`` and
    ``.txt`` files are then indexed into ``collection_name``; other types are
    only stored. Progress, success and failure are reported per file as chat
    messages, and an error on one file does not stop the others.

    Args:
        elements: Uploaded Chainlit elements (their ``name`` and ``path``
            attributes are read).
        workspace: Workspace directory name under ``WORKSPACES_DIR``.
        collection_name: Qdrant collection that receives the indexed chunks.
    """
    workspace_dir = os.path.join(WORKSPACES_DIR, workspace)
    for element in elements:
        try:
            # The name comes from the client: keep only its last path
            # component so "../x" or an absolute path cannot write outside
            # the workspace directory.
            safe_name = os.path.basename(element.name.replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                raise ValueError("nome file non valido")
            os.makedirs(workspace_dir, exist_ok=True)
            dest_path = os.path.join(workspace_dir, safe_name)
            shutil.copy(element.path, dest_path)

            lowered_name = safe_name.lower()
            content = None
            if lowered_name.endswith('.pdf'):
                await cl.Message(content=f"📄 Elaborazione PDF **{element.name}**...").send()
                content = extract_text_from_pdf(dest_path)
                if not content:
                    await cl.Message(
                        content=f"⚠️ **{element.name}**: PDF vuoto o non leggibile"
                    ).send()
                    continue
            elif lowered_name.endswith('.txt'):
                with open(dest_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            else:
                await cl.Message(
                    content=f"📁 **{element.name}** salvato in workspace (supportati: .pdf, .txt)"
                ).send()
                continue

            if content:
                success = await index_document(element.name, content, collection_name)
                if success:
                    word_count = len(content.split())
                    await cl.Message(
                        content=f"✅ **{element.name}** indicizzato in `{collection_name}`\n"
                        f"📊 Parole estratte: {word_count:,}"
                    ).send()
                else:
                    await cl.Message(
                        content=f"⚠️ Errore indicizzazione **{element.name}**"
                    ).send()
        except Exception as e:
            await cl.Message(
                content=f"❌ Errore con **{element.name}**: {str(e)}"
            ).send()