import os
import re
import uuid
import shutil
from datetime import datetime
from typing import Optional, Dict, List

import chainlit as cl
import ollama
import fitz  # PyMuPDF
from qdrant_client import AsyncQdrantClient
from qdrant_client.models import PointStruct, Distance, VectorParams
from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
from chainlit.data.storage_clients import BaseStorageClient
from chainlit.input_widget import Select, Slider, Switch

# === CONFIGURATION ===
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station")
OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434")
QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333")
WORKSPACES_DIR = "./workspaces"
STORAGE_DIR = "./.files"

os.makedirs(STORAGE_DIR, exist_ok=True)
os.makedirs(WORKSPACES_DIR, exist_ok=True)

# === USER AND ROLE MAPPING ===
USER_PROFILES = {
    "giuseppe@defranceschi.pro": {
        "role": "admin",
        "name": "Giuseppe",
        "workspace": "admin_workspace",
        "rag_collection": "admin_docs",
        "capabilities": ["debug", "system_prompts", "user_management", "all_models"],
        "show_code": True
    },
    "giuseppe.defranceschi@gmail.com": {
        "role": "admin",
        "name": "Giuseppe",
        "workspace": "admin_workspace",
        "rag_collection": "admin_docs",
        "capabilities": ["debug", "system_prompts", "user_management", "all_models"],
        "show_code": True
    },
    "federica.tecchio@gmail.com": {
        "role": "business",
        "name": "Federica",
        "workspace": "business_workspace",
        "rag_collection": "contabilita",
        "capabilities": ["pdf_upload", "basic_chat"],
        "show_code": False
    },
    "riccardob545@gmail.com": {
        "role": "engineering",
        "name": "Riccardo",
        "workspace": "engineering_workspace",
        "rag_collection": "engineering_docs",
        "capabilities": ["code_execution", "data_viz", "advanced_chat"],
        "show_code": True
    },
    "giuliadefranceschi05@gmail.com": {
        "role": "architecture",
        "name": "Giulia",
        "workspace": "architecture_workspace",
        "rag_collection": "architecture_manuals",
        "capabilities": ["visual_chat", "pdf_upload", "image_gen"],
        "show_code": False
    }
}

# === CUSTOM LOCAL STORAGE CLIENT ===
class LocalStorageClient(BaseStorageClient):
    """Local filesystem storage for files/elements"""

    def __init__(self, storage_path: str):
        self.storage_path = storage_path
        os.makedirs(storage_path, exist_ok=True)

    async def upload_file(
        self,
        object_key: str,
        data: bytes,
        mime: str = "application/octet-stream",
        overwrite: bool = True,
    ) -> Dict[str, str]:
        """Save the file to local storage"""
        file_path = os.path.join(self.storage_path, object_key)
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, "wb") as f:
            f.write(data)
        return {
            "object_key": object_key,
            "url": f"/files/{object_key}"
        }

# === DATA LAYER INITIALIZATION ===
print("πŸ”§ Inizializzazione database...")
storage_client = LocalStorageClient(storage_path=STORAGE_DIR)

async def init_data_layer():
    """Initialize the data layer and create database tables"""
    global data_layer
    try:
        data_layer = SQLAlchemyDataLayer(
            conninfo=DATABASE_URL,
            storage_provider=storage_client,
            user_thread_limit=1000,
            show_logger=False
        )
        # ⬇️ Create tables if they do not exist
        await data_layer.create_tables()
        cl.data_layer = data_layer
        print("βœ… SQLAlchemyDataLayer + LocalStorage initialized successfully")
        print(f"βœ… Data layer set: {cl.data_layer is not None}")
    except Exception as e:
        print(f"❌ Failed to initialize data layer: {e}")
        cl.data_layer = None

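# Illustrative sanity check (assumption, not in the original): Chainlit's Google OAuth flow
# only fires when the provider credentials are configured. The variable names below follow
# the Chainlit auth docs (OAUTH_GOOGLE_CLIENT_ID / OAUTH_GOOGLE_CLIENT_SECRET /
# CHAINLIT_AUTH_SECRET); adjust them if your Chainlit version expects different ones.
_required_oauth_vars = ("OAUTH_GOOGLE_CLIENT_ID", "OAUTH_GOOGLE_CLIENT_SECRET", "CHAINLIT_AUTH_SECRET")
_missing_oauth_vars = [v for v in _required_oauth_vars if not os.getenv(v)]
if _missing_oauth_vars:
    print(f"⚠️ OAuth Google non configurato, variabili mancanti: {_missing_oauth_vars}")
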
# === OAUTH CALLBACK WITH ROLES ===
@cl.oauth_callback
def oauth_callback(
    provider_id: str,
    token: str,
    raw_user_data: Dict[str, str],
    default_user: cl.User,
) -> Optional[cl.User]:
    """Validate and enrich user data with role information"""
    if provider_id == "google":
        email = raw_user_data.get("email", "").lower()
        if email not in USER_PROFILES:
            print(f"❌ Utente non autorizzato: {email}")
            return None
        profile = USER_PROFILES[email]
        default_user.metadata.update({
            "picture": raw_user_data.get("picture", ""),
            "locale": raw_user_data.get("locale", "en"),
            "role": profile["role"],
            "workspace": profile["workspace"],
            "rag_collection": profile["rag_collection"],
            "capabilities": profile["capabilities"],
            "show_code": profile["show_code"],
            "display_name": profile["name"]
        })
        print(f"βœ… Utente autorizzato: {email} - Ruolo: {profile['role']}")
        return default_user
    return default_user

# === UTILITY FUNCTIONS ===
def get_user_profile(user_email: str) -> Dict:
    return USER_PROFILES.get(user_email.lower(), {
        "role": "guest",
        "name": "Ospite",
        "workspace": "guest_workspace",
        "rag_collection": "documents",
        "capabilities": [],
        "show_code": False
    })

def create_workspace(workspace_name: str) -> str:
    workspace_path = os.path.join(WORKSPACES_DIR, workspace_name)
    os.makedirs(workspace_path, exist_ok=True)
    return workspace_path

def save_code_to_file(code: str, workspace: str) -> str:
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    file_name = f"code_{timestamp}.py"
    file_path = os.path.join(WORKSPACES_DIR, workspace, file_name)
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(code)
    return file_path

def extract_text_from_pdf(pdf_path: str) -> str:
    try:
        doc = fitz.open(pdf_path)
        text_parts = []
        for page_num in range(len(doc)):
            page = doc[page_num]
            text = page.get_text()
            text_parts.append(f"--- Pagina {page_num + 1} ---\n{text}\n")
        doc.close()
        return "\n".join(text_parts)
    except Exception as e:
        print(f"❌ Errore estrazione PDF: {e}")
        return ""

# === QDRANT FUNCTIONS ===
async def get_qdrant_client() -> AsyncQdrantClient:
    return AsyncQdrantClient(url=QDRANT_URL)

async def ensure_collection(collection_name: str):
    client = await get_qdrant_client()
    if not await client.collection_exists(collection_name):
        await client.create_collection(
            collection_name=collection_name,
            # 768 matches the output dimension of nomic-embed-text
            vectors_config=VectorParams(size=768, distance=Distance.COSINE)
        )

async def get_embeddings(text: str) -> list:
    max_length = 2000
    if len(text) > max_length:
        text = text[:max_length]
    client = ollama.Client(host=OLLAMA_URL)
    try:
        response = client.embed(model='nomic-embed-text', input=text)
        if 'embeddings' in response:
            return response['embeddings'][0]
        return response.get('embedding', [])
    except Exception as e:
        print(f"❌ Errore Embedding: {e}")
        return []

def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list:
    """Split text into overlapping chunks, preferring sentence/line boundaries."""
    if len(text) <= max_length:
        return [text]
    chunks = []
    start = 0
    while start < len(text):
        end = start + max_length
        if end < len(text):
            last_period = text.rfind('.', start, end)
            last_newline = text.rfind('\n', start, end)
            split_point = max(last_period, last_newline)
            if split_point > start:
                end = split_point + 1
        chunks.append(text[start:end].strip())
        # Step back by `overlap`, but always move forward so a split point that lands
        # close to the start of the window cannot cause an infinite loop.
        next_start = end - overlap
        start = next_start if next_start > start else end
    return chunks

async def index_document(file_name: str, content: str, collection_name: str) -> bool:
    try:
        await ensure_collection(collection_name)
        chunks = chunk_text(content, max_length=1500)
        qdrant_client = await get_qdrant_client()
        points = []
        for i, chunk in enumerate(chunks):
            embeddings = await get_embeddings(chunk)
            if not embeddings:
                continue
            point_id = str(uuid.uuid4())
            point = PointStruct(
                id=point_id,
                vector=embeddings,
                payload={
                    "file_name": file_name,
                    "content": chunk,
                    "chunk_index": i,
                    "total_chunks": len(chunks),
                    "indexed_at": datetime.now().isoformat()
                }
            )
            points.append(point)
        if points:
            await qdrant_client.upsert(collection_name=collection_name, points=points)
            return True
        return False
    except Exception as e:
        print(f"❌ Errore indicizzazione: {e}")
        return False

async def search_qdrant(query_text: str, collection_name: str, limit: int = 5) -> str:
    try:
        qdrant_client = await get_qdrant_client()
        if not await qdrant_client.collection_exists(collection_name):
            return ""
        query_embedding = await get_embeddings(query_text)
        if not query_embedding:
            return ""
        search_result = await qdrant_client.query_points(
            collection_name=collection_name,
            query=query_embedding,
            limit=limit
        )
        contexts = []
        seen_files = set()
        for hit in search_result.points:
            if hit.payload:
                file_name = hit.payload.get('file_name', 'Unknown')
                content = hit.payload.get('content', '')
                chunk_idx = hit.payload.get('chunk_index', 0)
                score = hit.score if hasattr(hit, 'score') else 0
                file_key = f"{file_name}_{chunk_idx}"
                if file_key not in seen_files:
                    seen_files.add(file_key)
                    # Wrap the retrieved chunk in a 3-backtick code fence
                    contexts.append(
                        f"πŸ“„ **{file_name}** (chunk {chunk_idx + 1}, score: {score:.2f})\n"
                        f"```\n{content}\n```"
                    )
        return "\n\n".join(contexts) if contexts else ""
    except Exception as e:
        print(f"❌ Errore ricerca Qdrant: {e}")
        return ""

# === CHAINLIT HANDLERS ===
@cl.on_chat_start
async def on_chat_start():
    # ⬇️ Initialize the DB on first start
    if not cl.data_layer:
        await init_data_layer()

    user = cl.user_session.get("user")
    if user:
        user_email = user.identifier
        profile = get_user_profile(user_email)
        user_name = profile["name"]
        user_role = profile["role"]
        workspace = profile["workspace"]
        user_picture = user.metadata.get("picture", "")
        show_code = profile["show_code"]
        capabilities = profile["capabilities"]
    else:
        user_email = "guest@local"
        profile = get_user_profile(user_email)  # falls back to the guest profile
        user_name = "Ospite"
        user_role = "guest"
        workspace = "guest_workspace"
        user_picture = ""
        show_code = False
        capabilities = []

    create_workspace(workspace)
    cl.user_session.set("email", user_email)
    cl.user_session.set("name", user_name)
    cl.user_session.set("role", user_role)
    cl.user_session.set("workspace", workspace)
    cl.user_session.set("show_code", show_code)
    cl.user_session.set("capabilities", capabilities)
    cl.user_session.set("rag_collection", profile.get("rag_collection", "documents"))

    settings_widgets = [
        Select(
            id="model",
            label="πŸ€– Modello AI",
            values=["glm-4.6:cloud", "llama3.2", "mistral", "qwen2.5-coder:32b"],
            initial_value="glm-4.6:cloud",
        ),
        Slider(
            id="temperature",
            label="🌑️ Temperatura",
            initial=0.7,
            min=0,
            max=2,
            step=0.1,
        ),
    ]
    if user_role == "admin":
        settings_widgets.append(
            Switch(
                id="rag_enabled",
                label="πŸ“š Abilita RAG",
                initial=True,
            )
        )
    await cl.ChatSettings(settings_widgets).send()

    role_emoji = {
        "admin": "πŸ‘‘",
        "business": "πŸ’Ό",
        "engineering": "βš™οΈ",
        "architecture": "πŸ›οΈ",
        "guest": "πŸ‘€"
    }
    persistence_status = "βœ… Attiva" if cl.data_layer else "⚠️ Disattivata"

    welcome_msg = f"{role_emoji.get(user_role, 'πŸ‘‹')} **Benvenuto, {user_name}!**\n\n"
    if user_picture:
        welcome_msg += f"![Avatar]({user_picture})\n\n"
    welcome_msg += (
        f"🎭 **Ruolo**: {user_role.upper()}\n"
        f"πŸ“ **Workspace**: `{workspace}`\n"
        f"πŸ’Ύ **Persistenza**: {persistence_status}\n"
        f"πŸ€– **Modello**: `glm-4.6:cloud`\n\n"
    )
    if "debug" in capabilities:
        welcome_msg += "πŸ”§ **ModalitΓ  Debug**: Attiva\n"
    if "user_management" in capabilities:
        welcome_msg += "πŸ‘₯ **Gestione Utenti**: Disponibile\n"
    if not show_code:
        welcome_msg += "🎨 **ModalitΓ  Visuale**: Codice nascosto\n"
    welcome_msg += "\nβš™οΈ **Usa le Settings (icona βš™οΈ in alto a destra) per personalizzare!**"
    await cl.Message(content=welcome_msg).send()

@cl.on_settings_update
async def on_settings_update(settings):
    cl.user_session.set("settings", settings)
    model = settings.get("model", "glm-4.6:cloud")
    temp = settings.get("temperature", 0.7)
    rag = settings.get("rag_enabled", True)
    await cl.Message(
        content=f"βœ… **Settings aggiornati**:\n"
                f"- πŸ€– Modello: `{model}`\n"
                f"- 🌑️ Temperatura: `{temp}`\n"
                f"- πŸ“š RAG: {'βœ… Attivo' if rag else '❌ Disattivato'}"
    ).send()

@cl.on_message
async def on_message(message: cl.Message):
    user_email = cl.user_session.get("email", "guest")
    user_role = cl.user_session.get("role", "guest")
    workspace = cl.user_session.get("workspace", "guest_workspace")
    show_code = cl.user_session.get("show_code", False)
    rag_collection = cl.user_session.get("rag_collection", "documents")
    settings = cl.user_session.get("settings", {})
    model = settings.get("model", "glm-4.6:cloud")
    temperature = settings.get("temperature", 0.7)
    # Only admins get the RAG toggle in settings; everyone else always has RAG on
    rag_enabled = settings.get("rag_enabled", True) if user_role == "admin" else True

    try:
        if message.elements:
            await handle_file_uploads(message.elements, workspace, rag_collection)

        context_text = ""
        if rag_enabled:
            context_text = await search_qdrant(message.content, rag_collection, limit=5)

        if context_text:
            system_prompt = (
                "Sei un assistente AI esperto. "
                "Usa il seguente contesto per arricchire la tua risposta, "
                "ma puoi anche rispondere usando la tua conoscenza generale se il contesto non Γ¨ sufficiente."
            )
            full_prompt = f"{system_prompt}\n\n**CONTESTO DOCUMENTI:**\n{context_text}\n\n**DOMANDA UTENTE:**\n{message.content}"
        else:
            system_prompt = "Sei un assistente AI esperto e disponibile. Rispondi in modo chiaro e utile."
            full_prompt = f"{system_prompt}\n\n**DOMANDA UTENTE:**\n{message.content}"

        client = ollama.Client(host=OLLAMA_URL)
        msg = cl.Message(content="")
        await msg.send()

        messages = [{"role": "user", "content": full_prompt}]
        stream = client.chat(
            model=model,
            messages=messages,
            stream=True,
            options={"temperature": temperature}
        )
        full_response = ""
        for chunk in stream:
            content = chunk['message']['content']
            full_response += content
            await msg.stream_token(content)
        await msg.update()

        # Extract fenced code blocks (3 backticks, optional language tag)
        code_blocks = re.findall(r"```(?:\w+)?\n(.*?)```", full_response, re.DOTALL)
        if code_blocks:
            elements = []
            if not show_code:
                cleaned_response = re.sub(
                    r"```(?:\w+)?\n.*?```",
                    "[πŸ’» Codice eseguito internamente]",
                    full_response,
                    flags=re.DOTALL
                )
                msg.content = cleaned_response
                await msg.update()
            for code in code_blocks:
                file_path = save_code_to_file(code.strip(), workspace)
                elements.append(
                    cl.File(
                        name=os.path.basename(file_path),
                        path=file_path,
                        display="inline" if show_code else "side"
                    )
                )
            if show_code:
                await cl.Message(
                    content=f"πŸ’Ύ Codice salvato in workspace `{workspace}`",
                    elements=elements
                ).send()
    except Exception as e:
        await cl.Message(content=f"❌ **Errore:** {str(e)}").send()

async def handle_file_uploads(elements, workspace: str, collection_name: str):
    for element in elements:
        try:
            dest_path = os.path.join(WORKSPACES_DIR, workspace, element.name)
            shutil.copy(element.path, dest_path)

            content = None
            if element.name.lower().endswith('.pdf'):
                await cl.Message(content=f"πŸ“„ Elaborazione PDF **{element.name}**...").send()
                content = extract_text_from_pdf(dest_path)
                if not content:
                    await cl.Message(
                        content=f"⚠️ **{element.name}**: PDF vuoto o non leggibile"
                    ).send()
                    continue
            elif element.name.lower().endswith('.txt'):
                with open(dest_path, 'r', encoding='utf-8') as f:
                    content = f.read()
            else:
                await cl.Message(
                    content=f"πŸ“ **{element.name}** salvato in workspace (supportati: .pdf, .txt)"
                ).send()
                continue

            if content:
                success = await index_document(element.name, content, collection_name)
                if success:
                    word_count = len(content.split())
                    await cl.Message(
                        content=f"βœ… **{element.name}** indicizzato in `{collection_name}`\n"
                                f"πŸ“Š Parole estratte: {word_count:,}"
                    ).send()
                else:
                    await cl.Message(
                        content=f"⚠️ Errore indicizzazione **{element.name}**"
                    ).send()
        except Exception as e:
            await cl.Message(
                content=f"❌ Errore con **{element.name}**: {str(e)}"
            ).send()
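
# --- How to run (illustrative sketch, not part of the application logic) ---
# Assuming this module is saved as app.py, the stack could be started with something like:
#
#   ollama pull nomic-embed-text       # embedding model used by get_embeddings()
#   chainlit run app.py -w --host 0.0.0.0 --port 8000
#
# DATABASE_URL, OLLAMA_URL and QDRANT_URL can be overridden via environment variables
# (see the configuration block at the top of this file).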