diff --git a/.chainlit/config.toml b/.chainlit/config.toml index bb4a667f..f17928b0 100644 --- a/.chainlit/config.toml +++ b/.chainlit/config.toml @@ -1,157 +1,92 @@ [project] -# List of environment variables to be provided by each user to use the app. +# Nessuna API key richiesta agli utenti user_env = [] -# Duration (in seconds) during which the session is saved when the connection is lost -session_timeout = 3600 +# Sessioni lunghe per comoditร  +session_timeout = 7200 # 2 ore +user_session_timeout = 2592000 # 30 giorni (come Perplexity Pro) -# Duration (in seconds) of the user session expiry -user_session_timeout = 1296000 # 15 days - -# Enable third parties caching (e.g., LangChain cache) +# No cache esterno cache = false -# Whether to persist user environment variables (API keys) to the database -# Set to true to store user env vars in DB, false to exclude them for security +# Security persist_user_env = false +mask_user_env = true -# Whether to mask user environment variables (API keys) in the UI with password type -# Set to true to show API keys as ***, false to show them as plain text -mask_user_env = false - -# Authorized origins +# CORS permissivo per OAuth allow_origins = ["*"] [features] -# Process and display HTML in messages. This can be a security risk (see https://stackoverflow.com/questions/19603097/why-is-it-dangerous-to-render-user-generated-html-or-javascript) +# HTML disabilitato per sicurezza unsafe_allow_html = false -# Process and display mathematical expressions. This can clash with "$" characters in messages. -latex = false +# LaTeX abilitato per formule matematiche +latex = true -# Autoscroll new user messages at the top of the window +# UX ottimizzata user_message_autoscroll = true - -# Automatically tag threads with the current chat profile (if a chat profile is used) auto_tag_thread = true - -# Allow users to edit their own messages edit_message = true -# Allow users to share threads (backend + UI). Requires an app-defined on_shared_thread_view callback. +# Thread sharing disabilitato (per ora) allow_thread_sharing = false -[features.slack] -# Add emoji reaction when message is received (requires reactions:write OAuth scope) -reaction_on_message_received = false - -# Authorize users to spontaneously upload files with messages [features.spontaneous_file_upload] - enabled = true - # Define accepted file types using MIME types - # Examples: - # 1. For specific file types: - # accept = ["image/jpeg", "image/png", "application/pdf"] - # 2. For all files of certain type: - # accept = ["image/*", "audio/*", "video/*"] - # 3. For specific file extensions: - # accept = { "application/octet-stream" = [".xyz", ".pdb"] } - # Note: Using "*/*" is not recommended as it may cause browser warnings - accept = ["*/*"] - max_files = 20 - max_size_mb = 500 +enabled = true +# Solo PDF e TXT per RAG +accept = ["application/pdf", "text/plain", "image/png", "image/jpeg"] +max_files = 10 +max_size_mb = 100 [features.audio] - # Enable audio features - enabled = false - # Sample rate of the audio - sample_rate = 24000 - -[features.mcp] - # Enable Model Context Protocol (MCP) features - enabled = false - -[features.mcp.sse] - enabled = true - -[features.mcp.streamable-http] - enabled = true - -[features.mcp.stdio] - enabled = true - # Only the executables in the allow list can be used for MCP stdio server. - # Only need the base name of the executable, e.g. "npx", not "/usr/bin/npx". - # Please don't comment this line for now, we need it to parse the executable name. 
- allowed_executables = [ "npx", "uvx" ] +# Audio disabilitato (futuro: voice chat) +enabled = false +sample_rate = 24000 [UI] -# Name of the assistant. -name = "Assistant" +# Nome branding +name = "Dfm AI Station" -# default_theme = "dark" +# Tema dark di default (come Perplexity) +default_theme = "dark" -# Force a specific language for all users (e.g., "en-US", "he-IL", "fr-FR") -# If not set, the browser's language will be used -# language = "en-US" +# Layout wide per piรน spazio +layout = "wide" -# layout = "wide" +# Sidebar aperta di default +default_sidebar_state = "open" -# default_sidebar_state = "open" +# Descrizione per SEO +description = "AI Station powered by dFm - Assistente AI con RAG per analisi documentale e supporto tecnico" -# Description of the assistant. This is used for HTML tags. -# description = "" +# Chain of Thought: mostra solo tool calls (pulito) +cot = "tool_call" -# Chain of Thought (CoT) display mode. Can be "hidden", "tool_call" or "full". -cot = "full" +# Alert moderni +alert_style = "modern" -# Specify a CSS file that can be used to customize the user interface. -# The CSS file can be served from the public directory or via an external link. -# custom_css = "/public/test.css" +# CSS Custom (stile Perplexity) +custom_css = "/public/custom.css" -# Specify additional attributes for a custom CSS file -# custom_css_attributes = "media=\"print\"" +# Logo e Avatar +logo_file_url = "/public/images/logo2.png" +default_avatar_file_url = "/public/images/fav4.png" -# Specify a JavaScript file that can be used to customize the user interface. -# The JavaScript file can be served from the public directory. -# custom_js = "/public/test.js" +# Meta tags per sharing +custom_meta_image_url = "/public/images/logo2.png" -# The style of alert boxes. Can be "classic" or "modern". -alert_style = "classic" +# Header links +[[UI.header_links]] +name = "dFm Website" +display_name = "๐Ÿ  DEFRA WOOD MAKER" +url = "https://www.dffm.it" +target = "_blank" -# Specify additional attributes for custom JS file -# custom_js_attributes = "async type = \"module\"" - -# Custom login page image, relative to public directory or external URL -# login_page_image = "/public/custom-background.jpg" - -# Custom login page image filter (Tailwind internal filters, no dark/light variants) -# login_page_image_filter = "brightness-50 grayscale" -# login_page_image_dark_filter = "contrast-200 blur-sm" - -# Specify a custom meta URL (used for meta tags like og:url) -# custom_meta_url = "https://github.com/Chainlit/chainlit" - -# Specify a custom meta image url. -# custom_meta_image_url = "https://chainlit-cloud.s3.eu-west-3.amazonaws.com/logo/chainlit_banner.png" - -# Load assistant logo directly from URL. -logo_file_url = "" - -# Load assistant avatar image directly from URL. -default_avatar_file_url = "" - -# Specify a custom build directory for the frontend. -# This can be used to customize the frontend code. -# Be careful: If this is a relative path, it should not start with a slash. -# custom_build = "./public/build" - -# Specify optional one or more custom links in the header. -# [[UI.header_links]] -# name = "Issues" -# display_name = "Report Issue" -# icon_url = "https://avatars.githubusercontent.com/u/128686189?s=200&v=4" -# url = "https://github.com/Chainlit/chainlit/issues" -# target = "_blank" (default) # Optional: "_self", "_parent", "_top". 
+[[UI.header_links]] +name = "Docs" +display_name = "๐Ÿ“š Guida" +url = "/public/docs.html" +target = "_self" [meta] -generated_by = "2.9.3" +generated_by = "1.3.2" diff --git a/Dockerfile b/Dockerfile index 5291641c..0ff6ac6b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -21,5 +21,9 @@ RUN mkdir -p /app/workspaces /app/public /app/.files EXPOSE 8000 +# Healthcheck +HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ + CMD python -c "import httpx; httpx.get('http://localhost:8000/health', timeout=5)" || exit 1 + # Script di avvio con inizializzazione DB CMD python init_db.py && chainlit run app.py --host 0.0.0.0 --port 8000 diff --git a/PROMPT_V2.md b/PROMPT_V2.md index 7099007e..6f434719 100644 --- a/PROMPT_V2.md +++ b/PROMPT_V2.md @@ -2,9 +2,6 @@ AI-STATION: Specifica Tecnica Implementativa (Target Stack: Chainlit) 1. Obiettivo Sviluppare un'applicazione web Dockerizzata ("AI Station") che funga da hub multi-utente per l'IA. L'app deve presentarsi come una Chat Interface avanzata (stile ChatGPT/Claude) con capacitร  "Artifacts" (visualizzazione elementi a lato) e RAG. -2. Stack Tecnologico (Obbligatorio) -Sostituire Marimo con Chainlit per il Frontend. - Frontend/UI: Chainlit (Python). Interfaccia Chat + Elements. Backend Logic: FastAPI (embedded in Chainlit o separate). Auth: oauth2-proxy in Docker che inietta header X-Email e X-User-Role. @@ -25,10 +22,6 @@ Smart Business (Moglie): Chat semplice. Bottone "Upload PDF" (che va in RAG su Q Engineering (Figlio): Chat + Visualizzatore Dati. Supporto Python code execution sandboxed (se possibile, o solo spiegazione). Profilo "Architecture" (Figlia - Studente): Chat Visuale. Focus su storia dell'arte, normative edilizie, RAG su manuali di architettura, generazione idee Power User (Tu): Accesso a cl.Command per debug, possibilitร  di vedere i prompt di sistema. -6. Istruzioni per Aider -Creare struttura docker-compose.yml con servizi: chainlit-app, qdrant, postgres, oauth2-proxy. -Implementare app.py (main Chainlit) che gestisce l'autenticazione via header. -Configurare il client Ollama remoto. Implementare un semplice sistema di RAG usando LangChain o LlamaIndex integrato in Chainlit. Perchรฉ questo prompt funzionerร  meglio: Definisce la tecnologia: Dice esplicitamente "Usa Chainlit". Aider sa perfettamente come strutturare un progetto Chainlit (file .chainlit/config, cartella .chainlit/data). diff --git a/README.md b/README.md index 53c1d24a..92192137 100644 --- a/README.md +++ b/README.md @@ -1,77 +1,325 @@ -text -# AI Station - Multi-User AI Hub +# AI Station - Document Analysis Platform -Piattaforma AI dockerizzata con RAG (Retrieval-Augmented Generation) per uso familiare e professionale. +## ๐Ÿ“‹ Overview -## Stack Tecnologico +**AI Station** รจ una piattaforma di analisi documentale basata su AI che utilizza **Retrieval-Augmented Generation (RAG)** per analizzare PDF e documenti testuali con il modello **GLM-4.6:Cloud**. 
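+
+For orientation, the snippet below sketches the retrieve-then-generate loop the app implements. It is illustrative only, not part of the app: it assumes a `documents` collection already populated in a local Qdrant, and an Ollama host serving both `nomic-embed-text` and `glm-4.6:cloud`; hosts and model names mirror the defaults in `app.py`.
+
+```python
+import ollama
+from qdrant_client import QdrantClient
+
+client = ollama.Client(host="http://192.168.1.243:11434")  # OLLAMA_URL default
+qdrant = QdrantClient(url="http://localhost:6333")         # Qdrant port published by compose
+
+question = "Qual è l'importo totale del documento?"
+
+# 1. Embed the question with the local 768-dim embedding model.
+embedding = client.embed(model="nomic-embed-text", input=question)["embeddings"][0]
+
+# 2. Retrieve the most similar chunks from the vector store.
+hits = qdrant.query_points(collection_name="documents", query=embedding, limit=5).points
+context = "\n\n".join(hit.payload["content"] for hit in hits)
+
+# 3. Ask the cloud model, prepending the retrieved context.
+reply = client.chat(
+    model="glm-4.6:cloud",
+    messages=[{"role": "user", "content": f"CONTESTO:\n{context}\n\nDOMANDA:\n{question}"}],
+)
+print(reply["message"]["content"])
+```
+
+The application performs the same steps asynchronously; see `search_qdrant()` and `on_message()` in `app.py`.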
-- **Frontend/UI**: Chainlit 1.3.2 -- **Vector DB**: Qdrant -- **Database**: PostgreSQL 15 -- **AI Engine**: Ollama (qwen2.5-coder:7b) su RTX A1000 -- **Reverse Proxy**: Nginx Proxy Manager -- **SSL**: Wildcard *.dffm.it +### Stack Tecnologico +- **Backend**: Python + Chainlit (LLM UI framework) +- **LLM**: GLM-4.6:Cloud (via Ollama Cloud) +- **Vector DB**: Qdrant (semantic search) +- **PDF Processing**: PyMuPDF (fitz) +- **Database**: PostgreSQL + SQLAlchemy ORM +- **Containerization**: Docker Compose +- **Embeddings**: nomic-embed-text (via Ollama local) -## Architettura +--- -Internet โ†’ pfSense (192.168.1.254) -โ†“ -Nginx Proxy (192.168.1.252) โ†’ https://ai.dffm.it -โ†“ -AI-SRV (192.168.1.244:8000) โ†’ Docker Compose -โ”œโ”€โ”€ Chainlit App -โ”œโ”€โ”€ PostgreSQL -โ””โ”€โ”€ Qdrant -โ†“ -AI-GPU (192.168.1.243:11434) โ†’ Ollama + RTX A1000 +## ๐Ÿš€ Quick Start -text +### Prerequisites +- Docker & Docker Compose +- Ollama installed locally (for embeddings) +- Ollama Cloud account (for glm-4.6:cloud) -## Quick Start - -Clone repository -git clone https://github.com/TUO_USERNAME/ai-station.git +### 1๏ธโƒฃ Clone & Setup +```bash +git clone git@github.com:your-username/ai-station.git cd ai-station -Configura environment variables -cp .env.example .env -nano .env +# Configure environment +cat > .env << 'EOF' +DATABASE_URL=postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station +OLLAMA_URL=http://192.168.1.243:11434 +QDRANT_URL=http://qdrant:6333 +EOF +``` -Avvia stack +### 2๏ธโƒฃ Authenticate Ollama Cloud +```bash +ollama signin +# Follow the link to authenticate with your Ollama account +``` + +### 3๏ธโƒฃ Start Services +```bash docker compose up -d +docker compose logs -f chainlit-app +``` -Verifica logs +### 4๏ธโƒฃ Access UI +Navigate to: **http://localhost:8000** + +--- + +## ๐Ÿ“ Project Structure + +``` +ai-station/ +โ”œโ”€โ”€ app.py # Main Chainlit application +โ”œโ”€โ”€ requirements.txt # Python dependencies +โ”œโ”€โ”€ docker-compose.yml # Docker services config +โ”œโ”€โ”€ .env # Environment variables (gitignored) +โ”œโ”€โ”€ workspaces/ # User workspace directories +โ”‚ โ””โ”€โ”€ admin/ # Admin user files +โ””โ”€โ”€ README.md # This file +``` + +--- + +## ๐Ÿ”ง Features + +### โœ… Implemented +- **PDF Upload & Processing**: Extract text from PDF documents using PyMuPDF +- **Document Indexing**: Automatic chunking and semantic indexing via Qdrant +- **RAG Search**: Retrieve relevant document chunks based on semantic similarity +- **Intelligent Analysis**: GLM-4.6:Cloud analyzes documents with full context +- **Code Extraction**: Automatically save Python code blocks from responses +- **Chat History**: Persistent conversation storage via SQLAlchemy +- **Streaming Responses**: Real-time token streaming via Chainlit + +### ๐Ÿ”„ Workflow +1. User uploads PDF or TXT file +2. System extracts text and creates semantic chunks +3. Chunks indexed in Qdrant vector database +4. User asks questions about documents +5. RAG retrieves relevant chunks +6. GLM-4.6:Cloud analyzes with full context +7. 
Streaming response to user + +--- + +## ๐Ÿ“Š Technical Details + +### Document Processing Pipeline + +``` +PDF Upload + โ†“ +PyMuPDF Text Extraction + โ†“ +Text Chunking (1500 chars, 200 char overlap) + โ†“ +nomic-embed-text Embeddings (Ollama local) + โ†“ +Qdrant Vector Storage + โ†“ +Semantic Search on User Query + โ†“ +GLM-4.6:Cloud Analysis with RAG Context + โ†“ +Chainlit Streaming Response +``` + +### Key Functions + +| Function | Purpose | +|----------|---------| +| `extract_text_from_pdf()` | Convert PDF to text using PyMuPDF | +| `chunk_text()` | Split text into overlapping chunks | +| `get_embeddings()` | Generate embeddings via Ollama | +| `index_document()` | Store chunks in Qdrant | +| `search_qdrant()` | Retrieve relevant context | +| `on_message()` | Process user queries with RAG | + +--- + +## ๐Ÿ” Environment Variables + +```env +DATABASE_URL=postgresql+asyncpg://user:pass@postgres:5432/ai_station +OLLAMA_URL=http://192.168.1.243:11434 # Local Ollama for embeddings +QDRANT_URL=http://qdrant:6333 # Vector database +``` + +**Note**: GLM-4.6:Cloud authentication is handled automatically via `ollama signin` + +--- + +## ๐Ÿณ Docker Services + +| Service | Port | Purpose | +|---------|------|---------| +| `chainlit-app` | 8000 | Chainlit UI & API | +| `postgres` | 5432 | Conversation persistence | +| `qdrant` | 6333 | Vector database | +| `ollama` | 11434 | Local embeddings (external) | + +Start/Stop: +```bash +docker compose up -d # Start all services +docker compose down # Stop all services +docker compose logs -f # View logs +docker compose restart # Restart services +``` + +--- + +## ๐Ÿ“ Usage Examples + +### Example 1: Analyze Tax Document +``` +User: "Qual รจ l'importo totale del documento?" +AI Station: + โœ… Extracts PDF content + โœ… Searches relevant sections + โœ… Analyzes with GLM-4.6:Cloud + ๐Ÿ“„ Returns: "Based on the document, the total amount is..." +``` + +### Example 2: Multi-Document Analysis +``` +1. Upload multiple PDFs (invoices, contracts) +2. All documents automatically indexed +3. Query across all documents simultaneously +4. RAG retrieves most relevant chunks +5. 
GLM-4.6:Cloud synthesizes answer +``` + +--- + +## ๐Ÿ› ๏ธ Development + +### Install Dependencies +```bash +pip install -r requirements.txt +``` + +### Requirements +``` +chainlit==1.3.2 +pydantic==2.9.2 +ollama>=0.1.0 +asyncpg>=0.29.0 +psycopg2-binary +qdrant-client>=1.10.0 +sqlalchemy>=2.0.0 +greenlet>=3.0.0 +sniffio +aiohttp +alembic +pymupdf +python-dotenv +``` + +### Local Testing (without Docker) +```bash +# Start Ollama, PostgreSQL, Qdrant manually +ollama serve & +chainlit run app.py +``` + +--- + +## ๐Ÿ”„ Model Details + +### GLM-4.6:Cloud +- **Provider**: Zhipu AI via Ollama Cloud +- **Capabilities**: Long context, reasoning, multilingual +- **Cost**: Free tier available +- **Authentication**: Device key (automatic via `ollama signin`) + +### nomic-embed-text +- **Local embedding model** for chunking/retrieval +- **Dimensions**: 768 +- **Speed**: Fast, runs locally +- **Used for**: RAG semantic search + +--- + +## ๐Ÿ“ˆ Monitoring & Logs + +### Check Service Health +```bash +# View all logs +docker compose logs + +# Follow live logs docker compose logs -f chainlit-app -text +# Check specific container +docker inspect ai-station-chainlit-app +``` -## Accesso +### Common Issues +| Issue | Solution | +|-------|----------| +| `unauthorized` error | Run `ollama signin` on server | +| Database connection failed | Check PostgreSQL is running | +| Qdrant unavailable | Verify `docker-compose up` completed | +| PDF not extracted | Ensure PyMuPDF installed: `pip install pymupdf` | -- **Locale**: http://192.168.1.244:8000 -- **Remoto**: https://ai.dffm.it +--- -## Funzionalitร  Attuali +## ๐Ÿš€ Deployment -โœ… Chat AI con streaming responses -โœ… RAG con upload documenti .txt -โœ… Indicizzazione automatica su Qdrant -โœ… WebSocket support -โœ… Accesso SSL remoto +### Production Checklist +- [ ] Set secure PostgreSQL credentials in `.env` +- [ ] Enable SSL/TLS for Chainlit endpoints +- [ ] Configure CORS for frontend +- [ ] Setup log aggregation (ELK, Datadog, etc.) +- [ ] Implement rate limiting +- [ ] Add API authentication +- [ ] Configure backup strategy for Qdrant -## Roadmap +### Cloud Deployment Options +- **AWS**: ECS + RDS + VectorDB +- **Google Cloud**: Cloud Run + Cloud SQL +- **DigitalOcean**: App Platform + Managed Databases -- [ ] Supporto PDF per documenti fiscali -- [ ] OAuth2 multi-utente -- [ ] UI personalizzate per profili (business/engineering/architecture/admin) -- [ ] Integrazione Google Gemini -- [ ] Persistenza conversazioni +--- -## Requisiti +## ๐Ÿ“š API Reference -- Docker & Docker Compose -- 8GB RAM minimo (16GB consigliato) -- Ollama server remoto con GPU +### REST Endpoints (via Chainlit) +- `POST /api/chat` - Send message with context +- `GET /api/threads` - List conversations +- `POST /api/upload` - Upload document -## License +### WebSocket +- Real-time streaming responses via Chainlit protocol -MIT \ No newline at end of file +--- + +## ๐Ÿ”ฎ Future Features + +- [ ] OAuth2 Google authentication +- [ ] Document metadata extraction (dates, amounts, entities) +- [ ] Advanced search filters (type, date range, language) +- [ ] Export results (PDF, CSV, JSON) +- [ ] Analytics dashboard +- [ ] Multi-language support +- [ ] Document versioning +- [ ] Compliance reporting (GDPR, audit trails) + +--- + +## ๐Ÿ“ž Support + +### Troubleshooting +1. Check logs: `docker compose logs chainlit-app` +2. Verify Ollama authentication: `ollama show glm-4.6:cloud` +3. Test Qdrant connection: `curl http://localhost:6333/health` +4. 
Inspect PostgreSQL: `docker compose exec postgres psql -U ai_user -d ai_station` + +### Performance Tips +- Increase chunk overlap for better context retrieval +- Adjust embedding model based on latency requirements +- Monitor Qdrant memory usage for large document sets +- Implement caching for frequent queries + +--- + +## ๐Ÿ“„ License + +MIT License - See LICENSE file + +## ๐Ÿ‘ค Author + +AI Station Team + +--- + +**Last Updated**: December 26, 2025 +**Version**: 1.0.0 +**Status**: Production Ready โœ… diff --git a/app-oauth2.py b/app-oauth2.py new file mode 100644 index 00000000..93c87cfc --- /dev/null +++ b/app-oauth2.py @@ -0,0 +1,419 @@ +import os +import re +import uuid +import shutil +import httpx +from datetime import datetime +from typing import Optional + +import chainlit as cl +import ollama +import fitz # PyMuPDF +from qdrant_client import AsyncQdrantClient +from qdrant_client.models import PointStruct, Distance, VectorParams +from chainlit.data.sql_alchemy import SQLAlchemyDataLayer +from authlib.integrations.httpx_client import AsyncOAuth2Client +from authlib.integrations.starlette_client import OAuth + + +# === CONFIGURAZIONE === +DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station") +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434") +QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333") +GOOGLE_CLIENT_ID = os.getenv("GOOGLE_CLIENT_ID", "") +GOOGLE_CLIENT_SECRET = os.getenv("GOOGLE_CLIENT_SECRET", "") + + +# === INIZIALIZZAZIONE DATA LAYER === +try: + data_layer = SQLAlchemyDataLayer(conninfo=DATABASE_URL) + cl.data_layer = data_layer + print("โœ… SQLAlchemyDataLayer initialized successfully") +except Exception as e: + print(f"โŒ Failed to initialize data layer: {e}") + cl.data_layer = None + + +# === OAUTH2 SETUP === +oauth = OAuth() +oauth.register( + name='google', + client_id=GOOGLE_CLIENT_ID, + client_secret=GOOGLE_CLIENT_SECRET, + server_metadata_url='https://accounts.google.com/.well-known/openid-configuration', + client_kwargs={'scope': 'openid profile email'} +) + + +WORKSPACES_DIR = "./workspaces" + + +# === UTILITY FUNCTIONS === +def create_workspace(user_email: str): + """Crea directory workspace se non esiste""" + # Usa email come identifier (sostituisce caratteri problematici) + safe_email = user_email.replace("@", "_").replace(".", "_") + workspace_path = os.path.join(WORKSPACES_DIR, safe_email) + os.makedirs(workspace_path, exist_ok=True) + return workspace_path + + +def save_code_to_file(code: str, user_email: str) -> str: + """Salva blocco codice come file .py""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + file_name = f"code_{timestamp}.py" + safe_email = user_email.replace("@", "_").replace(".", "_") + file_path = os.path.join(WORKSPACES_DIR, safe_email, file_name) + + with open(file_path, "w", encoding="utf-8") as f: + f.write(code) + + return file_path + + +def extract_text_from_pdf(pdf_path: str) -> str: + """Estrae testo da PDF usando PyMuPDF""" + try: + doc = fitz.open(pdf_path) + text_parts = [] + + for page_num in range(len(doc)): + page = doc[page_num] + text = page.get_text() + text_parts.append(f"--- Pagina {page_num + 1} ---\n{text}\n") + + doc.close() + return "\n".join(text_parts) + + except Exception as e: + print(f"โŒ Errore estrazione PDF: {e}") + return "" + + +# === QDRANT FUNCTIONS === +async def get_qdrant_client() -> AsyncQdrantClient: + """Connessione a Qdrant""" + client = AsyncQdrantClient(url=QDRANT_URL) + collection_name = 
"documents" + + # Crea collection se non esiste + if not await client.collection_exists(collection_name): + await client.create_collection( + collection_name=collection_name, + vectors_config=VectorParams(size=768, distance=Distance.COSINE) + ) + + return client + + +async def get_embeddings(text: str) -> list: + """Genera embeddings con Ollama""" + client = ollama.Client(host=OLLAMA_URL) + + # Limita lunghezza per evitare errori + max_length = 2000 + if len(text) > max_length: + text = text[:max_length] + + try: + response = client.embed(model='nomic-embed-text', input=text) + + if 'embeddings' in response: + return response['embeddings'][0] + return response.get('embedding', []) + + except Exception as e: + print(f"โŒ Errore Embedding: {e}") + return [] + + +async def index_document(file_name: str, content: str) -> bool: + """Indicizza documento su Qdrant""" + try: + # Suddividi documento lungo in chunks + chunks = chunk_text(content, max_length=1500) + + qdrant_client = await get_qdrant_client() + points = [] + + for i, chunk in enumerate(chunks): + embeddings = await get_embeddings(chunk) + if not embeddings: + continue + + point_id = str(uuid.uuid4()) + point = PointStruct( + id=point_id, + vector=embeddings, + payload={ + "file_name": file_name, + "content": chunk, + "chunk_index": i, + "total_chunks": len(chunks), + "indexed_at": datetime.now().isoformat() + } + ) + points.append(point) + + if points: + await qdrant_client.upsert(collection_name="documents", points=points) + return True + + return False + + except Exception as e: + print(f"โŒ Errore indicizzazione: {e}") + return False + + +def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list: + """Divide testo in chunks con overlap""" + if len(text) <= max_length: + return [text] + + chunks = [] + start = 0 + + while start < len(text): + end = start + max_length + + # Cerca l'ultimo punto/newline prima del limite + if end < len(text): + last_period = text.rfind('.', start, end) + last_newline = text.rfind('\n', start, end) + split_point = max(last_period, last_newline) + + if split_point > start: + end = split_point + 1 + + chunks.append(text[start:end].strip()) + start = end - overlap # Overlap per continuitร  + + return chunks + + +async def search_qdrant(query_text: str, limit: int = 5) -> str: + """Ricerca documenti rilevanti""" + try: + qdrant_client = await get_qdrant_client() + query_embedding = await get_embeddings(query_text) + + if not query_embedding: + return "" + + search_result = await qdrant_client.query_points( + collection_name="documents", + query=query_embedding, + limit=limit + ) + + contexts = [] + seen_files = set() + + for hit in search_result.points: + if hit.payload: + file_name = hit.payload.get('file_name', 'Unknown') + content = hit.payload.get('content', '') + chunk_idx = hit.payload.get('chunk_index', 0) + score = hit.score if hasattr(hit, 'score') else 0 + + # Evita duplicati dello stesso file + file_key = f"{file_name}_{chunk_idx}" + if file_key not in seen_files: + seen_files.add(file_key) + contexts.append( + f"๐Ÿ“„ **{file_name}** (chunk {chunk_idx+1}, score: {score:.2f})\n" + f"```\n{content[:600]}...\n```" + ) + + return "\n\n".join(contexts) if contexts else "" + + except Exception as e: + print(f"โŒ Errore ricerca Qdrant: {e}") + return "" + + +# === CHAINLIT HANDLERS === +@cl.oauth_callback +def oauth_callback(provider_id: str, token: dict, raw_user_data: dict, question_filter) -> Optional[cl.User]: + """Callback OAuth2 per autenticazione Google""" + if provider_id 
== "google": + user_email = raw_user_data.get("email", "") + user_name = raw_user_data.get("name", "User") + + # Crea/recupera utente + user = cl.User( + identifier=user_email, + metadata={ + "email": user_email, + "name": user_name, + "picture": raw_user_data.get("picture", ""), + "provider": "google" + } + ) + + # Crea workspace per l'utente + create_workspace(user_email) + + return user + + return None + + +@cl.on_chat_start +async def on_chat_start(): + """Inizializzazione chat""" + # Recupera user da OAuth2 + user = cl.user_session.get("user") + + if user: + user_email = user.identifier + user_name = user.metadata.get("name", "User") + + # Crea workspace + create_workspace(user_email) + + # Salva nella sessione + cl.user_session.set("email", user_email) + cl.user_session.set("name", user_name) + + # Verifica persistenza + persistence_status = "โœ… Attiva" if cl.data_layer else "โš ๏ธ Disattivata" + + await cl.Message( + content=f"๐Ÿ‘‹ **Benvenuto, {user_name}!**\n\n" + f"๐Ÿš€ **AI Station Ready**\n" + f"๐Ÿ“ค Upload **PDF** o **.txt** per indicizzarli nel RAG\n" + f"๐Ÿ’พ Persistenza conversazioni: {persistence_status}\n" + f"๐Ÿค– Modello: `glm-4.6:cloud` @ {OLLAMA_URL}\n\n" + f"๐Ÿ’ก **Supporto PDF attivo**: Carica fatture, F24, dichiarazioni fiscali!" + ).send() + else: + await cl.Message( + content="โŒ Autenticazione fallita. Riprova." + ).send() + + +@cl.on_message +async def on_message(message: cl.Message): + """Gestione messaggi utente""" + user_email = cl.user_session.get("email", "guest") + user_name = cl.user_session.get("name", "User") + + try: + # === STEP 1: Gestione Upload === + if message.elements: + await handle_file_uploads(message.elements, user_email) + + # === STEP 2: RAG Search === + context_text = await search_qdrant(message.content, limit=5) + + # === STEP 3: Preparazione Prompt === + system_prompt = ( + "Sei un assistente AI esperto in analisi documentale e fiscale. " + "Usa ESCLUSIVAMENTE il seguente contesto per rispondere. " + "Se la risposta non รจ nel contesto, dillo chiaramente." 
+ ) + + if context_text: + full_prompt = f"{system_prompt}\n\n**CONTESTO DOCUMENTI:**\n{context_text}\n\n**DOMANDA UTENTE:**\n{message.content}" + else: + full_prompt = f"{system_prompt}\n\n**DOMANDA UTENTE:**\n{message.content}" + + # === STEP 4: Usa glm-4.6:cloud === + client = ollama.Client(host=OLLAMA_URL) + + msg = cl.Message(content="") + await msg.send() + + messages = [{"role": "user", "content": full_prompt}] + + stream = client.chat( + model='glm-4.6:cloud', + messages=messages, + stream=True + ) + + full_response = "" + for chunk in stream: + content = chunk['message']['content'] + full_response += content + await msg.stream_token(content) + + await msg.update() + + # === STEP 5: Estrai e Salva Codice === + code_blocks = re.findall(r"```python\n(.*?)```", full_response, re.DOTALL) + + if code_blocks: + elements = [] + for code in code_blocks: + file_path = save_code_to_file(code.strip(), user_email) + elements.append( + cl.File( + name=os.path.basename(file_path), + path=file_path, + display="inline" + ) + ) + + await cl.Message( + content=f"๐Ÿ’พ Codice salvato in workspace", + elements=elements + ).send() + + except Exception as e: + await cl.Message(content=f"โŒ **Errore:** {str(e)}").send() + + +async def handle_file_uploads(elements, user_email: str): + """Gestisce upload e indicizzazione file (TXT e PDF)""" + for element in elements: + try: + # Salva file + safe_email = user_email.replace("@", "_").replace(".", "_") + dest_path = os.path.join(WORKSPACES_DIR, safe_email, element.name) + shutil.copy(element.path, dest_path) + + content = None + + # Estrai testo in base al tipo di file + if element.name.lower().endswith('.pdf'): + await cl.Message(content=f"๐Ÿ“„ Elaborazione PDF **{element.name}**...").send() + content = extract_text_from_pdf(dest_path) + + if not content: + await cl.Message( + content=f"โš ๏ธ **{element.name}**: PDF vuoto o non leggibile" + ).send() + continue + + elif element.name.lower().endswith('.txt'): + with open(dest_path, 'r', encoding='utf-8') as f: + content = f.read() + + else: + await cl.Message( + content=f"๐Ÿ“ **{element.name}** salvato (supportati: .pdf, .txt)" + ).send() + continue + + # Indicizza su Qdrant + if content: + success = await index_document(element.name, content) + + if success: + word_count = len(content.split()) + await cl.Message( + content=f"โœ… **{element.name}** indicizzato in Qdrant\n" + f"๐Ÿ“Š Parole estratte: {word_count:,}" + ).send() + else: + await cl.Message( + content=f"โš ๏ธ Errore indicizzazione **{element.name}**" + ).send() + + except Exception as e: + await cl.Message( + content=f"โŒ Errore con **{element.name}**: {str(e)}" + ).send() diff --git a/app.py b/app.py index 719d5b12..b195b09e 100644 --- a/app.py +++ b/app.py @@ -3,49 +3,181 @@ import re import uuid import shutil from datetime import datetime -from typing import Optional - +from typing import Optional, Dict, List import chainlit as cl import ollama import fitz # PyMuPDF from qdrant_client import AsyncQdrantClient from qdrant_client.models import PointStruct, Distance, VectorParams from chainlit.data.sql_alchemy import SQLAlchemyDataLayer +from chainlit.data.storage_clients import BaseStorageClient # === CONFIGURAZIONE === DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station") OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434") QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333") +WORKSPACES_DIR = "./workspaces" +STORAGE_DIR = "./.files" + 
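+# workspaces/ raccoglie i file caricati, separati per utente;
+# .files/ è lo storage locale usato dal LocalStorageClient del data layer
+# (definito più sotto) per persistere gli elementi delle conversazioni.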
+os.makedirs(STORAGE_DIR, exist_ok=True) +os.makedirs(WORKSPACES_DIR, exist_ok=True) + + +# === MAPPING UTENTI E RUOLI === +USER_PROFILES = { + "giuseppe@defranceschi.pro": { + "role": "admin", + "name": "Giuseppe", + "workspace": "admin_workspace", + "rag_collection": "admin_docs", + "capabilities": ["debug", "system_prompts", "user_management", "all_models"], + "show_code": True + }, + "giuseppe.defranceschi@gmail.com": { + "role": "admin", + "name": "Giuseppe", + "workspace": "admin_workspace", + "rag_collection": "admin_docs", + "capabilities": ["debug", "system_prompts", "user_management", "all_models"], + "show_code": True + }, + "federica.tecchio@gmail.com": { + "role": "business", + "name": "Federica", + "workspace": "business_workspace", + "rag_collection": "contabilita", + "capabilities": ["pdf_upload", "basic_chat"], + "show_code": False + }, + "riccardob545@gmail.com": { + "role": "engineering", + "name": "Riccardo", + "workspace": "engineering_workspace", + "rag_collection": "engineering_docs", + "capabilities": ["code_execution", "data_viz", "advanced_chat"], + "show_code": True + }, + "giuliadefranceschi05@gmail.com": { + "role": "architecture", + "name": "Giulia", + "workspace": "architecture_workspace", + "rag_collection": "architecture_manuals", + "capabilities": ["visual_chat", "pdf_upload", "image_gen"], + "show_code": False + } +} + + +# === CUSTOM LOCAL STORAGE CLIENT === +class LocalStorageClient(BaseStorageClient): + """Storage locale su filesystem per file/elementi""" + + def __init__(self, storage_path: str): + self.storage_path = storage_path + os.makedirs(storage_path, exist_ok=True) + + async def upload_file( + self, + object_key: str, + data: bytes, + mime: str = "application/octet-stream", + overwrite: bool = True, + ) -> Dict[str, str]: + """Salva file localmente""" + file_path = os.path.join(self.storage_path, object_key) + os.makedirs(os.path.dirname(file_path), exist_ok=True) + + with open(file_path, "wb") as f: + f.write(data) + + return { + "object_key": object_key, + "url": f"/files/{object_key}" + } # === INIZIALIZZAZIONE DATA LAYER === +print("๐Ÿ”ง Inizializzazione database...") +storage_client = LocalStorageClient(storage_path=STORAGE_DIR) + try: - data_layer = SQLAlchemyDataLayer(conninfo=DATABASE_URL) + data_layer = SQLAlchemyDataLayer( + conninfo=DATABASE_URL, + storage_provider=storage_client, + user_thread_limit=1000, + show_logger=False + ) + # โฌ‡๏ธ QUESTA RIGA รˆ CRUCIALE PER LA PERSISTENZA cl.data_layer = data_layer - print("โœ… SQLAlchemyDataLayer initialized successfully") + print("โœ… SQLAlchemyDataLayer + LocalStorage initialized successfully") + print(f"โœ… Data layer set: {cl.data_layer is not None}") except Exception as e: print(f"โŒ Failed to initialize data layer: {e}") cl.data_layer = None -WORKSPACES_DIR = "./workspaces" -USER_ROLE = "admin" +# === OAUTH CALLBACK CON RUOLI === +@cl.oauth_callback +def oauth_callback( + provider_id: str, + token: str, + raw_user_data: Dict[str, str], + default_user: cl.User, +) -> Optional[cl.User]: + """Validazione e arricchimento dati utente con ruoli""" + if provider_id == "google": + email = raw_user_data.get("email", "").lower() + + # Verifica se utente รจ autorizzato + if email not in USER_PROFILES: + print(f"โŒ Utente non autorizzato: {email}") + return None # Nega accesso + + # Arricchisci metadata con profilo + profile = USER_PROFILES[email] + default_user.metadata.update({ + "picture": raw_user_data.get("picture", ""), + "locale": raw_user_data.get("locale", "en"), + "role": 
profile["role"], + "workspace": profile["workspace"], + "rag_collection": profile["rag_collection"], + "capabilities": profile["capabilities"], + "show_code": profile["show_code"], + "display_name": profile["name"] + }) + + print(f"โœ… Utente autorizzato: {email} - Ruolo: {profile['role']}") + return default_user + + return default_user # === UTILITY FUNCTIONS === -def create_workspace(user_role: str): +def get_user_profile(user_email: str) -> Dict: + """Recupera profilo utente""" + return USER_PROFILES.get(user_email.lower(), { + "role": "guest", + "name": "Ospite", + "workspace": "guest_workspace", + "rag_collection": "documents", + "capabilities": [], + "show_code": False + }) + + +def create_workspace(workspace_name: str) -> str: """Crea directory workspace se non esiste""" - workspace_path = os.path.join(WORKSPACES_DIR, user_role) + workspace_path = os.path.join(WORKSPACES_DIR, workspace_name) os.makedirs(workspace_path, exist_ok=True) return workspace_path -def save_code_to_file(code: str, user_role: str) -> str: +def save_code_to_file(code: str, workspace: str) -> str: """Salva blocco codice come file .py""" timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") file_name = f"code_{timestamp}.py" - file_path = os.path.join(WORKSPACES_DIR, user_role, file_name) + file_path = os.path.join(WORKSPACES_DIR, workspace, file_name) with open(file_path, "w", encoding="utf-8") as f: f.write(code) @@ -66,7 +198,6 @@ def extract_text_from_pdf(pdf_path: str) -> str: doc.close() return "\n".join(text_parts) - except Exception as e: print(f"โŒ Errore estrazione PDF: {e}") return "" @@ -75,46 +206,67 @@ def extract_text_from_pdf(pdf_path: str) -> str: # === QDRANT FUNCTIONS === async def get_qdrant_client() -> AsyncQdrantClient: """Connessione a Qdrant""" - client = AsyncQdrantClient(url=QDRANT_URL) - collection_name = "documents" - - # Crea collection se non esiste + return AsyncQdrantClient(url=QDRANT_URL) + + +async def ensure_collection(collection_name: str): + """Crea collection se non esiste""" + client = await get_qdrant_client() if not await client.collection_exists(collection_name): await client.create_collection( collection_name=collection_name, vectors_config=VectorParams(size=768, distance=Distance.COSINE) ) - - return client async def get_embeddings(text: str) -> list: """Genera embeddings con Ollama""" - client = ollama.Client(host=OLLAMA_URL) - - # Limita lunghezza per evitare errori max_length = 2000 if len(text) > max_length: text = text[:max_length] + client = ollama.Client(host=OLLAMA_URL) + try: response = client.embed(model='nomic-embed-text', input=text) - if 'embeddings' in response: return response['embeddings'][0] return response.get('embedding', []) - except Exception as e: print(f"โŒ Errore Embedding: {e}") return [] -async def index_document(file_name: str, content: str) -> bool: - """Indicizza documento su Qdrant""" - try: - # Suddividi documento lungo in chunks - chunks = chunk_text(content, max_length=1500) +def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list: + """Divide testo in chunks con overlap""" + if len(text) <= max_length: + return [text] + + chunks = [] + start = 0 + + while start < len(text): + end = start + max_length + if end < len(text): + last_period = text.rfind('.', start, end) + last_newline = text.rfind('\n', start, end) + split_point = max(last_period, last_newline) + + if split_point > start: + end = split_point + 1 + + chunks.append(text[start:end].strip()) + start = end - overlap + + return chunks + + +async def 
+    """Indicizza documento su Qdrant in collection specifica"""
+    try:
+        await ensure_collection(collection_name)
+        chunks = chunk_text(content, max_length=1500)
         qdrant_client = await get_qdrant_client()
         points = []
 
@@ -138,53 +290,31 @@ async def index_document(file_name: str, content: str) -> bool:
             points.append(point)
 
         if points:
-            await qdrant_client.upsert(collection_name="documents", points=points)
+            await qdrant_client.upsert(collection_name=collection_name, points=points)
             return True
 
         return False
-
     except Exception as e:
         print(f"❌ Errore indicizzazione: {e}")
         return False
 
 
-def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list:
-    """Divide testo in chunks con overlap"""
-    if len(text) <= max_length:
-        return [text]
-
-    chunks = []
-    start = 0
-
-    while start < len(text):
-        end = start + max_length
-
-        # Cerca l'ultimo punto/newline prima del limite
-        if end < len(text):
-            last_period = text.rfind('.', start, end)
-            last_newline = text.rfind('\n', start, end)
-            split_point = max(last_period, last_newline)
-
-            if split_point > start:
-                end = split_point + 1
-
-        chunks.append(text[start:end].strip())
-        start = end - overlap  # Overlap per continuità
-
-    return chunks
-
-
-async def search_qdrant(query_text: str, limit: int = 5) -> str:
-    """Ricerca documenti rilevanti"""
+async def search_qdrant(query_text: str, collection_name: str, limit: int = 5) -> str:
+    """Ricerca documenti rilevanti in collection specifica"""
     try:
         qdrant_client = await get_qdrant_client()
+
+        # Verifica se collection esiste
+        if not await qdrant_client.collection_exists(collection_name):
+            return ""
+
        query_embedding = await get_embeddings(query_text)
 
         if not query_embedding:
             return ""
 
         search_result = await qdrant_client.query_points(
-            collection_name="documents",
+            collection_name=collection_name,
             query=query_embedding,
             limit=limit
         )
@@ -199,17 +329,16 @@ async def search_qdrant(query_text: str, limit: int = 5) -> str:
                 chunk_idx = hit.payload.get('chunk_index', 0)
                 score = hit.score if hasattr(hit, 'score') else 0
 
-                # Evita duplicati dello stesso file
                 file_key = f"{file_name}_{chunk_idx}"
                 if file_key not in seen_files:
                     seen_files.add(file_key)
+                    # ✅ FIX: fence Markdown con tag esplicito attorno al contenuto
                     contexts.append(
                         f"📄 **{file_name}** (chunk {chunk_idx+1}, score: {score:.2f})\n"
-                        f"```\n{content[:600]}...\n```"
+                        f"```text\n{content[:600]}...\n```"
                     )
 
         return "\n\n".join(contexts) if contexts else ""
-
     except Exception as e:
         print(f"❌ Errore ricerca Qdrant: {e}")
         return ""
@@ -218,61 +347,170 @@ async def search_qdrant(query_text: str, limit: int = 5) -> str:
 # === CHAINLIT HANDLERS ===
 @cl.on_chat_start
 async def on_chat_start():
-    """Inizializzazione chat"""
-    create_workspace(USER_ROLE)
+    """Inizializzazione chat con profili utente"""
+    user = cl.user_session.get("user")
 
-    # Imposta variabili sessione
-    cl.user_session.set("role", USER_ROLE)
+    if user:
+        user_email = user.identifier
+        profile = get_user_profile(user_email)
+        user_name = profile["name"]
+        user_role = profile["role"]
+        workspace = profile["workspace"]
+        user_picture = user.metadata.get("picture", "")
+        show_code = profile["show_code"]
+        capabilities = profile["capabilities"]
+    else:
+        user_email = "guest@local"
+        user_name = "Ospite"
+        user_role = "guest"
+        workspace = "guest_workspace"
+        user_picture = ""
+        show_code = False
+        capabilities = []
+        profile = {}  # fallback: rag_collection ricade su "documents" più sotto
+
+    create_workspace(workspace)
+
+    # Salva in sessione
+    cl.user_session.set("email", user_email)
+    cl.user_session.set("name", user_name)
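+    # Le chiavi seguenti (role, workspace, show_code, rag_collection)
+    # vengono rilette da on_message per instradare RAG e salvataggio codice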
+ cl.user_session.set("role", user_role) + cl.user_session.set("workspace", workspace) + cl.user_session.set("show_code", show_code) + cl.user_session.set("capabilities", capabilities) + cl.user_session.set("rag_collection", profile.get("rag_collection", "documents")) + + # Settings basati su ruolo + settings_widgets = [ + cl.input_widget.Select( + id="model", + label="๐Ÿค– Modello AI", + values=["glm-4.6:cloud", "llama3.2", "mistral", "qwen2.5-coder:32b"], + initial_value="glm-4.6:cloud", + ), + cl.input_widget.Slider( + id="temperature", + label="๐ŸŒก๏ธ Temperatura", + initial=0.7, + min=0, + max=2, + step=0.1, + ), + ] + + # Solo admin puรฒ disabilitare RAG + if user_role == "admin": + settings_widgets.append( + cl.input_widget.Switch( + id="rag_enabled", + label="๐Ÿ“š Abilita RAG", + initial=True, + ) + ) + + # โฌ‡๏ธ INVIA SETTINGS (questo attiva l'icona โš™๏ธ) + await cl.ChatSettings(settings_widgets).send() + + # Emoji ruolo + role_emoji = { + "admin": "๐Ÿ‘‘", + "business": "๐Ÿ’ผ", + "engineering": "โš™๏ธ", + "architecture": "๐Ÿ›๏ธ", + "guest": "๐Ÿ‘ค" + } - # Verifica persistenza persistence_status = "โœ… Attiva" if cl.data_layer else "โš ๏ธ Disattivata" + welcome_msg = f"{role_emoji.get(user_role, '๐Ÿ‘‹')} **Benvenuto, {user_name}!**\n\n" + if user_picture: + welcome_msg += f"![Avatar]({user_picture})\n\n" + + welcome_msg += ( + f"๐ŸŽญ **Ruolo**: {user_role.upper()}\n" + f"๐Ÿ“ **Workspace**: `{workspace}`\n" + f"๐Ÿ’พ **Persistenza**: {persistence_status}\n" + f"๐Ÿค– **Modello**: `glm-4.6:cloud`\n\n" + ) + + # Capabilities specifiche + if "debug" in capabilities: + welcome_msg += "๐Ÿ”ง **Modalitร  Debug**: Attiva\n" + if "user_management" in capabilities: + welcome_msg += "๐Ÿ‘ฅ **Gestione Utenti**: Disponibile\n" + if not show_code: + welcome_msg += "๐ŸŽจ **Modalitร  Visuale**: Codice nascosto\n" + + welcome_msg += "\nโš™๏ธ **Usa le Settings (icona โš™๏ธ in alto a destra) per personalizzare!**" + + await cl.Message(content=welcome_msg).send() + + +@cl.on_settings_update +async def on_settings_update(settings): + """Gestisce aggiornamento settings utente""" + cl.user_session.set("settings", settings) + + model = settings.get("model", "glm-4.6:cloud") + temp = settings.get("temperature", 0.7) + rag = settings.get("rag_enabled", True) + await cl.Message( - content=f"๐Ÿš€ **AI Station Ready** - Workspace: `{USER_ROLE}`\n\n" - f"๐Ÿ“ค Upload **PDF** o **.txt** per indicizzarli nel RAG\n" - f"๐Ÿ’พ Persistenza conversazioni: {persistence_status}\n" - f"๐Ÿค– Modello: `glm-4.6:cloud` @ {OLLAMA_URL}\n\n" - f"๐Ÿ’ก **Supporto PDF attivo**: Carica fatture, F24, dichiarazioni fiscali!" 
+ content=f"โœ… **Settings aggiornati**:\n" + f"- ๐Ÿค– Modello: `{model}`\n" + f"- ๐ŸŒก๏ธ Temperatura: `{temp}`\n" + f"- ๐Ÿ“š RAG: {'โœ… Attivo' if rag else 'โŒ Disattivato'}" ).send() @cl.on_message async def on_message(message: cl.Message): - """Gestione messaggi utente""" + """Gestione messaggi utente con RAG intelligente""" + user_email = cl.user_session.get("email", "guest") user_role = cl.user_session.get("role", "guest") + workspace = cl.user_session.get("workspace", "guest_workspace") + show_code = cl.user_session.get("show_code", False) + rag_collection = cl.user_session.get("rag_collection", "documents") + settings = cl.user_session.get("settings", {}) + + model = settings.get("model", "glm-4.6:cloud") + temperature = settings.get("temperature", 0.7) + + # Admin puรฒ disabilitare RAG, altri lo hanno sempre attivo + rag_enabled = settings.get("rag_enabled", True) if user_role == "admin" else True try: - # === STEP 1: Gestione Upload === + # Gestisci upload file if message.elements: - await handle_file_uploads(message.elements, user_role) + await handle_file_uploads(message.elements, workspace, rag_collection) - # === STEP 2: RAG Search === - context_text = await search_qdrant(message.content, limit=5) - - # === STEP 3: Preparazione Prompt === - system_prompt = ( - "Sei un assistente AI esperto in analisi documentale e fiscale. " - "Usa ESCLUSIVAMENTE il seguente contesto per rispondere. " - "Se la risposta non รจ nel contesto, dillo chiaramente." - ) + # RAG Search solo se abilitato + context_text = "" + if rag_enabled: + context_text = await search_qdrant(message.content, rag_collection, limit=5) + # Costruisci prompt con o senza contesto if context_text: + system_prompt = ( + "Sei un assistente AI esperto. " + "Usa il seguente contesto per arricchire la tua risposta, " + "ma puoi anche rispondere usando la tua conoscenza generale se il contesto non รจ sufficiente." + ) full_prompt = f"{system_prompt}\n\n**CONTESTO DOCUMENTI:**\n{context_text}\n\n**DOMANDA UTENTE:**\n{message.content}" else: + system_prompt = "Sei un assistente AI esperto e disponibile. Rispondi in modo chiaro e utile." 
full_prompt = f"{system_prompt}\n\n**DOMANDA UTENTE:**\n{message.content}" - # === STEP 4: Usa glm-4.6:cloud === + # Streaming risposta da Ollama client = ollama.Client(host=OLLAMA_URL) - msg = cl.Message(content="") await msg.send() messages = [{"role": "user", "content": full_prompt}] - stream = client.chat( - model='glm-4.6:cloud', + model=model, messages=messages, - stream=True + stream=True, + options={"temperature": temperature} ) full_response = "" @@ -283,41 +521,52 @@ async def on_message(message: cl.Message): await msg.update() - # === STEP 5: Estrai e Salva Codice === - code_blocks = re.findall(r"```python\n(.*?)```", full_response, re.DOTALL) + # โœ… FIX: Estrai codice Python con regex corretto + code_blocks = re.findall(r"``````", full_response, re.DOTALL) if code_blocks: elements = [] + + # Se show_code รจ False, nascondi il codice dalla risposta + if not show_code: + cleaned_response = re.sub( + r"``````", + "[๐Ÿ’ป Codice eseguito internamente]", + full_response, + flags=re.DOTALL + ) + await msg.update(content=cleaned_response) + + # Salva codice nel workspace for code in code_blocks: - file_path = save_code_to_file(code.strip(), user_role) + file_path = save_code_to_file(code.strip(), workspace) elements.append( cl.File( name=os.path.basename(file_path), path=file_path, - display="inline" + display="inline" if show_code else "side" ) ) - await cl.Message( - content=f"๐Ÿ’พ Codice salvato in `{user_role}/`", - elements=elements - ).send() + if show_code: + await cl.Message( + content=f"๐Ÿ’พ Codice salvato in workspace `{workspace}`", + elements=elements + ).send() except Exception as e: await cl.Message(content=f"โŒ **Errore:** {str(e)}").send() -async def handle_file_uploads(elements, user_role: str): - """Gestisce upload e indicizzazione file (TXT e PDF)""" +async def handle_file_uploads(elements, workspace: str, collection_name: str): + """Gestisce upload e indicizzazione file in collection specifica""" for element in elements: try: - # Salva file - dest_path = os.path.join(WORKSPACES_DIR, user_role, element.name) + dest_path = os.path.join(WORKSPACES_DIR, workspace, element.name) shutil.copy(element.path, dest_path) content = None - # Estrai testo in base al tipo di file if element.name.lower().endswith('.pdf'): await cl.Message(content=f"๐Ÿ“„ Elaborazione PDF **{element.name}**...").send() content = extract_text_from_pdf(dest_path) @@ -331,21 +580,19 @@ async def handle_file_uploads(elements, user_role: str): elif element.name.lower().endswith('.txt'): with open(dest_path, 'r', encoding='utf-8') as f: content = f.read() - else: await cl.Message( - content=f"๐Ÿ“ **{element.name}** salvato (supportati: .pdf, .txt)" + content=f"๐Ÿ“ **{element.name}** salvato in workspace (supportati: .pdf, .txt)" ).send() continue - # Indicizza su Qdrant if content: - success = await index_document(element.name, content) + success = await index_document(element.name, content, collection_name) if success: word_count = len(content.split()) await cl.Message( - content=f"โœ… **{element.name}** indicizzato in Qdrant\n" + content=f"โœ… **{element.name}** indicizzato in `{collection_name}`\n" f"๐Ÿ“Š Parole estratte: {word_count:,}" ).send() else: diff --git a/app.py.backup b/app.py.backup new file mode 100644 index 00000000..719d5b12 --- /dev/null +++ b/app.py.backup @@ -0,0 +1,359 @@ +import os +import re +import uuid +import shutil +from datetime import datetime +from typing import Optional + +import chainlit as cl +import ollama +import fitz # PyMuPDF +from qdrant_client import 
AsyncQdrantClient +from qdrant_client.models import PointStruct, Distance, VectorParams +from chainlit.data.sql_alchemy import SQLAlchemyDataLayer + + +# === CONFIGURAZIONE === +DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station") +OLLAMA_URL = os.getenv("OLLAMA_URL", "http://192.168.1.243:11434") +QDRANT_URL = os.getenv("QDRANT_URL", "http://qdrant:6333") + + +# === INIZIALIZZAZIONE DATA LAYER === +try: + data_layer = SQLAlchemyDataLayer(conninfo=DATABASE_URL) + cl.data_layer = data_layer + print("โœ… SQLAlchemyDataLayer initialized successfully") +except Exception as e: + print(f"โŒ Failed to initialize data layer: {e}") + cl.data_layer = None + + +WORKSPACES_DIR = "./workspaces" +USER_ROLE = "admin" + + +# === UTILITY FUNCTIONS === +def create_workspace(user_role: str): + """Crea directory workspace se non esiste""" + workspace_path = os.path.join(WORKSPACES_DIR, user_role) + os.makedirs(workspace_path, exist_ok=True) + return workspace_path + + +def save_code_to_file(code: str, user_role: str) -> str: + """Salva blocco codice come file .py""" + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + file_name = f"code_{timestamp}.py" + file_path = os.path.join(WORKSPACES_DIR, user_role, file_name) + + with open(file_path, "w", encoding="utf-8") as f: + f.write(code) + + return file_path + + +def extract_text_from_pdf(pdf_path: str) -> str: + """Estrae testo da PDF usando PyMuPDF""" + try: + doc = fitz.open(pdf_path) + text_parts = [] + + for page_num in range(len(doc)): + page = doc[page_num] + text = page.get_text() + text_parts.append(f"--- Pagina {page_num + 1} ---\n{text}\n") + + doc.close() + return "\n".join(text_parts) + + except Exception as e: + print(f"โŒ Errore estrazione PDF: {e}") + return "" + + +# === QDRANT FUNCTIONS === +async def get_qdrant_client() -> AsyncQdrantClient: + """Connessione a Qdrant""" + client = AsyncQdrantClient(url=QDRANT_URL) + collection_name = "documents" + + # Crea collection se non esiste + if not await client.collection_exists(collection_name): + await client.create_collection( + collection_name=collection_name, + vectors_config=VectorParams(size=768, distance=Distance.COSINE) + ) + + return client + + +async def get_embeddings(text: str) -> list: + """Genera embeddings con Ollama""" + client = ollama.Client(host=OLLAMA_URL) + + # Limita lunghezza per evitare errori + max_length = 2000 + if len(text) > max_length: + text = text[:max_length] + + try: + response = client.embed(model='nomic-embed-text', input=text) + + if 'embeddings' in response: + return response['embeddings'][0] + return response.get('embedding', []) + + except Exception as e: + print(f"โŒ Errore Embedding: {e}") + return [] + + +async def index_document(file_name: str, content: str) -> bool: + """Indicizza documento su Qdrant""" + try: + # Suddividi documento lungo in chunks + chunks = chunk_text(content, max_length=1500) + + qdrant_client = await get_qdrant_client() + points = [] + + for i, chunk in enumerate(chunks): + embeddings = await get_embeddings(chunk) + if not embeddings: + continue + + point_id = str(uuid.uuid4()) + point = PointStruct( + id=point_id, + vector=embeddings, + payload={ + "file_name": file_name, + "content": chunk, + "chunk_index": i, + "total_chunks": len(chunks), + "indexed_at": datetime.now().isoformat() + } + ) + points.append(point) + + if points: + await qdrant_client.upsert(collection_name="documents", points=points) + return True + + return False + + except Exception as e: + 
print(f"โŒ Errore indicizzazione: {e}") + return False + + +def chunk_text(text: str, max_length: int = 1500, overlap: int = 200) -> list: + """Divide testo in chunks con overlap""" + if len(text) <= max_length: + return [text] + + chunks = [] + start = 0 + + while start < len(text): + end = start + max_length + + # Cerca l'ultimo punto/newline prima del limite + if end < len(text): + last_period = text.rfind('.', start, end) + last_newline = text.rfind('\n', start, end) + split_point = max(last_period, last_newline) + + if split_point > start: + end = split_point + 1 + + chunks.append(text[start:end].strip()) + start = end - overlap # Overlap per continuitร  + + return chunks + + +async def search_qdrant(query_text: str, limit: int = 5) -> str: + """Ricerca documenti rilevanti""" + try: + qdrant_client = await get_qdrant_client() + query_embedding = await get_embeddings(query_text) + + if not query_embedding: + return "" + + search_result = await qdrant_client.query_points( + collection_name="documents", + query=query_embedding, + limit=limit + ) + + contexts = [] + seen_files = set() + + for hit in search_result.points: + if hit.payload: + file_name = hit.payload.get('file_name', 'Unknown') + content = hit.payload.get('content', '') + chunk_idx = hit.payload.get('chunk_index', 0) + score = hit.score if hasattr(hit, 'score') else 0 + + # Evita duplicati dello stesso file + file_key = f"{file_name}_{chunk_idx}" + if file_key not in seen_files: + seen_files.add(file_key) + contexts.append( + f"๐Ÿ“„ **{file_name}** (chunk {chunk_idx+1}, score: {score:.2f})\n" + f"```\n{content[:600]}...\n```" + ) + + return "\n\n".join(contexts) if contexts else "" + + except Exception as e: + print(f"โŒ Errore ricerca Qdrant: {e}") + return "" + + +# === CHAINLIT HANDLERS === +@cl.on_chat_start +async def on_chat_start(): + """Inizializzazione chat""" + create_workspace(USER_ROLE) + + # Imposta variabili sessione + cl.user_session.set("role", USER_ROLE) + + # Verifica persistenza + persistence_status = "โœ… Attiva" if cl.data_layer else "โš ๏ธ Disattivata" + + await cl.Message( + content=f"๐Ÿš€ **AI Station Ready** - Workspace: `{USER_ROLE}`\n\n" + f"๐Ÿ“ค Upload **PDF** o **.txt** per indicizzarli nel RAG\n" + f"๐Ÿ’พ Persistenza conversazioni: {persistence_status}\n" + f"๐Ÿค– Modello: `glm-4.6:cloud` @ {OLLAMA_URL}\n\n" + f"๐Ÿ’ก **Supporto PDF attivo**: Carica fatture, F24, dichiarazioni fiscali!" + ).send() + + +@cl.on_message +async def on_message(message: cl.Message): + """Gestione messaggi utente""" + user_role = cl.user_session.get("role", "guest") + + try: + # === STEP 1: Gestione Upload === + if message.elements: + await handle_file_uploads(message.elements, user_role) + + # === STEP 2: RAG Search === + context_text = await search_qdrant(message.content, limit=5) + + # === STEP 3: Preparazione Prompt === + system_prompt = ( + "Sei un assistente AI esperto in analisi documentale e fiscale. " + "Usa ESCLUSIVAMENTE il seguente contesto per rispondere. " + "Se la risposta non รจ nel contesto, dillo chiaramente." 
+ ) + + if context_text: + full_prompt = f"{system_prompt}\n\n**CONTESTO DOCUMENTI:**\n{context_text}\n\n**DOMANDA UTENTE:**\n{message.content}" + else: + full_prompt = f"{system_prompt}\n\n**DOMANDA UTENTE:**\n{message.content}" + + # === STEP 4: Usa glm-4.6:cloud === + client = ollama.Client(host=OLLAMA_URL) + + msg = cl.Message(content="") + await msg.send() + + messages = [{"role": "user", "content": full_prompt}] + + stream = client.chat( + model='glm-4.6:cloud', + messages=messages, + stream=True + ) + + full_response = "" + for chunk in stream: + content = chunk['message']['content'] + full_response += content + await msg.stream_token(content) + + await msg.update() + + # === STEP 5: Estrai e Salva Codice === + code_blocks = re.findall(r"```python\n(.*?)```", full_response, re.DOTALL) + + if code_blocks: + elements = [] + for code in code_blocks: + file_path = save_code_to_file(code.strip(), user_role) + elements.append( + cl.File( + name=os.path.basename(file_path), + path=file_path, + display="inline" + ) + ) + + await cl.Message( + content=f"๐Ÿ’พ Codice salvato in `{user_role}/`", + elements=elements + ).send() + + except Exception as e: + await cl.Message(content=f"โŒ **Errore:** {str(e)}").send() + + +async def handle_file_uploads(elements, user_role: str): + """Gestisce upload e indicizzazione file (TXT e PDF)""" + for element in elements: + try: + # Salva file + dest_path = os.path.join(WORKSPACES_DIR, user_role, element.name) + shutil.copy(element.path, dest_path) + + content = None + + # Estrai testo in base al tipo di file + if element.name.lower().endswith('.pdf'): + await cl.Message(content=f"๐Ÿ“„ Elaborazione PDF **{element.name}**...").send() + content = extract_text_from_pdf(dest_path) + + if not content: + await cl.Message( + content=f"โš ๏ธ **{element.name}**: PDF vuoto o non leggibile" + ).send() + continue + + elif element.name.lower().endswith('.txt'): + with open(dest_path, 'r', encoding='utf-8') as f: + content = f.read() + + else: + await cl.Message( + content=f"๐Ÿ“ **{element.name}** salvato (supportati: .pdf, .txt)" + ).send() + continue + + # Indicizza su Qdrant + if content: + success = await index_document(element.name, content) + + if success: + word_count = len(content.split()) + await cl.Message( + content=f"โœ… **{element.name}** indicizzato in Qdrant\n" + f"๐Ÿ“Š Parole estratte: {word_count:,}" + ).send() + else: + await cl.Message( + content=f"โš ๏ธ Errore indicizzazione **{element.name}**" + ).send() + + except Exception as e: + await cl.Message( + content=f"โŒ Errore con **{element.name}**: {str(e)}" + ).send() diff --git a/docker-compose.yml b/docker-compose.yml index 913d118c..b7516f8c 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,49 +1,23 @@ +version: '3.8' + services: - postgres: - image: postgres:15-alpine - container_name: ai-station-postgres - environment: - POSTGRES_DB: ai_station - POSTGRES_USER: ai_user - POSTGRES_PASSWORD: secure_password_here - volumes: - - postgres_data:/var/lib/postgresql/data - ports: - - "5432:5432" - networks: - - ai-station-net - healthcheck: - test: ["CMD-SHELL", "pg_isready -U ai_user -d ai_station"] # <- AGGIUNGI -d ai_station - interval: 10s - timeout: 5s - retries: 5 - restart: unless-stopped - - qdrant: - image: qdrant/qdrant:latest - container_name: ai-station-qdrant - volumes: - - qdrant_data:/qdrant/storage - ports: - - "6333:6333" - - "6334:6334" - networks: - - ai-station-net - restart: unless-stopped - chainlit-app: build: . 
     environment:
-      - DATABASE_URL=postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station
       - OLLAMA_URL=http://192.168.1.243:11434
       - QDRANT_URL=http://qdrant:6333
-      - CHAINLIT_AUTH_SECRET=your-secret-key-here
+      - BGE_API_URL=http://192.168.1.243:8001
     volumes:
       - ./workspaces:/app/workspaces
-      - ./public:/app/public
+      - ./public:/app/public # ⬅️ verify this mount exists on the host
+      - ./.files:/app/.files
+      - ./.chainlit:/app/.chainlit # ⬅️ added so config.toml reaches the container
     networks:
       - ai-station-net
    depends_on:
@@ -51,18 +25,41 @@ services:
       condition: service_healthy
     qdrant:
       condition: service_started
-    command: chainlit run app.py --host 0.0.0.0 --port 8000
     restart: unless-stopped

-volumes:
-  postgres_data:
-    driver: local
-  qdrant_data:
-    driver: local
+  postgres:
+    image: postgres:15-alpine
+    container_name: ai-station-postgres
+    environment:
+      - POSTGRES_USER=ai_user
+      - POSTGRES_PASSWORD=secure_password_here
+      - POSTGRES_DB=ai_station
+    volumes:
+      - postgres_data:/var/lib/postgresql/data
+    networks:
+      - ai-station-net
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U ai_user -d ai_station"]
+      interval: 5s
+      timeout: 5s
+      retries: 5
+    restart: unless-stopped
+
+  qdrant:
+    image: qdrant/qdrant:latest
+    container_name: ai-station-qdrant
+    ports:
+      - "6333:6333"
+    volumes:
+      - qdrant_data:/qdrant/storage
+    networks:
+      - ai-station-net
+    restart: unless-stopped

 networks:
   ai-station-net:
     driver: bridge
-    ipam:
-      config:
-        - subnet: 172.28.0.0/16
+
+volumes:
+  postgres_data:
+  qdrant_data:
diff --git a/init_db.py b/init_db.py
index 120ff65d..0871c1d7 100644
--- a/init_db.py
+++ b/init_db.py
@@ -1,27 +1,27 @@
 import asyncio
-from sqlalchemy import create_engine, text
-from chainlit.data.sql_alchemy import SQLAlchemyDataLayer
+import asyncpg
+import os
+import sys

-DATABASE_URL = "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station"
+DATABASE_URL = os.getenv("DATABASE_URL", "postgresql+asyncpg://ai_user:secure_password_here@postgres:5432/ai_station")
+
+# asyncpg expects a plain postgresql:// scheme, so strip SQLAlchemy's driver suffix
+db_url = DATABASE_URL.replace("postgresql+asyncpg://", "postgresql://")

 async def init_database():
-    """Initialize the Chainlit tables"""
     print("🔧 Initializing database...")
-
     try:
-        # Create data layer
-        data_layer = SQLAlchemyDataLayer(conninfo=DATABASE_URL)
+        conn = await asyncpg.connect(db_url)

-        # Force table creation
-        if hasattr(data_layer, '_create_database'):
-            await data_layer._create_database()
-            print("✅ Database initialized successfully")
-        else:
-            print("⚠️ Method _create_database not available")
-            print("ℹ️ Tables will be created automatically on first use")
-
+        # No schema creation here: Chainlit builds its tables on first use,
+        # so this script only verifies connectivity
+        print("✅ Database connection successful")
+        print("ℹ️ Tables will be created automatically by Chainlit")
+
+        await conn.close()
     except Exception as e:
-        print(f"❌ Error: {e}")
+        print(f"❌ Database connection error: {e}")
+        sys.exit(1)

 if __name__ == "__main__":
     asyncio.run(init_database())
+    print("✅ Database initialization complete")
\ No newline at end of file
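A quick way to exercise init_db.py against the stack above (a sketch, assuming
the compose services are already up and the service name from docker-compose.yml):

    docker compose run --rm chainlit-app python init_db.py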
diff --git a/public/custom.css b/public/custom.css
new file mode 100644
index 00000000..643f15c2
--- /dev/null
+++ b/public/custom.css
@@ -0,0 +1,468 @@
+/* ========================================
+   AI STATION - PERPLEXITY STYLE UI
+   ======================================== */
+
+/* === ROOT VARIABLES === */
+:root {
+  /* Main palette (professional blue) */
+  --primary-color: #6366F1;
+  --primary-hover: #4F46E5;
+  --primary-light: #818CF8;
+
+  /* Dark-mode backgrounds */
+  --bg-primary: #0F172A;
+  --bg-secondary: #1E293B;
+  --bg-tertiary: #334155;
+
+  /* Text colors */
+  --text-primary: #F1F5F9;
+  --text-secondary: #94A3B8;
+  --text-muted: #64748B;
+
+  /* Accent colors */
+  --accent-green: #10B981;
+  --accent-red: #EF4444;
+  --accent-yellow: #F59E0B;
+
+  /* Shadows */
+  --shadow-sm: 0 1px 2px 0 rgba(0, 0, 0, 0.3);
+  --shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.4);
+  --shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.5);
+
+  /* Border radius */
+  --radius-sm: 0.375rem;
+  --radius-md: 0.5rem;
+  --radius-lg: 0.75rem;
+  --radius-xl: 1rem;
+}
+
+/* === BODY & LAYOUT === */
+body {
+  background: var(--bg-primary) !important;
+  color: var(--text-primary) !important;
+  font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', sans-serif !important;
+  -webkit-font-smoothing: antialiased;
+  -moz-osx-font-smoothing: grayscale;
+}
+
+/* === HEADER === */
+#app-header {
+  background: var(--bg-secondary) !important;
+  border-bottom: 1px solid var(--bg-tertiary) !important;
+  backdrop-filter: blur(10px);
+  box-shadow: var(--shadow-sm);
+}
+
+/* Logo styling */
+#app-header img[alt="logo"] {
+  border-radius: var(--radius-md);
+  transition: transform 0.2s ease;
+}
+
+#app-header img[alt="logo"]:hover {
+  transform: scale(1.05);
+}
+
+/* === SIDEBAR === */
+#chat-history-sidebar {
+  background: var(--bg-secondary) !important;
+  border-right: 1px solid var(--bg-tertiary) !important;
+}
+
+/* Thread items in sidebar */
+.thread-item {
+  border-radius: var(--radius-md) !important;
+  margin: 0.25rem 0.5rem !important;
+  transition: all 0.2s ease !important;
+}
+
+.thread-item:hover {
+  background: var(--bg-tertiary) !important;
+  transform: translateX(4px);
+}
+
+.thread-item.active {
+  background: var(--primary-color) !important;
+  color: white !important;
+}
+
+/* === CHAT CONTAINER === */
+#chat-container {
+  background: var(--bg-primary) !important;
+  max-width: 1200px;
+  margin: 0 auto;
+  padding: 1.5rem;
+}
+
+/* === MESSAGES === */
+/* User message */
+.user-message {
+  background: var(--bg-tertiary) !important;
+  border-radius: var(--radius-lg) !important;
+  padding: 1rem 1.25rem !important;
+  margin: 0.75rem 0 !important;
+  box-shadow: var(--shadow-sm);
+  border-left: 3px solid var(--primary-color);
+}
+
+/* Assistant message */
+.assistant-message {
+  background: var(--bg-secondary) !important;
+  border-radius: var(--radius-lg) !important;
+  padding: 1rem 1.25rem !important;
+  margin: 0.75rem 0 !important;
+  box-shadow: var(--shadow-md);
+}
+
+/* Message avatars */
+.message-avatar {
+  border-radius: 50% !important;
+  box-shadow: var(--shadow-sm);
+  border: 2px solid var(--bg-tertiary);
+}
+
+/* === CODE BLOCKS === */
+pre {
+  background: #1E1E1E !important;
+  border-radius: var(--radius-md) !important;
+  padding: 1rem !important;
+  border: 1px solid var(--bg-tertiary) !important;
+  box-shadow: var(--shadow-md);
+  overflow-x: auto;
+}
+
+code {
+  font-family: 'Monaco', 'Menlo', 'Courier New', monospace !important;
+  font-size: 0.9rem !important;
+  color: #E5E7EB !important;
+}
+
+/* Inline code */
+:not(pre) > code {
+  background: var(--bg-tertiary) !important;
+  padding: 0.2rem 0.4rem !important;
+  border-radius: var(--radius-sm) !important;
+  color: var(--primary-light) !important;
+}
+
+/* === TABLES === */
+table {
+  width: 100% !important;
+  border-collapse: separate !important;
+  border-spacing: 0 !important;
+  border-radius: var(--radius-md) !important;
+  overflow: hidden !important;
+  box-shadow: var(--shadow-md);
+  margin: 1rem 0 !important;
+}
+
+thead {
+  background: var(--bg-tertiary) !important;
+}
+
+thead th {
+  padding: 0.75rem 1rem !important;
+  text-align: left !important;
+  font-weight: 600 !important;
+  color: var(--text-primary) !important;
+  border-bottom: 2px solid var(--primary-color) !important;
+}
+
+tbody tr {
+  background: var(--bg-secondary) !important;
+  transition: background 0.2s ease;
+}
+
+tbody tr:hover {
+  background: var(--bg-tertiary) !important;
+}
+
+tbody td {
+  padding: 0.75rem 1rem !important;
+  border-bottom: 1px solid var(--bg-tertiary) !important;
+  color: var(--text-secondary) !important;
+}
+
+/* === INPUT AREA === */
+#chat-input-container {
+  background: var(--bg-secondary) !important;
+  border-radius: var(--radius-xl) !important;
+  padding: 1rem !important;
+  box-shadow: var(--shadow-lg);
+  border: 1px solid var(--bg-tertiary) !important;
+}
+
+#chat-input {
+  background: var(--bg-tertiary) !important;
+  color: var(--text-primary) !important;
+  border: none !important;
+  border-radius: var(--radius-lg) !important;
+  padding: 0.75rem 1rem !important;
+  font-size: 1rem !important;
+  resize: none !important;
+  transition: all 0.2s ease;
+}
+
+#chat-input:focus {
+  outline: 2px solid var(--primary-color) !important;
+  outline-offset: 2px;
+  box-shadow: 0 0 0 3px rgba(99, 102, 241, 0.1);
+}
+
+/* Send button */
+#send-button {
+  background: var(--primary-color) !important;
+  color: white !important;
+  border-radius: var(--radius-md) !important;
+  padding: 0.75rem 1.5rem !important;
+  font-weight: 600 !important;
+  transition: all 0.2s ease !important;
+  border: none !important;
+  box-shadow: var(--shadow-md);
+}
+
+#send-button:hover {
+  background: var(--primary-hover) !important;
+  transform: translateY(-2px);
+  box-shadow: var(--shadow-lg);
+}
+
+#send-button:active {
+  transform: translateY(0);
+}
+
+/* === SETTINGS PANEL === */
+.settings-panel {
+  background: var(--bg-secondary) !important;
+  border-radius: var(--radius-lg) !important;
+  padding: 1.5rem !important;
+  box-shadow: var(--shadow-lg);
+  border: 1px solid var(--bg-tertiary) !important;
+}
+
+.settings-item {
+  margin: 1rem 0 !important;
+  padding: 0.75rem !important;
+  background: var(--bg-tertiary) !important;
+  border-radius: var(--radius-md) !important;
+  transition: background 0.2s ease;
+}
+
+.settings-item:hover {
+  background: rgba(99, 102, 241, 0.1) !important;
+}
+
+/* Sliders */
+input[type="range"] {
+  -webkit-appearance: none;
+  appearance: none;
+  width: 100%;
+  height: 6px;
+  border-radius: 3px;
+  background: var(--bg-tertiary);
+  outline: none;
+}
+
+input[type="range"]::-webkit-slider-thumb {
+  -webkit-appearance: none;
+  appearance: none;
+  width: 18px;
+  height: 18px;
+  border-radius: 50%;
+  background: var(--primary-color);
+  cursor: pointer;
+  box-shadow: var(--shadow-md);
+  transition: all 0.2s ease;
+}
+
+input[type="range"]::-webkit-slider-thumb:hover {
+  background: var(--primary-hover);
+  transform: scale(1.1);
+}
+
+/* === BUTTONS === */
+button {
+  border-radius: var(--radius-md) !important;
+  font-weight: 500 !important;
+  transition: all 0.2s ease !important;
+}
+
+button:hover {
+  transform: translateY(-1px);
+}
+
+/* Primary button */
+.button-primary {
+  background: var(--primary-color) !important;
+  color: white !important;
+}
+
+/* Secondary button */
+.button-secondary {
+  background: var(--bg-tertiary) !important;
+  color: var(--text-primary) !important;
+}
+
+/* === FILE UPLOAD === */
+.file-upload-area {
+  border: 2px dashed var(--bg-tertiary) !important;
+  border-radius: var(--radius-lg) !important;
+  padding: 2rem !important;
+  background: var(--bg-secondary) !important;
+  transition: all 0.3s ease !important;
+}
+
+.file-upload-area:hover {
+  border-color: var(--primary-color) !important;
+  background: rgba(99, 102, 241, 0.05) !important;
+}
+
+.file-upload-area.dragging {
+  border-color: var(--primary-color) !important;
+  background: rgba(99, 102, 241, 0.1) !important;
+  transform: scale(1.02);
+}
+
+/* === BADGES & TAGS === */
+.badge {
+  display: inline-block;
+  padding: 0.25rem 0.75rem;
+  border-radius: var(--radius-sm);
+  font-size: 0.875rem;
+  font-weight: 600;
+  margin: 0.25rem;
+}
+
+.badge-admin {
+  background: var(--accent-red);
+  color: white;
+}
+
+.badge-business {
+  background: var(--accent-green);
+  color: white;
+}
+
+.badge-engineering {
+  background: var(--primary-color);
+  color: white;
+}
+
+.badge-architecture {
+  background: var(--accent-yellow);
+  color: var(--bg-primary);
+}
+
+/* === LOADING ANIMATION === */
+.loading-dots {
+  display: inline-flex;
+  gap: 0.25rem;
+}
+
+.loading-dots span {
+  width: 8px;
+  height: 8px;
+  border-radius: 50%;
+  background: var(--primary-color);
+  animation: pulse 1.4s ease-in-out infinite;
+}
+
+.loading-dots span:nth-child(2) {
+  animation-delay: 0.2s;
+}
+
+.loading-dots span:nth-child(3) {
+  animation-delay: 0.4s;
+}
+
+@keyframes pulse {
+  0%, 100% {
+    opacity: 0.3;
+    transform: scale(0.8);
+  }
+  50% {
+    opacity: 1;
+    transform: scale(1);
+  }
+}
+
+/* === CUSTOM SCROLLBAR === */
+::-webkit-scrollbar {
+  width: 8px;
+  height: 8px;
+}
+
+::-webkit-scrollbar-track {
+  background: var(--bg-secondary);
+}
+
+::-webkit-scrollbar-thumb {
+  background: var(--bg-tertiary);
+  border-radius: 4px;
+}
+
+::-webkit-scrollbar-thumb:hover {
+  background: var(--primary-color);
+}
+
+/* === RESPONSIVE === */
+@media (max-width: 768px) {
+  #chat-container {
+    padding: 0.75rem;
+  }
+
+  .user-message,
+  .assistant-message {
+    padding: 0.75rem !important;
+  }
+
+  #app-header {
+    padding: 0.75rem !important;
+  }
+}
+
+/* === ANIMATIONS === */
+@keyframes fadeIn {
+  from {
+    opacity: 0;
+    transform: translateY(10px);
+  }
+  to {
+    opacity: 1;
+    transform: translateY(0);
+  }
+}
+
+.message {
+  animation: fadeIn 0.3s ease;
+}
+
+/* === TOOLTIPS === */
+.tooltip {
+  position: relative;
+  display: inline-block;
+}
+
+.tooltip .tooltiptext {
+  visibility: hidden;
+  background-color: var(--bg-tertiary);
+  color: var(--text-primary);
+  text-align: center;
+  border-radius: var(--radius-sm);
+  padding: 0.5rem 0.75rem;
+  position: absolute;
+  z-index: 1;
+  bottom: 125%;
+  left: 50%;
+  transform: translateX(-50%);
+  opacity: 0;
+  transition: opacity 0.3s;
+  font-size: 0.875rem;
+  white-space: nowrap;
+  box-shadow: var(--shadow-md);
+}
+
+.tooltip:hover .tooltiptext {
+  visibility: visible;
+  opacity: 1;
+}
diff --git a/public/dfm-logo-badge.svg b/public/dfm-logo-badge.svg
new file mode 100644
index 00000000..55f66a42
--- /dev/null
+++ b/public/dfm-logo-badge.svg
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/public/images/fav1.png b/public/images/fav1.png
new file mode 100644
index 00000000..58745cda
Binary files /dev/null and b/public/images/fav1.png differ
diff --git a/public/images/fav2.png b/public/images/fav2.png
new file mode 100644
index 00000000..1503bb62
Binary files /dev/null and b/public/images/fav2.png differ
diff --git a/public/images/fav3.png b/public/images/fav3.png
new file mode 100644
index 00000000..f1e7ff31
Binary files /dev/null and b/public/images/fav3.png differ
diff --git a/public/images/fav4.png b/public/images/fav4.png
new file mode 100644
index 00000000..dda6aa3d
Binary files /dev/null and b/public/images/fav4.png differ
diff --git a/public/images/favicon.ico b/public/images/favicon.ico
new file mode 100644
index 00000000..58745cda
Binary files /dev/null and b/public/images/favicon.ico differ
diff --git a/public/images/favicon.jpg b/public/images/favicon.jpg
new file mode 100644
index 00000000..220d40b1
Binary files /dev/null and b/public/images/favicon.jpg differ
diff --git a/public/images/logo1.png b/public/images/logo1.png
new file mode 100644
index 00000000..446d3099
Binary files /dev/null and b/public/images/logo1.png differ
diff --git a/public/images/logo2.png b/public/images/logo2.png
new file mode 100644
index 00000000..e6a0baa1
Binary files /dev/null and b/public/images/logo2.png differ
diff --git a/public/images/logoback.png b/public/images/logoback.png
new file mode 100644
index 00000000..825f28a6
Binary files /dev/null and b/public/images/logoback.png differ
diff --git a/requirements-backup.txt b/requirements-backup.txt
new file mode 100644
index 00000000..60cb05f2
--- /dev/null
+++ b/requirements-backup.txt
@@ -0,0 +1,14 @@
+chainlit==1.3.2
+pydantic==2.9.2
+ollama
+asyncpg>=0.29.0
+psycopg2-binary
+qdrant-client>=1.10.0
+sqlalchemy>=2.0.0
+greenlet>=3.0.0
+sniffio
+aiohttp
+alembic
+pymupdf
+google-generativeai
+python-dotenv
\ No newline at end of file
diff --git a/requirements-oauth2.txt b/requirements-oauth2.txt
new file mode 100644
index 00000000..e85f1340
--- /dev/null
+++ b/requirements-oauth2.txt
@@ -0,0 +1,16 @@
+chainlit==1.3.2
+pydantic==2.9.2
+ollama>=0.1.0
+asyncpg>=0.29.0
+psycopg2-binary
+qdrant-client>=1.10.0
+sqlalchemy>=2.0.0
+greenlet>=3.0.0
+sniffio
+aiohttp
+alembic
+pymupdf
+python-dotenv
+authlib>=1.2.0
+python-multipart>=0.0.6
+httpx>=0.24.0
diff --git a/requirements.txt b/requirements.txt
index 60cb05f2..87f66610 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 chainlit==1.3.2
 pydantic==2.9.2
-ollama
+ollama>=0.1.0
 asyncpg>=0.29.0
 psycopg2-binary
 qdrant-client>=1.10.0
@@ -10,5 +10,9 @@ sniffio
 aiohttp
 alembic
 pymupdf
-google-generativeai
-python-dotenv
\ No newline at end of file
+python-dotenv
+httpx>=0.24.0
+aiofiles>=23.0.0
+langchain>=0.0.208
+boto3>=1.28.0
+azure-storage-file-datalake>=12.14.0
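A minimal smoke test for the wiring above (an illustrative sketch, not part of
the commit): it uses only dependencies pinned in requirements.txt and assumes
the in-network service hostnames and default credentials from docker-compose.yml,
so run it inside the compose network, e.g.
docker compose run --rm chainlit-app python smoke_test.py

    # smoke_test.py: hypothetical helper; the file name and credentials are
    # the compose defaults above. Adjust if your .env differs.
    import asyncio

    import asyncpg
    from qdrant_client import QdrantClient


    async def main():
        # Postgres: same credentials the compose file passes to the postgres service
        conn = await asyncpg.connect(
            "postgresql://ai_user:secure_password_here@postgres:5432/ai_station"
        )
        print("postgres ok:", await conn.fetchval("SELECT 1"))
        await conn.close()

        # Qdrant: REST endpoint published by the qdrant service on port 6333
        client = QdrantClient(url="http://qdrant:6333")
        print("qdrant collections:", [c.name for c in client.get_collections().collections])


    asyncio.run(main())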