Spaces:

fullstack
/

pylate-document-search

Running on Zero

App Files Files Community

fullstack committed 2 days ago

Commit

6f629aa

1 Parent(s): 82ac432

.

Browse files

Files changed (1) hide show

app.py +670 -445

app.py CHANGED Viewed

@@ -1,22 +1,153 @@
-import gradio as gr
-import spaces
-import torch
 import os
-import tempfile
-import sqlite3
-import json
-import hashlib
-from pathlib import Path
-from typing import List, Dict, Any, Tuple
-import docx
-import fitz  # pymupdf
-from unstructured.partition.auto import partition
 os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
 os.environ["TORCH_COMPILE_DISABLE"] = "1"
-# PyLate imports
-from pylate import models, indexes, retrieve
 # Global variables for PyLate components
 model = None
@@ -27,483 +158,577 @@ metadata_db = None
 # ===== DOCUMENT PROCESSING FUNCTIONS =====
 def extract_text_from_pdf(file_path: str) -> str:
-   """Extract text from PDF file using PyMuPDF and unstructured as fallback."""
-   text = ""
-   try:
-       # Use PyMuPDF (fitz) - more reliable than PyPDF2
-       doc = fitz.open(file_path)
-       for page in doc:
-           text += page.get_text() + "\n"
-       doc.close()
-       # If no text extracted, try unstructured
-       if not text.strip():
-           elements = partition(filename=file_path)
-           text = "\n".join([str(element) for element in elements])
-   except Exception as e:
-       # Final fallback to unstructured
-       try:
-           elements = partition(filename=file_path)
-           text = "\n".join([str(element) for element in elements])
-       except:
-           text = f"Error: Could not extract text from PDF: {str(e)}"
-   return text.strip()
 def extract_text_from_docx(file_path: str) -> str:
-   """Extract text from DOCX file."""
-   try:
-       doc = docx.Document(file_path)
-       text = ""
-       for paragraph in doc.paragraphs:
-           text += paragraph.text + "\n"
-       return text.strip()
-   except Exception as e:
-       return f"Error: Could not extract text from DOCX: {str(e)}"
 def extract_text_from_txt(file_path: str) -> str:
-   """Extract text from TXT file."""
-   try:
-       with open(file_path, 'r', encoding='utf-8') as file:
-           return file.read().strip()
-   except:
-       try:
-           with open(file_path, 'r', encoding='latin1') as file:
-               return file.read().strip()
-       except Exception as e:
-           return f"Error: Could not read text file: {str(e)}"
 def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[Dict[str, Any]]:
-   """Chunk text with overlap and return metadata."""
-   chunks = []
-   start = 0
-   chunk_index = 0
-   while start < len(text):
-       end = start + chunk_size
-       chunk_text = text[start:end]
-       # Try to break at sentence boundary
-       if end < len(text):
-           last_period = chunk_text.rfind('.')
-           last_newline = chunk_text.rfind('\n')
-           break_point = max(last_period, last_newline)
-           if break_point > chunk_size * 0.7:
-               chunk_text = chunk_text[:break_point + 1]
-               end = start + break_point + 1
-       if chunk_text.strip():
-           chunks.append({
-               'text': chunk_text.strip(),
-               'start': start,
-               'end': end,
-               'index': chunk_index,
-               'length': len(chunk_text.strip())
-           })
-           chunk_index += 1
-       start = max(start + 1, end - overlap)
-   return chunks
 # ===== METADATA DATABASE =====
 def init_metadata_db():
-   """Initialize SQLite database for metadata."""
-   global metadata_db
-   db_path = "metadata.db"
-   metadata_db = sqlite3.connect(db_path, check_same_thread=False)
-   metadata_db.execute("""
-       CREATE TABLE IF NOT EXISTS documents (
-           doc_id TEXT PRIMARY KEY,
-           filename TEXT NOT NULL,
-           file_hash TEXT NOT NULL,
-           original_text TEXT NOT NULL,
-           chunk_index INTEGER NOT NULL,
-           total_chunks INTEGER NOT NULL,
-           chunk_start INTEGER NOT NULL,
-           chunk_end INTEGER NOT NULL,
-           chunk_size INTEGER NOT NULL,
-           created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-       )
-   """)
-   metadata_db.execute("""
-       CREATE INDEX IF NOT EXISTS idx_filename ON documents(filename);
-   """)
-   metadata_db.commit()
 def add_document_metadata(doc_id: str, filename: str, file_hash: str,
                          original_text: str, chunk_info: Dict[str, Any], total_chunks: int):
-   """Add document metadata to database."""
-   global metadata_db
-   metadata_db.execute("""
-       INSERT OR REPLACE INTO documents
-       (doc_id, filename, file_hash, original_text, chunk_index, total_chunks,
-        chunk_start, chunk_end, chunk_size)
-       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
-   """, (
-       doc_id, filename, file_hash, original_text,
-       chunk_info['index'], total_chunks,
-       chunk_info['start'], chunk_info['end'], chunk_info['length']
-   ))
-   metadata_db.commit()
 def get_document_metadata(doc_id: str) -> Dict[str, Any]:
-   """Get document metadata by ID."""
-   global metadata_db
-   cursor = metadata_db.execute(
-       "SELECT * FROM documents WHERE doc_id = ?", (doc_id,)
-   )
-   row = cursor.fetchone()
-   if row:
-       columns = [desc[0] for desc in cursor.description]
-       return dict(zip(columns, row))
-   return {}
 # ===== PYLATE INITIALIZATION =====
-@spaces.GPU
 def initialize_pylate(model_name: str = "colbert-ir/colbertv2.0") -> str:
-   """Initialize PyLate components on GPU."""
-   global model, index, retriever
-   try:
-       # Initialize metadata database
-       init_metadata_db()
-       # Load ColBERT model
-       model = models.ColBERT(model_name_or_path=model_name)
-       # Move to GPU if available
-       if torch.cuda.is_available():
-           model = model.to('cuda')
-       # Initialize PLAID index with CPU fallback for k-means
-       index = indexes.PLAID(
-           index_folder="./pylate_index",
-           index_name="documents",
-           override=True,
-           kmeans_niters=1,  # Reduce k-means iterations
-           nbits=1           # Reduce quantization bits
-       )
-       # Initialize retriever
-       retriever = retrieve.ColBERT(index=index)
-       return f"✅ PyLate initialized successfully!\nModel: {model_name}\nDevice: {'GPU' if torch.cuda.is_available() else 'CPU'}"
-   except Exception as e:
-       return f"❌ Error initializing PyLate: {str(e)}"
 # ===== DOCUMENT PROCESSING =====
-@spaces.GPU
 def process_documents(files, chunk_size: int = 1000, overlap: int = 100) -> str:
-   """Process uploaded documents and add to index."""
-   global model, index, metadata_db
-   if not model or not index:
-       return "❌ Please initialize PyLate first!"
-   if not files:
-       return "❌ No files uploaded!"
-   try:
-       all_documents = []
-       all_doc_ids = []
-       processed_files = []
-       for file in files:
-           # Get file info
-           filename = Path(file.name).name
-           file_path = file.name
-           # Calculate file hash
-           with open(file_path, 'rb') as f:
-               file_hash = hashlib.md5(f.read()).hexdigest()
-           # Extract text based on file type
-           if filename.lower().endswith('.pdf'):
-               text = extract_text_from_pdf(file_path)
-           elif filename.lower().endswith('.docx'):
-               text = extract_text_from_docx(file_path)
-           elif filename.lower().endswith('.txt'):
-               text = extract_text_from_txt(file_path)
-           else:
-               continue
-           if not text or text.startswith("Error:"):
-               processed_files.append(f"{filename}: Failed to extract text")
-               continue
-           # Chunk the text
-           chunks = chunk_text(text, chunk_size, overlap)
-           # Process each chunk
-           for chunk in chunks:
-               doc_id = f"{filename}_chunk_{chunk['index']}"
-               all_documents.append(chunk['text'])
-               all_doc_ids.append(doc_id)
-               # Store metadata
-               add_document_metadata(
-                   doc_id=doc_id,
-                   filename=filename,
-                   file_hash=file_hash,
-                   original_text=chunk['text'],
-                   chunk_info=chunk,
-                   total_chunks=len(chunks)
-               )
-           processed_files.append(f"{filename}: {len(chunks)} chunks")
-       if not all_documents:
-           return "❌ No text could be extracted from uploaded files!"
-       # Encode documents with PyLate
-       document_embeddings = model.encode(
-           all_documents,
-           batch_size=16,  # Smaller batch for ZeroGPU
-           is_query=False,
-           show_progress_bar=True
-       )
-       # Add to PLAID index
-       index.add_documents(
-           documents_ids=all_doc_ids,
-           documents_embeddings=document_embeddings
-       )
-       result = f"✅ Successfully processed {len(files)} files:\n"
-       result += f"📄 Total chunks: {len(all_documents)}\n"
-       result += f"🔍 Indexed documents:\n"
-       for file_info in processed_files:
-           result += f"  • {file_info}\n"
-       return result
-   except Exception as e:
-       return f"❌ Error processing documents: {str(e)}"
 # ===== SEARCH FUNCTION =====
-@spaces.GPU
 def search_documents(query: str, k: int = 5, show_chunks: bool = True) -> str:
-   """Search documents using PyLate."""
-   global model, retriever, metadata_db
-   if not model or not retriever:
-       return "❌ Please initialize PyLate and process documents first!"
-   if not query.strip():
-       return "❌ Please enter a search query!"
-   try:
-       # Encode query
-       query_embedding = model.encode([query], is_query=True)
-       # Search
-       results = retriever.retrieve(query_embedding, k=k)[0]
-       if not results:
-           return "🔍 No results found for your query."
-       # Format results with metadata
-       formatted_results = [f"🔍 **Search Results for:** '{query}'\n"]
-       for i, result in enumerate(results):
-           doc_id = result['id']
-           score = result['score']
-           # Get metadata
-           metadata = get_document_metadata(doc_id)
-           formatted_results.append(f"## Result {i+1} (Score: {score:.2f})")
-           formatted_results.append(
-               f"**File:** {metadata.get('filename', 'Unknown')}")
-           formatted_results.append(
-               f"**Chunk:** {metadata.get('chunk_index', 0) + 1}/{metadata.get('total_chunks', 1)}")
-           if show_chunks:
-               text = metadata.get('original_text', '')
-               preview = text[:300] + "..." if len(text) > 300 else text
-               formatted_results.append(f"**Text:** {preview}")
-           formatted_results.append("---")
-       return "\n".join(formatted_results)
-   except Exception as e:
-       return f"❌ Error searching: {str(e)}"
 # ===== GRADIO INTERFACE =====
 def create_interface():
-   """Create the Gradio interface."""
-   with gr.Blocks(title="PyLate Document Search", theme=gr.themes.Soft()) as demo:
-       gr.Markdown("""
-       # 🔍 PyLate Document Search
-       ### Powered by ColBERT and ZeroGPU
-       Upload documents, process them with PyLate, and perform semantic search!
-       **Note:** Using PyMuPDF and Unstructured for robust PDF text extraction.
-       """)
-       with gr.Tab("🚀 Setup"):
-           gr.Markdown("### Initialize PyLate System")
-           model_choice = gr.Dropdown(
-               choices=[
-                   "colbert-ir/colbertv2.0",
-                   "sentence-transformers/all-MiniLM-L6-v2"
-               ],
-               value="colbert-ir/colbertv2.0",
-               label="Select Model"
-           )
-           init_btn = gr.Button("Initialize PyLate", variant="primary")
-           init_status = gr.Textbox(label="Initialization Status", lines=3)
-           init_btn.click(
-               initialize_pylate,
-               inputs=model_choice,
-               outputs=init_status
-           )
-       with gr.Tab("📄 Document Upload"):
-           gr.Markdown("### Upload and Process Documents")
-           with gr.Row():
-               with gr.Column():
-                   file_upload = gr.File(
-                       file_count="multiple",
-                       file_types=[".pdf", ".docx", ".txt"],
-                       label="Upload Documents (PDF, DOCX, TXT)"
-                   )
-                   with gr.Row():
-                       chunk_size = gr.Slider(
-                           minimum=500,
-                           maximum=3000,
-                           value=1000,
-                           step=100,
-                           label="Chunk Size (characters)"
-                       )
-                       overlap = gr.Slider(
-                           minimum=0,
-                           maximum=500,
-                           value=100,
-                           step=50,
-                           label="Chunk Overlap (characters)"
-                       )
-                   process_btn = gr.Button(
-                       "Process Documents", variant="primary")
-               with gr.Column():
-                   process_status = gr.Textbox(
-                       label="Processing Status",
-                       lines=10,
-                       max_lines=15
-                   )
-           process_btn.click(
-               process_documents,
-               inputs=[file_upload, chunk_size, overlap],
-               outputs=process_status
-           )
-       with gr.Tab("🔍 Search"):
-           gr.Markdown("### Search Your Documents")
-           with gr.Row():
-               with gr.Column():
-                   search_query = gr.Textbox(
-                       label="Search Query",
-                       placeholder="Enter your search query...",
-                       lines=2
-                   )
-                   with gr.Row():
-                       num_results = gr.Slider(
-                           minimum=1,
-                           maximum=20,
-                           value=5,
-                           step=1,
-                           label="Number of Results"
-                       )
-                       show_chunks = gr.Checkbox(
-                           value=True,
-                           label="Show Text Chunks"
-                       )
-                   search_btn = gr.Button("Search", variant="primary")
-               with gr.Column():
-                   search_results = gr.Textbox(
-                       label="Search Results",
-                       lines=15,
-                       max_lines=20
-                   )
-           search_btn.click(
-               search_documents,
-               inputs=[search_query, num_results, show_chunks],
-               outputs=search_results
-           )
-       with gr.Tab("ℹ️ Info"):
-           gr.Markdown("""
-           ### About This System
-           **PyLate Document Search** is a semantic search system that uses:
-           - **PyLate**: A flexible library for ColBERT models
-           - **ColBERT**: Late interaction retrieval for high-quality search
-           - **ZeroGPU**: Hugging Face's free GPU infrastructure
-           #### Features:
-           - 📄 Multi-format document support (PDF, DOCX, TXT)
-           - ✂️ Intelligent text chunking with overlap
-           - 🧠 Semantic search using ColBERT embeddings
-           - 💾 Metadata tracking for result context
-           - ⚡ GPU-accelerated processing
-           #### PDF Processing:
-           - Uses PyMuPDF (fitz) for reliable text extraction
-           - Falls back to Unstructured for complex PDFs
-           - No dependency on PyPDF2
-           #### Usage Tips:
-           1. Initialize the system first (required)
-           2. Upload your documents and process them
-           3. Use natural language queries for best results
-           4. Adjust chunk size based on your document types
-           Built with ❤️ using PyLate and Gradio
-           """)
-   return demo
 # ===== MAIN =====
 if __name__ == "__main__":
-   demo = create_interface()
-   demo.launch(
-       share=False,
-       server_name="0.0.0.0",
-       server_port=7860
-   )

+#!/usr/bin/env python3
+"""
+PyLate ZeroGPU Document Search with Runtime Package Installation
+Complete version that installs all dependencies at startup if needed.
+"""
+import subprocess
+import sys
 import os
+import time
+print("🚀 Starting PyLate ZeroGPU Document Search...")
+print("🔧 Checking and installing required packages...")
+# ===== RUNTIME PACKAGE INSTALLATION =====
+def install_package(package, quiet=True):
+    """Install a package at runtime."""
+    try:
+        if quiet:
+            subprocess.check_call([
+                sys.executable, '-m', 'pip', 'install', package,
+                '--quiet', '--disable-pip-version-check'
+            ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+        else:
+            subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
+        return True
+    except Exception as e:
+        print(f"⚠️ Failed to install {package}: {e}")
+        return False
+def check_and_install_packages():
+    """Check and install all required packages."""
+    # Define packages with their import names and pip names
+    packages_to_check = [
+        # (import_name, pip_package, test_import)
+        ('gradio', 'gradio==4.44.0', lambda: __import__('gradio')),
+        ('spaces', 'spaces', lambda: __import__('spaces')),
+        ('torch', 'torch', lambda: __import__('torch')),
+        ('torchvision', 'torchvision', lambda: __import__('torchvision')),
+        ('torchaudio', 'torchaudio', lambda: __import__('torchaudio')),
+        ('transformers', 'transformers==4.48.2', lambda: __import__('transformers')),
+        ('sentence_transformers', 'sentence-transformers', lambda: __import__('sentence_transformers')),
+        ('docx', 'python-docx', lambda: __import__('docx')),
+        ('fitz', 'pymupdf', lambda: __import__('fitz')),
+        ('unstructured', 'unstructured', lambda: __import__('unstructured')),
+        ('pandas', 'pandas', lambda: __import__('pandas')),
+        ('numpy', 'numpy', lambda: __import__('numpy')),
+        ('huggingface_hub', 'huggingface_hub', lambda: __import__('huggingface_hub')),
+        ('accelerate', 'accelerate', lambda: __import__('accelerate')),
+        ('pylate', 'pylate==1.2.0', lambda: __import__('pylate')),
+    ]
+    installed_count = 0
+    failed_packages = []
+    for import_name, pip_package, test_func in packages_to_check:
+        try:
+            test_func()
+            print(f"✅ {import_name} - already installed")
+            installed_count += 1
+        except ImportError:
+            print(f"📦 Installing {pip_package}...")
+            success = install_package(pip_package, quiet=False)
+            if success:
+                try:
+                    # Test import after installation
+                    test_func()
+                    print(f"✅ {import_name} - installed successfully")
+                    installed_count += 1
+                except ImportError:
+                    print(f"❌ {import_name} - installation failed (import test failed)")
+                    failed_packages.append(import_name)
+            else:
+                print(f"❌ {import_name} - installation failed")
+                failed_packages.append(import_name)
+    print(f"\n📊 Installation Summary:")
+    print(f"   ✅ Successfully installed/verified: {installed_count}/{len(packages_to_check)}")
+    if failed_packages:
+        print(f"   ❌ Failed packages: {', '.join(failed_packages)}")
+        print(f"   ⚠️ App may not work correctly with missing packages")
+    else:
+        print(f"   🎉 All packages ready!")
+    return len(failed_packages) == 0
+# Install packages before importing anything else
+installation_success = check_and_install_packages()
+# Now import everything
+print("\n🔄 Loading modules...")
+try:
+    import gradio as gr
+    import spaces
+    import torch
+    import tempfile
+    import sqlite3
+    import json
+    import hashlib
+    from pathlib import Path
+    from typing import List, Dict, Any, Tuple
+    print("✅ Core modules loaded")
+except ImportError as e:
+    print(f"❌ Failed to import core modules: {e}")
+    sys.exit(1)
+# Import document processing modules with fallbacks
+try:
+    import docx
+    print("✅ python-docx loaded")
+except ImportError:
+    print("⚠️ python-docx not available - DOCX processing will be disabled")
+    docx = None
+try:
+    import fitz  # pymupdf
+    print("✅ PyMuPDF loaded")
+except ImportError:
+    print("⚠️ PyMuPDF not available - PDF processing will be limited")
+    fitz = None
+try:
+    from unstructured.partition.auto import partition
+    print("✅ Unstructured loaded")
+except ImportError:
+    print("⚠️ Unstructured not available - fallback text extraction disabled")
+    partition = None
+try:
+    from pylate import models, indexes, retrieve
+    print("✅ PyLate loaded")
+except ImportError as e:
+    print(f"❌ PyLate failed to load: {e}")
+    print("🔄 Attempting to install PyLate...")
+    install_package('pylate==1.2.0', quiet=False)
+    try:
+        from pylate import models, indexes, retrieve
+        print("✅ PyLate loaded after installation")
+    except ImportError:
+        print("❌ PyLate installation failed - core functionality unavailable")
+        sys.exit(1)
+# Set environment variables
 os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
 os.environ["TORCH_COMPILE_DISABLE"] = "1"
+print("🎯 All modules loaded successfully!\n")
 # Global variables for PyLate components
 model = None
 # ===== DOCUMENT PROCESSING FUNCTIONS =====
 def extract_text_from_pdf(file_path: str) -> str:
+    """Extract text from PDF file using PyMuPDF and unstructured as fallback."""
+    text = ""
+    if not fitz:
+        return "Error: PyMuPDF not available for PDF processing"
+    try:
+        # Use PyMuPDF (fitz) - more reliable than PyPDF2
+        doc = fitz.open(file_path)
+        for page in doc:
+            text += page.get_text() + "\n"
+        doc.close()
+        # If no text extracted, try unstructured
+        if not text.strip() and partition:
+            elements = partition(filename=file_path)
+            text = "\n".join([str(element) for element in elements])
+    except Exception as e:
+        # Final fallback to unstructured
+        if partition:
+            try:
+                elements = partition(filename=file_path)
+                text = "\n".join([str(element) for element in elements])
+            except:
+                text = f"Error: Could not extract text from PDF: {str(e)}"
+        else:
+            text = f"Error: Could not extract text from PDF: {str(e)}"
+    return text.strip()
 def extract_text_from_docx(file_path: str) -> str:
+    """Extract text from DOCX file."""
+    if not docx:
+        return "Error: python-docx not available for DOCX processing"
+    try:
+        doc = docx.Document(file_path)
+        text = ""
+        for paragraph in doc.paragraphs:
+            text += paragraph.text + "\n"
+        return text.strip()
+    except Exception as e:
+        return f"Error: Could not extract text from DOCX: {str(e)}"
 def extract_text_from_txt(file_path: str) -> str:
+    """Extract text from TXT file."""
+    try:
+        with open(file_path, 'r', encoding='utf-8') as file:
+            return file.read().strip()
+    except UnicodeDecodeError:
+        try:
+            with open(file_path, 'r', encoding='latin1') as file:
+                return file.read().strip()
+        except Exception as e:
+            return f"Error: Could not read text file: {str(e)}"
+    except Exception as e:
+        return f"Error: Could not read text file: {str(e)}"
 def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[Dict[str, Any]]:
+    """Chunk text with overlap and return metadata."""
+    chunks = []
+    start = 0
+    chunk_index = 0
+    while start < len(text):
+        end = start + chunk_size
+        chunk_text = text[start:end]
+        # Try to break at sentence boundary
+        if end < len(text):
+            last_period = chunk_text.rfind('.')
+            last_newline = chunk_text.rfind('\n')
+            break_point = max(last_period, last_newline)
+            if break_point > chunk_size * 0.7:
+                chunk_text = chunk_text[:break_point + 1]
+                end = start + break_point + 1
+        if chunk_text.strip():
+            chunks.append({
+                'text': chunk_text.strip(),
+                'start': start,
+                'end': end,
+                'index': chunk_index,
+                'length': len(chunk_text.strip())
+            })
+            chunk_index += 1
+        start = max(start + 1, end - overlap)
+    return chunks
 # ===== METADATA DATABASE =====
 def init_metadata_db():
+    """Initialize SQLite database for metadata."""
+    global metadata_db
+    db_path = "metadata.db"
+    metadata_db = sqlite3.connect(db_path, check_same_thread=False)
+    metadata_db.execute("""
+        CREATE TABLE IF NOT EXISTS documents (
+            doc_id TEXT PRIMARY KEY,
+            filename TEXT NOT NULL,
+            file_hash TEXT NOT NULL,
+            original_text TEXT NOT NULL,
+            chunk_index INTEGER NOT NULL,
+            total_chunks INTEGER NOT NULL,
+            chunk_start INTEGER NOT NULL,
+            chunk_end INTEGER NOT NULL,
+            chunk_size INTEGER NOT NULL,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+        )
+    """)
+    metadata_db.execute("""
+        CREATE INDEX IF NOT EXISTS idx_filename ON documents(filename);
+    """)
+    metadata_db.commit()
 def add_document_metadata(doc_id: str, filename: str, file_hash: str,
                          original_text: str, chunk_info: Dict[str, Any], total_chunks: int):
+    """Add document metadata to database."""
+    global metadata_db
+    metadata_db.execute("""
+        INSERT OR REPLACE INTO documents
+        (doc_id, filename, file_hash, original_text, chunk_index, total_chunks,
+         chunk_start, chunk_end, chunk_size)
+        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+    """, (
+        doc_id, filename, file_hash, original_text,
+        chunk_info['index'], total_chunks,
+        chunk_info['start'], chunk_info['end'], chunk_info['length']
+    ))
+    metadata_db.commit()
 def get_document_metadata(doc_id: str) -> Dict[str, Any]:
+    """Get document metadata by ID."""
+    global metadata_db
+    cursor = metadata_db.execute(
+        "SELECT * FROM documents WHERE doc_id = ?", (doc_id,)
+    )
+    row = cursor.fetchone()
+    if row:
+        columns = [desc[0] for desc in cursor.description]
+        return dict(zip(columns, row))
+    return {}
 # ===== PYLATE INITIALIZATION =====
+@spaces.GPU(duration=120)  # Allow 2 minutes for initialization
 def initialize_pylate(model_name: str = "colbert-ir/colbertv2.0") -> str:
+    """Initialize PyLate components on ZeroGPU H200."""
+    global model, index, retriever
+    try:
+        # Initialize metadata database
+        init_metadata_db()
+        # Load ColBERT model
+        model = models.ColBERT(model_name_or_path=model_name)
+        # Move to GPU - ZeroGPU provides CUDA access
+        device_info = "CPU"
+        if torch.cuda.is_available():
+            model = model.to('cuda')
+            device_name = torch.cuda.get_device_name()
+            device_info = f"GPU: {device_name}"
+        # Initialize PLAID index with optimized settings for ZeroGPU
+        index = indexes.PLAID(
+            index_folder="./pylate_index",
+            index_name="documents",
+            override=True,
+            kmeans_niters=1,  # Reduce k-means iterations for faster setup
+            nbits=2           # Optimized for memory efficiency
+        )
+        # Initialize retriever
+        retriever = retrieve.ColBERT(index=index)
+        return f"✅ PyLate initialized successfully on ZeroGPU!\n🔥 Model: {model_name}\n🎯 Device: {device_info}\n💾 VRAM: ~70GB available\n🚀 Ready for document processing!"
+    except Exception as e:
+        return f"❌ Error initializing PyLate: {str(e)}\n\nPlease check the logs for more details."
 # ===== DOCUMENT PROCESSING =====
+@spaces.GPU(duration=300)  # Allow 5 minutes for processing
 def process_documents(files, chunk_size: int = 1000, overlap: int = 100) -> str:
+    """Process uploaded documents and add to index using ZeroGPU."""
+    global model, index, metadata_db
+    if not model or not index:
+        return "❌ Please initialize PyLate first!"
+    if not files:
+        return "❌ No files uploaded!"
+    try:
+        all_documents = []
+        all_doc_ids = []
+        processed_files = []
+        skipped_files = []
+        for file in files:
+            # Get file info
+            filename = Path(file.name).name
+            file_path = file.name
+            # Calculate file hash
+            with open(file_path, 'rb') as f:
+                file_hash = hashlib.md5(f.read()).hexdigest()
+            # Extract text based on file type
+            text = ""
+            if filename.lower().endswith('.pdf'):
+                if fitz:
+                    text = extract_text_from_pdf(file_path)
+                else:
+                    skipped_files.append(f"{filename}: PDF processing not available")
+                    continue
+            elif filename.lower().endswith('.docx'):
+                if docx:
+                    text = extract_text_from_docx(file_path)
+                else:
+                    skipped_files.append(f"{filename}: DOCX processing not available")
+                    continue
+            elif filename.lower().endswith('.txt'):
+                text = extract_text_from_txt(file_path)
+            else:
+                skipped_files.append(f"{filename}: Unsupported file type")
+                continue
+            if not text or text.startswith("Error:"):
+                skipped_files.append(f"{filename}: Failed to extract text")
+                continue
+            # Chunk the text
+            chunks = chunk_text(text, chunk_size, overlap)
+            if not chunks:
+                skipped_files.append(f"{filename}: No valid chunks created")
+                continue
+            # Process each chunk
+            for chunk in chunks:
+                doc_id = f"{filename}_chunk_{chunk['index']}"
+                all_documents.append(chunk['text'])
+                all_doc_ids.append(doc_id)
+                # Store metadata
+                add_document_metadata(
+                    doc_id=doc_id,
+                    filename=filename,
+                    file_hash=file_hash,
+                    original_text=chunk['text'],
+                    chunk_info=chunk,
+                    total_chunks=len(chunks)
+                )
+            processed_files.append(f"{filename}: {len(chunks)} chunks")
+        if not all_documents:
+            return "❌ No text could be extracted from uploaded files!\n" + "\n".join(skipped_files)
+        # Encode documents with PyLate on H200 GPU
+        document_embeddings = model.encode(
+            all_documents,
+            batch_size=32,  # Optimized batch size for H200's 70GB VRAM
+            is_query=False,
+            show_progress_bar=True
+        )
+        # Add to PLAID index
+        index.add_documents(
+            documents_ids=all_doc_ids,
+            documents_embeddings=document_embeddings
+        )
+        result = f"✅ Successfully processed {len([f for f in files if not any(f.name in skip for skip in skipped_files)])} files on ZeroGPU H200:\n"
+        result += f"📄 Total chunks indexed: {len(all_documents)}\n"
+        result += f"🔍 Documents processed:\n"
+        for file_info in processed_files:
+            result += f"  • {file_info}\n"
+        if skipped_files:
+            result += f"\n⚠️ Skipped files:\n"
+            for skip_info in skipped_files:
+                result += f"  • {skip_info}\n"
+        result += f"\n🎉 Document index ready for search!"
+        return result
+    except Exception as e:
+        return f"❌ Error processing documents: {str(e)}\n\nPlease check your files and try again."
 # ===== SEARCH FUNCTION =====
+@spaces.GPU(duration=60)  # 1 minute for search
 def search_documents(query: str, k: int = 5, show_chunks: bool = True) -> str:
+    """Search the indexed documents with PyLate and format the hits as text.
+
+    Args:
+        query: Free-text search query; None or whitespace-only is rejected.
+        k: Maximum number of hits to request from the retriever. Coerced to
+            an int of at least 1 because UI sliders may deliver floats.
+        show_chunks: When True, append a preview (first 400 characters) of
+            each hit's stored chunk text.
+
+    Returns:
+        A Markdown-flavoured report of the hits, or a user-facing message
+        (prefixed with an emoji marker) when the search cannot run, finds
+        nothing, or raises.
+    """
+    # Module-level PyLate objects are only read here, so no `global` is
+    # needed; `is None` avoids depending on the objects' truthiness.
+    if model is None or retriever is None:
+        return "❌ Please initialize PyLate and process documents first!"
+    if not query or not query.strip():
+        return "❌ Please enter a search query!"
+    try:
+        k = max(1, int(k))
+        # Encode query on GPU
+        query_embedding = model.encode([query], is_query=True)
+        # The retriever returns one result list per query; guard against an
+        # empty outer list instead of indexing blindly.
+        per_query_results = retriever.retrieve(query_embedding, k=k)
+        results = per_query_results[0] if per_query_results else []
+        if not results:
+            return "🔍 No results found for your query.\n\nTry:\n• Different keywords\n• Broader search terms\n• Check if documents were processed correctly"
+        # Format results with metadata
+        formatted_results = [f"🔍 **Search Results for:** '{query}' (powered by ZeroGPU H200)\n"]
+        for rank, result in enumerate(results, start=1):
+            doc_id = result['id']
+            score = result['score']
+            # A hit without a metadata row must not abort the whole report.
+            metadata = get_document_metadata(doc_id) or {}
+            chunk_number = (metadata.get('chunk_index') or 0) + 1
+            formatted_results.append(f"## Result {rank} (Relevance: {score:.3f})")
+            formatted_results.append(
+                f"**📄 File:** {metadata.get('filename', 'Unknown')}")
+            formatted_results.append(
+                f"**📑 Chunk:** {chunk_number}/{metadata.get('total_chunks', 1)}")
+            if show_chunks:
+                text = metadata.get('original_text') or ''
+                # Keep the report readable: long chunks are cut at 400 chars.
+                preview = text[:400] + "..." if len(text) > 400 else text
+                formatted_results.append(f"**💬 Text:** {preview}")
+            formatted_results.append("---")
+        formatted_results.append(f"\n🎯 Found {len(results)} relevant results using ColBERT semantic search")
+        return "\n".join(formatted_results)
+    except Exception as e:
+        # UI boundary: report the failure as text rather than raising.
+        return f"❌ Error searching: {str(e)}\n\nPlease try again or check if PyLate is properly initialized."
 # ===== GRADIO INTERFACE =====
 def create_interface():
+    """Build the Gradio Blocks UI for the ZeroGPU document-search app.
+
+    The layout has four tabs: Setup (calls ``initialize_pylate``), Document
+    Upload (calls ``process_documents``), Search (calls ``search_documents``)
+    and a static info tab.
+
+    Returns:
+        The constructed ``gr.Blocks`` instance; the caller launches it.
+    """
+    with gr.Blocks(title="PyLate ZeroGPU Document Search", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("""
+        # 🚀 PyLate ZeroGPU Document Search
+        ### Powered by ColBERT and NVIDIA H200 (70GB VRAM)
+        Upload documents, process them with PyLate on ZeroGPU, and perform lightning-fast semantic search!
+        **🔥 ZeroGPU Features:**
+        - 🎯 NVIDIA H200 GPU with 70GB VRAM
+        - ⚡ Dynamic GPU allocation (only when needed)
+        - 🆓 Free for HF Pro subscribers
+        - 🚀 Optimized for PyTorch/ColBERT workloads
+        - 🔄 Automatic package installation
+        """)
+        # Status indicator; the PDF/DOCX marks reflect the truthiness of the
+        # module-level `fitz` and `docx` names when the UI is built.
+        with gr.Row():
+            gr.Markdown(f"""
+            **📊 System Status:**
+            - ✅ PyLate: Ready
+            - ✅ Document Processing: {"PDF ✅" if fitz else "PDF ❌"} | {"DOCX ✅" if docx else "DOCX ❌"} | TXT ✅
+            - ✅ ZeroGPU: Available
+            """)
+        with gr.Tab("🚀 Setup"):
+            gr.Markdown("### Initialize PyLate System on ZeroGPU H200")
+            model_choice = gr.Dropdown(
+                choices=[
+                    "colbert-ir/colbertv2.0",
+                    "sentence-transformers/all-MiniLM-L6-v2"
+                ],
+                value="colbert-ir/colbertv2.0",
+                label="Select ColBERT Model",
+                info="ColBERT v2.0 is recommended for best performance"
+            )
+            init_btn = gr.Button("🚀 Initialize PyLate on ZeroGPU", variant="primary", size="lg")
+            init_status = gr.Textbox(label="Initialization Status", lines=6, max_lines=10)
+            init_btn.click(
+                initialize_pylate,
+                inputs=model_choice,
+                outputs=init_status
+            )
+        with gr.Tab("📄 Document Upload"):
+            gr.Markdown("### Upload and Process Documents on H200 GPU")
+            with gr.Row():
+                with gr.Column():
+                    # gr.File takes no `info` keyword (passing one raises
+                    # TypeError), so the supported-types hint lives in the label.
+                    file_upload = gr.File(
+                        file_count="multiple",
+                        file_types=[".pdf", ".docx", ".txt"],
+                        label="Upload Documents (Supported: PDF, DOCX, TXT files)"
+                    )
+                    with gr.Row():
+                        chunk_size = gr.Slider(
+                            minimum=500,
+                            maximum=3000,
+                            value=1000,
+                            step=100,
+                            label="Chunk Size (characters)",
+                            info="Larger chunks = more context, smaller chunks = more precise"
+                        )
+                        overlap = gr.Slider(
+                            minimum=0,
+                            maximum=500,
+                            value=100,
+                            step=50,
+                            label="Chunk Overlap (characters)",
+                            info="Overlap helps maintain context between chunks"
+                        )
+                    process_btn = gr.Button(
+                        "⚡ Process Documents on ZeroGPU", variant="primary", size="lg")
+                with gr.Column():
+                    process_status = gr.Textbox(
+                        label="Processing Status",
+                        lines=15,
+                        max_lines=20,
+                        info="Processing status and results will appear here"
+                    )
+            process_btn.click(
+                process_documents,
+                inputs=[file_upload, chunk_size, overlap],
+                outputs=process_status
+            )
+        with gr.Tab("🔍 Search"):
+            gr.Markdown("### Search Your Documents with H200 Power")
+            with gr.Row():
+                with gr.Column():
+                    search_query = gr.Textbox(
+                        label="Search Query",
+                        placeholder="Enter your search query... (e.g., 'machine learning algorithms', 'financial projections')",
+                        lines=2,
+                        info="Use natural language - ColBERT understands semantic meaning"
+                    )
+                    with gr.Row():
+                        num_results = gr.Slider(
+                            minimum=1,
+                            maximum=20,
+                            value=5,
+                            step=1,
+                            label="Number of Results",
+                            info="How many relevant chunks to return"
+                        )
+                        show_chunks = gr.Checkbox(
+                            value=True,
+                            label="Show Text Chunks",
+                            info="Display the actual text content"
+                        )
+                    search_btn = gr.Button("🔍 Search with ZeroGPU", variant="primary", size="lg")
+                with gr.Column():
+                    search_results = gr.Textbox(
+                        label="Search Results",
+                        lines=18,
+                        max_lines=25,
+                        info="Semantic search results will appear here"
+                    )
+            search_btn.click(
+                search_documents,
+                inputs=[search_query, num_results, show_chunks],
+                outputs=search_results
+            )
+        with gr.Tab("ℹ️ ZeroGPU Info"):
+            gr.Markdown("""
+            ### About ZeroGPU PyLate Search
+            **🔥 Powered by NVIDIA H200 Tensor Core GPU**
+            #### 🚀 ZeroGPU Features:
+            - **70GB HBM3 Memory** - Massive capacity for large document collections
+            - **Dynamic Allocation** - GPU assigned only when functions need it
+            - **Optimized for PyTorch** - Perfect for ColBERT/PyLate workloads
+            - **Free for Pro Users** - No additional charges beyond HF Pro
+            - **Auto Scaling** - Efficient resource usage and queue management
+            #### 🧠 How ColBERT Works:
+            1. **Late Interaction** - Processes queries and documents separately
+            2. **Token-level Matching** - Fine-grained semantic understanding
+            3. **Efficient Retrieval** - Fast search with high-quality results
+            4. **GPU Acceleration** - Leverages H200 for rapid inference
+            #### 📊 Performance Benefits:
+            - **10-100x faster** than CPU-based search
+            - **Large batch processing** - 32+ documents simultaneously
+            - **Real-time search** - Sub-second query responses
+            - **Massive scale** - 70GB VRAM handles huge document sets
+            #### 🛠️ Technical Details:
+            - **Runtime Package Installation** - Automatically installs dependencies
+            - **Gradio SDK Required** - ZeroGPU doesn't support Docker
+            - **Smart Chunking** - Intelligent text segmentation with overlap
+            - **Metadata Tracking** - SQLite database for chunk information
+            #### 🎯 Usage Tips:
+            1. **Initialize first** - Required before processing documents
+            2. **Natural language queries** - ColBERT understands meaning, not just keywords
+            3. **Adjust chunk size** - Larger for context, smaller for precision
+            4. **Multiple file types** - Mix PDFs, DOCX, and TXT files
+            5. **Semantic search** - Try "concepts similar to X" type queries
+            #### 🔒 Privacy & Security:
+            - Documents processed in-memory only
+            - No permanent storage of your content
+            - Processing happens on HF infrastructure
+            - Automatic cleanup after session ends
+            ---
+            **Built with ❤️ using:**
+            - 🤖 PyLate & ColBERT for semantic search
+            - ⚡ ZeroGPU H200 for GPU acceleration
+            - 🎨 Gradio for the interface
+            - 🐍 Python ecosystem for document processing
+            """)
+    return demo
 # ===== MAIN =====
 if __name__ == "__main__":
+    # Announce startup, then report whether CUDA is visible to this process.
+    print("🎉 Launching PyLate ZeroGPU Document Search interface...")
+    gpu_visible = torch.cuda.is_available()
+    # The device name is only queried when CUDA is actually available.
+    device_message = (
+        f"🔥 GPU detected: {torch.cuda.get_device_name()}"
+        if gpu_visible
+        else "💻 Running on CPU (GPU will be allocated when @spaces.GPU functions are called)"
+    )
+    print(device_message)
+    # Server settings passed to Gradio's launch call.
+    launch_options = {
+        "share": False,
+        "server_name": "0.0.0.0",
+        "server_port": 7860,
+        "show_error": True,
+    }
+    demo = create_interface()
+    demo.launch(**launch_options)