#!/usr/bin/env python3
"""
PyLate ZeroGPU Document Search with Runtime Package Installation

Complete version that installs all dependencies at startup if needed.
"""
import subprocess
import sys
import os
import time

print("Starting PyLate ZeroGPU Document Search...")
print("Checking and installing required packages...")
# ===== RUNTIME PACKAGE INSTALLATION =====
def install_package(package, quiet=True):
    """Install a package at runtime via pip."""
    try:
        if quiet:
            subprocess.check_call([
                sys.executable, '-m', 'pip', 'install', package,
                '--quiet', '--disable-pip-version-check'
            ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        else:
            subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
        return True
    except Exception as e:
        print(f"[WARN] Failed to install {package}: {e}")
        return False
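# Design note: installing at runtime keeps the Space self-contained, but it
# slows cold starts. On Hugging Face Spaces, listing these packages in
# requirements.txt is the usual (and faster) alternative; this runtime path is
# a fallback for when that file is missing or out of date.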
def check_and_install_packages():
    """Check and install all required packages."""
    # Define packages with their import names and pip names
    packages_to_check = [
        # (import_name, pip_package, test_import)
        ('gradio', 'gradio==4.44.0', lambda: __import__('gradio')),
        ('spaces', 'spaces', lambda: __import__('spaces')),
        ('sentence_transformers', 'sentence-transformers', lambda: __import__('sentence_transformers')),
        ('docx', 'python-docx', lambda: __import__('docx')),
        ('fitz', 'pymupdf', lambda: __import__('fitz')),
        ('unstructured', 'unstructured', lambda: __import__('unstructured')),
        ('pandas', 'pandas', lambda: __import__('pandas')),
        ('numpy', 'numpy', lambda: __import__('numpy')),
        ('huggingface_hub', 'huggingface_hub', lambda: __import__('huggingface_hub')),
        ('accelerate', 'accelerate', lambda: __import__('accelerate')),
        ('pylate', 'pylate==1.2.0', lambda: __import__('pylate')),
    ]
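    # Note: pylate is pinned to 1.2.0 because the code below relies on its
    # models/indexes/retrieve module layout; gradio is pinned for the same reason.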
    installed_count = 0
    failed_packages = []
    for import_name, pip_package, test_func in packages_to_check:
        try:
            test_func()
            print(f"[OK] {import_name} - already installed")
            installed_count += 1
        except ImportError:
            print(f"Installing {pip_package}...")
            success = install_package(pip_package, quiet=False)
            if success:
                try:
                    # Test the import again after installation
                    test_func()
                    print(f"[OK] {import_name} - installed successfully")
                    installed_count += 1
                except ImportError:
                    print(f"[FAIL] {import_name} - installation failed (import test failed)")
                    failed_packages.append(import_name)
            else:
                print(f"[FAIL] {import_name} - installation failed")
                failed_packages.append(import_name)

    print("\nInstallation Summary:")
    print(f"  Successfully installed/verified: {installed_count}/{len(packages_to_check)}")
    if failed_packages:
        print(f"  Failed packages: {', '.join(failed_packages)}")
        print("  [WARN] App may not work correctly with missing packages")
    else:
        print("  All packages ready!")
    return len(failed_packages) == 0
# Install packages before importing anything else
installation_success = check_and_install_packages()

# Now import everything
print("\nLoading modules...")
try:
    import gradio as gr
    import spaces
    import torch
    import tempfile
    import sqlite3
    import json
    import hashlib
    from pathlib import Path
    from typing import List, Dict, Any, Tuple
    print("[OK] Core modules loaded")
except ImportError as e:
    print(f"[FAIL] Failed to import core modules: {e}")
    sys.exit(1)
# Import document processing modules with fallbacks
try:
    import docx
    print("[OK] python-docx loaded")
except ImportError:
    print("[WARN] python-docx not available - DOCX processing will be disabled")
    docx = None

try:
    import fitz  # PyMuPDF
    print("[OK] PyMuPDF loaded")
except ImportError:
    print("[WARN] PyMuPDF not available - PDF processing will be limited")
    fitz = None

try:
    from unstructured.partition.auto import partition
    print("[OK] Unstructured loaded")
except ImportError:
    print("[WARN] Unstructured not available - fallback text extraction disabled")
    partition = None

try:
    from pylate import models, indexes, retrieve
    print("[OK] PyLate loaded")
except ImportError as e:
    print(f"[FAIL] PyLate failed to load: {e}")
    print("Attempting to install PyLate...")
    install_package('pylate==1.2.0', quiet=False)
    try:
        from pylate import models, indexes, retrieve
        print("[OK] PyLate loaded after installation")
    except ImportError:
        print("[FAIL] PyLate installation failed - core functionality unavailable")
        sys.exit(1)
# Set environment variables
# Triton needs a writable cache directory on Spaces; /tmp is safe to use.
os.environ["TRITON_CACHE_DIR"] = "/tmp/triton_cache"
# Disable torch.compile, which is not reliable in this environment.
os.environ["TORCH_COMPILE_DISABLE"] = "1"

print("All modules loaded successfully!\n")
# Global variables for PyLate components
model = None
index = None
retriever = None
metadata_db = None
# ===== DOCUMENT PROCESSING FUNCTIONS =====
def extract_text_from_pdf(file_path: str) -> str:
    """Extract text from a PDF file using PyMuPDF, with unstructured as a fallback."""
    text = ""
    if not fitz:
        return "Error: PyMuPDF not available for PDF processing"
    try:
        # Use PyMuPDF (fitz) - more reliable than PyPDF2
        doc = fitz.open(file_path)
        for page in doc:
            text += page.get_text() + "\n"
        doc.close()
        # If no text was extracted (e.g. a scanned PDF), try unstructured
        if not text.strip() and partition:
            elements = partition(filename=file_path)
            text = "\n".join([str(element) for element in elements])
    except Exception as e:
        # Final fallback to unstructured
        if partition:
            try:
                elements = partition(filename=file_path)
                text = "\n".join([str(element) for element in elements])
            except Exception:
                text = f"Error: Could not extract text from PDF: {str(e)}"
        else:
            text = f"Error: Could not extract text from PDF: {str(e)}"
    return text.strip()
def extract_text_from_docx(file_path: str) -> str:
    """Extract text from a DOCX file."""
    if not docx:
        return "Error: python-docx not available for DOCX processing"
    try:
        doc = docx.Document(file_path)
        text = ""
        for paragraph in doc.paragraphs:
            text += paragraph.text + "\n"
        return text.strip()
    except Exception as e:
        return f"Error: Could not extract text from DOCX: {str(e)}"

def extract_text_from_txt(file_path: str) -> str:
    """Extract text from a TXT file, falling back to latin-1 if UTF-8 fails."""
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read().strip()
    except UnicodeDecodeError:
        try:
            with open(file_path, 'r', encoding='latin1') as file:
                return file.read().strip()
        except Exception as e:
            return f"Error: Could not read text file: {str(e)}"
    except Exception as e:
        return f"Error: Could not read text file: {str(e)}"
def chunk_text(text: str, chunk_size: int = 1000, overlap: int = 100) -> List[Dict[str, Any]]:
    """Chunk text with overlap and return chunk metadata."""
    chunks = []
    start = 0
    chunk_index = 0
    while start < len(text):
        end = start + chunk_size
        chunk_str = text[start:end]
        # Try to break at a sentence boundary
        if end < len(text):
            last_period = chunk_str.rfind('.')
            last_newline = chunk_str.rfind('\n')
            break_point = max(last_period, last_newline)
            # Only break early if the boundary keeps at least 70% of the chunk
            if break_point > chunk_size * 0.7:
                chunk_str = chunk_str[:break_point + 1]
                end = start + break_point + 1
        if chunk_str.strip():
            chunks.append({
                'text': chunk_str.strip(),
                'start': start,
                'end': end,
                'index': chunk_index,
                'length': len(chunk_str.strip())
            })
            chunk_index += 1
        # Step forward, backing up by `overlap` characters to preserve context
        start = max(start + 1, end - overlap)
    return chunks
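# Example (hypothetical values): with chunk_size=1000 and overlap=100, a 2,500
# character document yields chunks starting near 0, ~900, ~1800, ..., each
# sharing ~100 characters with its predecessor so that sentences straddling a
# boundary appear in both neighbors.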
# ===== METADATA DATABASE =====
def init_metadata_db():
    """Initialize the SQLite database for chunk metadata."""
    global metadata_db
    db_path = "metadata.db"
    metadata_db = sqlite3.connect(db_path, check_same_thread=False)
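    # check_same_thread=False is needed because Gradio event handlers run on
    # worker threads; access here is simple (a single connection, with a commit
    # after each write), which is acceptable for a demo app.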
    metadata_db.execute("""
        CREATE TABLE IF NOT EXISTS documents (
            doc_id TEXT PRIMARY KEY,
            filename TEXT NOT NULL,
            file_hash TEXT NOT NULL,
            original_text TEXT NOT NULL,
            chunk_index INTEGER NOT NULL,
            total_chunks INTEGER NOT NULL,
            chunk_start INTEGER NOT NULL,
            chunk_end INTEGER NOT NULL,
            chunk_size INTEGER NOT NULL,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
    """)
    metadata_db.execute("""
        CREATE INDEX IF NOT EXISTS idx_filename ON documents(filename);
    """)
    metadata_db.commit()
def add_document_metadata(doc_id: str, filename: str, file_hash: str,
                          original_text: str, chunk_info: Dict[str, Any], total_chunks: int):
    """Add document metadata to the database."""
    global metadata_db
    metadata_db.execute("""
        INSERT OR REPLACE INTO documents
        (doc_id, filename, file_hash, original_text, chunk_index, total_chunks,
         chunk_start, chunk_end, chunk_size)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
    """, (
        doc_id, filename, file_hash, original_text,
        chunk_info['index'], total_chunks,
        chunk_info['start'], chunk_info['end'], chunk_info['length']
    ))
    metadata_db.commit()
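# Because doc_id is the primary key and writes use INSERT OR REPLACE,
# re-uploading the same file overwrites its chunk rows rather than
# duplicating them.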
def get_document_metadata(doc_id: str) -> Dict[str, Any]:
    """Get document metadata by ID."""
    global metadata_db
    cursor = metadata_db.execute(
        "SELECT * FROM documents WHERE doc_id = ?", (doc_id,)
    )
    row = cursor.fetchone()
    if row:
        columns = [desc[0] for desc in cursor.description]
        return dict(zip(columns, row))
    return {}
# ===== PYLATE INITIALIZATION =====
# Request the ZeroGPU H200 for up to 2 minutes during initialization
@spaces.GPU(duration=120)
def initialize_pylate(model_name: str = "colbert-ir/colbertv2.0") -> str:
    """Initialize PyLate components on ZeroGPU H200."""
    global model, index, retriever
    try:
        # Initialize the metadata database
        init_metadata_db()

        # Load the ColBERT model
        model = models.ColBERT(model_name_or_path=model_name)

        # Move to GPU - ZeroGPU provides CUDA access inside @spaces.GPU functions
        device_info = "CPU"
        if torch.cuda.is_available():
            model = model.to('cuda')
            device_name = torch.cuda.get_device_name()
            device_info = f"GPU: {device_name}"

        # Initialize the PLAID index with settings tuned for ZeroGPU
        index = indexes.PLAID(
            index_folder="./pylate_index",
            index_name="documents",
            override=True,
            kmeans_niters=1,  # Reduce k-means iterations for faster setup
            nbits=2           # Optimized for memory efficiency
        )

        # Initialize the retriever
        retriever = retrieve.ColBERT(index=index)

        return (
            "PyLate initialized successfully on ZeroGPU!\n"
            f"Model: {model_name}\n"
            f"Device: {device_info}\n"
            "VRAM: ~70GB available\n"
            "Ready for document processing!"
        )
    except Exception as e:
        return f"Error initializing PyLate: {str(e)}\n\nPlease check the logs for more details."
# ===== DOCUMENT PROCESSING =====
# Request the GPU for up to 5 minutes for document processing
@spaces.GPU(duration=300)
def process_documents(files, chunk_size: int = 1000, overlap: int = 100) -> str:
    """Process uploaded documents and add them to the index using ZeroGPU."""
    global model, index, metadata_db
    if not model or not index:
        return "Please initialize PyLate first!"
    if not files:
        return "No files uploaded!"
    try:
        all_documents = []
        all_doc_ids = []
        processed_files = []
        skipped_files = []
        for file in files:
            # Get file info
            filename = Path(file.name).name
            file_path = file.name
            # Calculate the file hash
            with open(file_path, 'rb') as f:
                file_hash = hashlib.md5(f.read()).hexdigest()
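            # MD5 here is just a cheap content fingerprint stored with the
            # metadata; it is not used for security purposes.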
            # Extract text based on the file type
            text = ""
            if filename.lower().endswith('.pdf'):
                if fitz:
                    text = extract_text_from_pdf(file_path)
                else:
                    skipped_files.append(f"{filename}: PDF processing not available")
                    continue
            elif filename.lower().endswith('.docx'):
                if docx:
                    text = extract_text_from_docx(file_path)
                else:
                    skipped_files.append(f"{filename}: DOCX processing not available")
                    continue
            elif filename.lower().endswith('.txt'):
                text = extract_text_from_txt(file_path)
            else:
                skipped_files.append(f"{filename}: Unsupported file type")
                continue

            if not text or text.startswith("Error:"):
                skipped_files.append(f"{filename}: Failed to extract text")
                continue

            # Chunk the text
            chunks = chunk_text(text, chunk_size, overlap)
            if not chunks:
                skipped_files.append(f"{filename}: No valid chunks created")
                continue

            # Process each chunk
            for chunk in chunks:
                doc_id = f"{filename}_chunk_{chunk['index']}"
                all_documents.append(chunk['text'])
                all_doc_ids.append(doc_id)
                # Store metadata
                add_document_metadata(
                    doc_id=doc_id,
                    filename=filename,
                    file_hash=file_hash,
                    original_text=chunk['text'],
                    chunk_info=chunk,
                    total_chunks=len(chunks)
                )
            processed_files.append(f"{filename}: {len(chunks)} chunks")
        if not all_documents:
            return "No text could be extracted from the uploaded files!\n" + "\n".join(skipped_files)

        # Encode documents with PyLate on the H200 GPU
        document_embeddings = model.encode(
            all_documents,
            batch_size=32,   # Comfortable batch size given the H200's 70GB VRAM
            is_query=False,  # Documents and queries use different encoding paths
            show_progress_bar=True
        )

        # Add to the PLAID index
        index.add_documents(
            documents_ids=all_doc_ids,
            documents_embeddings=document_embeddings
        )
        result = f"Successfully processed {len(processed_files)} files on ZeroGPU H200:\n"
        result += f"Total chunks indexed: {len(all_documents)}\n"
        result += "Documents processed:\n"
        for file_info in processed_files:
            result += f"  - {file_info}\n"
        if skipped_files:
            result += "\nSkipped files:\n"
            for skip_info in skipped_files:
                result += f"  - {skip_info}\n"
        result += "\nDocument index ready for search!"
        return result
    except Exception as e:
        return f"Error processing documents: {str(e)}\n\nPlease check your files and try again."
# ===== SEARCH FUNCTION =====
# Request the GPU for up to 1 minute per search
@spaces.GPU(duration=60)
def search_documents(query: str, k: int = 5, show_chunks: bool = True) -> str:
    """Search documents using PyLate on ZeroGPU."""
    global model, retriever, metadata_db
    if not model or not retriever:
        return "Please initialize PyLate and process documents first!"
    if not query.strip():
        return "Please enter a search query!"
    try:
        # Encode the query on the GPU
        query_embedding = model.encode([query], is_query=True)

        # Search; retrieve() returns one result list per query, so take the first
        results = retriever.retrieve(query_embedding, k=k)[0]
        if not results:
            return ("No results found for your query.\n\nTry:\n"
                    "- Different keywords\n"
                    "- Broader search terms\n"
                    "- Checking that documents were processed correctly")
        # Format results with metadata
        formatted_results = [f"**Search Results for:** '{query}' (powered by ZeroGPU H200)\n"]
        for i, result in enumerate(results):
            doc_id = result['id']
            score = result['score']
            # Look up the chunk's metadata
            metadata = get_document_metadata(doc_id)
            formatted_results.append(f"## Result {i+1} (Relevance: {score:.3f})")
            formatted_results.append(
                f"**File:** {metadata.get('filename', 'Unknown')}")
            formatted_results.append(
                f"**Chunk:** {metadata.get('chunk_index', 0) + 1}/{metadata.get('total_chunks', 1)}")
            if show_chunks:
                text = metadata.get('original_text', '')
                if len(text) > 400:
                    preview = text[:400] + "..."
                else:
                    preview = text
                formatted_results.append(f"**Text:** {preview}")
            formatted_results.append("---")
        formatted_results.append(f"\nFound {len(results)} relevant results using ColBERT semantic search")
        return "\n".join(formatted_results)
    except Exception as e:
        return f"Error searching: {str(e)}\n\nPlease try again or check that PyLate is properly initialized."
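# Note: ColBERT relevance scores are sums of per-token MaxSim similarities, so
# they are comparable across results for the same query but not across
# different queries.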
# ===== GRADIO INTERFACE =====
def create_interface():
    """Create the Gradio interface for ZeroGPU."""
    with gr.Blocks(title="PyLate ZeroGPU Document Search", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # PyLate ZeroGPU Document Search
        ### Powered by ColBERT and NVIDIA H200 (70GB VRAM)

        Upload documents, process them with PyLate on ZeroGPU, and perform lightning-fast semantic search!

        **ZeroGPU Features:**
        - NVIDIA H200 GPU with 70GB VRAM
        - Dynamic GPU allocation (only when needed)
        - Free for HF Pro subscribers
        - Optimized for PyTorch/ColBERT workloads
        - Automatic package installation
        """)

        # Status indicator
        with gr.Row():
            gr.Markdown(f"""
            **System Status:**
            - PyLate: Ready
            - Document Processing: PDF {"enabled" if fitz else "disabled"} | DOCX {"enabled" if docx else "disabled"} | TXT enabled
            - ZeroGPU: Available
            """)
        with gr.Tab("Setup"):
            gr.Markdown("### Initialize PyLate System on ZeroGPU H200")
            model_choice = gr.Dropdown(
                choices=[
                    "colbert-ir/colbertv2.0",
                    "sentence-transformers/all-MiniLM-L6-v2"
                ],
                value="colbert-ir/colbertv2.0",
                label="Select ColBERT Model",
                info="ColBERT v2.0 is recommended for best performance"
            )
            init_btn = gr.Button("Initialize PyLate on ZeroGPU", variant="primary", size="lg")
            init_status = gr.Textbox(label="Initialization Status", lines=6, max_lines=10)
            init_btn.click(
                initialize_pylate,
                inputs=model_choice,
                outputs=init_status
            )
        with gr.Tab("Document Upload"):
            gr.Markdown("### Upload and Process Documents on H200 GPU")
            with gr.Row():
                with gr.Column():
                    file_upload = gr.File(
                        file_count="multiple",
                        file_types=[".pdf", ".docx", ".txt"],
                        # gr.File does not accept an `info` kwarg, so the
                        # supported types are noted in the label instead
                        label="Upload Documents (PDF, DOCX, TXT)"
                    )
                    with gr.Row():
                        chunk_size = gr.Slider(
                            minimum=500,
                            maximum=3000,
                            value=1000,
                            step=100,
                            label="Chunk Size (characters)",
                            info="Larger chunks = more context, smaller chunks = more precise"
                        )
                        overlap = gr.Slider(
                            minimum=0,
                            maximum=500,
                            value=100,
                            step=50,
                            label="Chunk Overlap (characters)",
                            info="Overlap helps maintain context between chunks"
                        )
                    process_btn = gr.Button(
                        "Process Documents on ZeroGPU", variant="primary", size="lg")
                with gr.Column():
                    process_status = gr.Textbox(
                        label="Processing Status",
                        lines=15,
                        max_lines=20,
                        info="Processing status and results will appear here"
                    )
            process_btn.click(
                process_documents,
                inputs=[file_upload, chunk_size, overlap],
                outputs=process_status
            )
        with gr.Tab("Search"):
            gr.Markdown("### Search Your Documents with H200 Power")
            with gr.Row():
                with gr.Column():
                    search_query = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter your search query... (e.g., 'machine learning algorithms', 'financial projections')",
                        lines=2,
                        info="Use natural language - ColBERT understands semantic meaning"
                    )
                    with gr.Row():
                        num_results = gr.Slider(
                            minimum=1,
                            maximum=20,
                            value=5,
                            step=1,
                            label="Number of Results",
                            info="How many relevant chunks to return"
                        )
                        show_chunks = gr.Checkbox(
                            value=True,
                            label="Show Text Chunks",
                            info="Display the actual text content"
                        )
                    search_btn = gr.Button("Search with ZeroGPU", variant="primary", size="lg")
                with gr.Column():
                    search_results = gr.Textbox(
                        label="Search Results",
                        lines=18,
                        max_lines=25,
                        info="Semantic search results will appear here"
                    )
            search_btn.click(
                search_documents,
                inputs=[search_query, num_results, show_chunks],
                outputs=search_results
            )
        with gr.Tab("ZeroGPU Info"):
            gr.Markdown("""
            ### About ZeroGPU PyLate Search

            **Powered by the NVIDIA H200 Tensor Core GPU**

            #### ZeroGPU Features:
            - **70GB HBM3 Memory** - Massive capacity for large document collections
            - **Dynamic Allocation** - GPU assigned only when functions need it
            - **Optimized for PyTorch** - Perfect for ColBERT/PyLate workloads
            - **Free for Pro Users** - No additional charges beyond HF Pro
            - **Auto Scaling** - Efficient resource usage and queue management

            #### How ColBERT Works:
            1. **Late Interaction** - Processes queries and documents separately
            2. **Token-level Matching** - Fine-grained semantic understanding
            3. **Efficient Retrieval** - Fast search with high-quality results
            4. **GPU Acceleration** - Leverages the H200 for rapid inference

            #### Performance Benefits:
            - **10-100x faster** than CPU-based search
            - **Large batch processing** - 32+ documents simultaneously
            - **Real-time search** - Sub-second query responses
            - **Massive scale** - 70GB VRAM handles huge document sets

            #### Technical Details:
            - **Runtime Package Installation** - Automatically installs dependencies
            - **Gradio SDK Required** - ZeroGPU doesn't support Docker
            - **Smart Chunking** - Intelligent text segmentation with overlap
            - **Metadata Tracking** - SQLite database for chunk information

            #### Usage Tips:
            1. **Initialize first** - Required before processing documents
            2. **Natural language queries** - ColBERT understands meaning, not just keywords
            3. **Adjust chunk size** - Larger for context, smaller for precision
            4. **Multiple file types** - Mix PDFs, DOCX, and TXT files
            5. **Semantic search** - Try "concepts similar to X" type queries

            #### Privacy & Security:
            - Documents are processed in-memory only
            - No permanent storage of your content
            - Processing happens on HF infrastructure
            - Automatic cleanup after the session ends

            ---
            **Built with ❤️ using:**
            - PyLate & ColBERT for semantic search
            - ZeroGPU H200 for GPU acceleration
            - Gradio for the interface
            - The Python ecosystem for document processing
            """)
    return demo
# ===== MAIN =====
if __name__ == "__main__":
    print("Launching PyLate ZeroGPU Document Search interface...")

    # Check whether a GPU is already visible
    if torch.cuda.is_available():
        print(f"GPU detected: {torch.cuda.get_device_name()}")
    else:
        print("Running on CPU (GPU will be allocated when @spaces.GPU functions are called)")

    demo = create_interface()
    demo.launch(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        show_error=True
    )