# Web Content Q&A Tool for Hugging Face Spaces # Optimized for memory constraints (2GB RAM) and 24-hour timeline # Features: Ingest up to 3 URLs, ask questions, get concise one-line answers using DistilBERT with PyTorch import gradio as gr from bs4 import BeautifulSoup import requests from sentence_transformers import SentenceTransformer, util import numpy as np from transformers import pipeline, AutoModelForQuestionAnswering, AutoTokenizer import torch from huggingface_hub import hf_hub_download, HfFolder from huggingface_hub.utils import configure_http_backend import requests as hf_requests import re # Configure Hugging Face Hub to use a custom session with increased timeout and retries def create_custom_session(): session = hf_requests.Session() # Increase timeout to 30 seconds (default is 10 seconds) adapter = hf_requests.adapters.HTTPAdapter(max_retries=3) # Retry 3 times on failure session.mount("https://", adapter) session.timeout = 30 # Set timeout to 30 seconds return session # Set the custom session for Hugging Face Hub configure_http_backend(backend_factory=create_custom_session) # Global variables for in-memory storage (reset on app restart) corpus = [] # List of paragraphs from URLs embeddings = None # Precomputed embeddings for retrieval sources_list = [] # Source URLs for each paragraph # Load models at startup (memory: ~370MB total) # Retrieval model: all-mpnet-base-v2 (~110MB, 768-dim embeddings) retriever = SentenceTransformer('all-mpnet-base-v2') # Load PyTorch model for QA # Model: distilbert-base-uncased-distilled-squad (~260MB) try: model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad") tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad") except Exception as e: print(f"Error loading model: {str(e)}. Retrying with force_download=True...") # Force re-download in case of corrupted cache model = AutoModelForQuestionAnswering.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True) tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-distilled-squad", force_download=True) # Set model to evaluation mode model.eval() # Apply quantization to the model for faster inference on CPU model = torch.quantization.quantize_dynamic( model, {torch.nn.Linear}, dtype=torch.qint8 ) # Create the QA pipeline with PyTorch qa_model = pipeline("question-answering", model=model, tokenizer=tokenizer, framework="pt", device=-1) # device=-1 for CPU # Utility function to truncate text to one line def truncate_to_one_line(text): # Split by sentence-ending punctuation and take the first sentence sentences = re.split(r'[.!?]+', text.strip()) first_sentence = sentences[0].strip() if sentences else text.strip() # If the sentence is too long, truncate to 100 characters if len(first_sentence) > 100: first_sentence = first_sentence[:100].rsplit(' ', 1)[0] + "..." return first_sentence if first_sentence else "No answer available." def ingest_urls(urls): """ Ingest up to 3 URLs, scrape content, and compute embeddings. Limits: 100 paragraphs per URL to manage memory (~0.5MB embeddings total). """ global corpus, embeddings, sources_list # Clear previous data corpus.clear() sources_list.clear() embeddings = None # Parse URLs from input (one per line, max 3) url_list = [url.strip() for url in urls.split("\n") if url.strip()][:3] if not url_list: return "Error: Please enter at least one valid URL." # Headers to mimic browser and avoid blocking headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"} # Scrape each URL for url in url_list: try: response = requests.get(url, headers=headers, timeout=5) response.raise_for_status() # Raise exception for bad status codes soup = BeautifulSoup(response.text, 'html.parser') # Extract content from
and