AdityaAdaki committed
Commit · 180a8b0
1 Parent(s): 5f1dc39

initial deployment

Files changed:
- .gitignore +5 -0
- app.py +121 -0
- f1_ai.py +285 -0
- llm_manager.py +195 -0
- packages.txt +3 -0
- requirements.txt +15 -0
- setup.sh +4 -0
.gitignore
ADDED
@@ -0,0 +1,5 @@
+.env
+__pycache__/
+*.py[cod]
+*$py.class
+.streamlit/secrets.toml
app.py
ADDED
@@ -0,0 +1,121 @@
+import streamlit as st
+import asyncio
+import os
+from f1_ai import F1AI
+from dotenv import load_dotenv
+
+# Load environment variables from .streamlit/secrets.toml into os.environ
+for key, value in st.secrets.items():
+    os.environ[key] = value
+
+# Initialize session state
+if 'f1_ai' not in st.session_state:
+    # Use HuggingFace by default for Spaces deployment
+    st.session_state.f1_ai = F1AI(llm_provider="huggingface")
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+
+# Set page config
+st.set_page_config(page_title="F1-AI: Formula 1 RAG Application", layout="wide")
+
+# Title and description
+st.title("F1-AI: Formula 1 RAG Application")
+st.markdown("""
+This application uses Retrieval-Augmented Generation (RAG) to answer questions about Formula 1.
+""")
+
+# Add tabs
+tab1, tab2 = st.tabs(["Chat", "Add Content"])
+
+with tab1:
+    # Custom CSS for better styling
+    st.markdown("""
+    <style>
+    .stChatMessage {
+        padding: 1rem;
+        border-radius: 0.5rem;
+        margin-bottom: 1rem;
+        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+    }
+    .stChatMessage.user {
+        background-color: #f0f2f6;
+    }
+    .stChatMessage.assistant {
+        background-color: #ffffff;
+    }
+    .source-link {
+        font-size: 0.8rem;
+        color: #666;
+        text-decoration: none;
+    }
+    </style>
+    """, unsafe_allow_html=True)
+
+    # Display chat history with enhanced formatting
+    for message in st.session_state.chat_history:
+        with st.chat_message(message["role"]):
+            if message["role"] == "assistant" and isinstance(message["content"], dict):
+                st.markdown(message["content"]["answer"])
+                if message["content"]["sources"]:
+                    st.markdown("---")
+                    st.markdown("**Sources:**")
+                    for source in message["content"]["sources"]:
+                        st.markdown(f"- [{source['url']}]({source['url']})")
+            else:
+                st.markdown(message["content"])
+
+    # Question input
+    if question := st.chat_input("Ask a question about Formula 1"):
+        # Add user question to chat history
+        st.session_state.chat_history.append({"role": "user", "content": question})
+
+        # Display user question
+        with st.chat_message("user"):
+            st.write(question)
+
+        # Generate and display response with enhanced formatting
+        with st.chat_message("assistant"):
+            with st.spinner("🤔 Analyzing Formula 1 knowledge..."):
+                response = asyncio.run(st.session_state.f1_ai.ask_question(question))
+                st.markdown(response["answer"])
+
+                # Display sources if available
+                if response["sources"]:
+                    st.markdown("---")
+                    st.markdown("**Sources:**")
+                    for source in response["sources"]:
+                        st.markdown(f"- [{source['url']}]({source['url']})")
+
+        # Add assistant response to chat history
+        st.session_state.chat_history.append({"role": "assistant", "content": response})
+
+with tab2:
+    st.header("Add Content to Knowledge Base")
+
+    urls_input = st.text_area("Enter URLs (one per line)",
+                              placeholder="https://en.wikipedia.org/wiki/Formula_One\nhttps://www.formula1.com/en/latest/article....")
+
+    max_chunks = st.slider("Maximum chunks per URL", min_value=10, max_value=500, value=100, step=10)
+
+    if st.button("Ingest Data"):
+        if urls_input:
+            urls = [url.strip() for url in urls_input.split("\n") if url.strip()]
+            if urls:
+                with st.spinner(f"Ingesting data from {len(urls)} URLs... This may take several minutes."):
+                    progress_bar = st.progress(0)
+
+                    # Process URLs one by one for better UI feedback
+                    for i, url in enumerate(urls):
+                        st.write(f"Processing: {url}")
+                        asyncio.run(st.session_state.f1_ai.ingest([url], max_chunks_per_url=max_chunks))
+                        progress_bar.progress((i + 1) / len(urls))
+
+                    st.success("✅ Data ingestion complete!")
+            else:
+                st.error("Please enter at least one valid URL.")
+        else:
+            st.error("Please enter at least one URL to ingest.")
+
+# Add a footer with credits
+st.markdown("---")
+st.markdown("F1-AI: A Formula 1 RAG Application • Powered by Hugging Face, Pinecone, and LangChain")
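Note: the chat tab above assumes ask_question() returns a dict with an "answer" string and a "sources" list, as implemented in f1_ai.py below. A minimal sketch of that shape (the values here are purely illustrative) can help when testing the UI before any data has been ingested:

    # Illustrative only: the response shape that tab1 renders.
    response = {
        "answer": "Example answer with a citation [1].",
        "sources": [
            {"url": "https://en.wikipedia.org/wiki/Formula_One",
             "chunk_index": 0,
             "timestamp": "2024-01-01T00:00:00"},
        ],
    }

Locally, the app starts with `streamlit run app.py` once the keys referenced in f1_ai.py and llm_manager.py are present in `.streamlit/secrets.toml`.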
f1_ai.py
ADDED
@@ -0,0 +1,285 @@
+import os
+import argparse
+import logging
+from datetime import datetime
+from dotenv import load_dotenv
+from typing import List, Dict, Any, Optional, Tuple
+from rich.console import Console
+from rich.markdown import Markdown
+from pinecone import Pinecone
+from langchain_pinecone import Pinecone as LangchainPinecone
+
+# Import our custom LLM Manager
+from llm_manager import LLMManager
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+console = Console()
+
+# Load environment variables
+load_dotenv()
+
+class F1AI:
+    def __init__(self, index_name: str = "f12", llm_provider: str = "huggingface"):
+        """
+        Initialize the F1-AI RAG application.
+
+        Args:
+            index_name (str): Name of the Pinecone index to use
+            llm_provider (str): Provider for LLM and embeddings.
+                Options: "ollama", "huggingface", "huggingface-openai"
+        """
+        self.index_name = index_name
+
+        # Initialize LLM and embeddings via manager
+        self.llm_manager = LLMManager(provider=llm_provider)
+        self.llm = self.llm_manager.get_llm()
+        self.embeddings = self.llm_manager.get_embeddings()
+
+        # Load Pinecone API Key
+        pinecone_api_key = os.getenv("PINECONE_API_KEY")
+        if not pinecone_api_key:
+            raise ValueError("❌ Pinecone API key missing! Set PINECONE_API_KEY in environment variables.")
+
+        # Initialize the Pinecone client (pinecone-client v3 API)
+
+        # Create the index if it does not already exist, then connect to it
+        try:
+            self.pc = Pinecone(api_key=pinecone_api_key)
+
+            # Check existing indexes
+            existing_indexes = [idx['name'] for idx in self.pc.list_indexes()]
+
+            if index_name not in existing_indexes:
+                console.log(f"🚀 Creating Pinecone index: {index_name}")
+                # The dimension must match the embedding model
+                self.pc.create_index(
+                    name=index_name,
+                    dimension=384,  # Match embedding dimensions of the model
+                    metric="cosine"
+                )
+
+            # Connect to Pinecone index
+            index = self.pc.Index(index_name)
+            self.vectordb = LangchainPinecone.from_existing_index(
+                index_name=index_name,
+                text_key="text",
+                embedding=self.embeddings
+            )
+
+            print(f"✅ Successfully connected to Pinecone index: {index_name}")
+        except Exception as e:
+            import traceback
+            print(f"⚠️ Error connecting to Pinecone: {str(e)}")
+            print(traceback.format_exc())
+            # Set vectordb to None; the application will handle this gracefully
+            self.vectordb = None
+
+    async def scrape(self, url: str, max_chunks: int = 100) -> List[Dict[str, Any]]:
+        """Scrape content from a URL and split into chunks with improved error handling."""
+        from playwright.async_api import async_playwright, TimeoutError
+        from langchain.text_splitter import RecursiveCharacterTextSplitter
+        from bs4 import BeautifulSoup
+
+        try:
+            async with async_playwright() as p:
+                browser = await p.chromium.launch()
+                page = await browser.new_page()
+                console.log(f"[blue]Loading {url}...[/blue]")
+
+                try:
+                    await page.goto(url, timeout=30000)
+                    # Get HTML content
+                    html_content = await page.content()
+                    soup = BeautifulSoup(html_content, 'html.parser')
+
+                    # Remove unwanted elements
+                    for element in soup.find_all(['script', 'style', 'nav', 'footer']):
+                        element.decompose()
+
+                    text = soup.get_text(separator=' ', strip=True)
+                except TimeoutError:
+                    logger.error(f"Timeout while loading {url}")
+                    return []
+                finally:
+                    await browser.close()
+
+            console.log(f"[green]Processing text ({len(text)} characters)...[/green]")
+
+            # Enhanced text cleaning
+            text = ' '.join(text.split())  # Normalize whitespace
+
+            # Improved text splitting with semantic boundaries
+            splitter = RecursiveCharacterTextSplitter(
+                chunk_size=512,
+                chunk_overlap=50,
+                separators=["\n\n", "\n", ".", "!", "?", ",", " "],
+                length_function=len
+            )
+
+            docs = splitter.create_documents([text])
+
+            # Limit the number of chunks
+            limited_docs = docs[:max_chunks]
+            console.log(f"[yellow]Using {len(limited_docs)} chunks out of {len(docs)} total chunks[/yellow]")
+
+            # Enhanced metadata
+            timestamp = datetime.now().isoformat()
+            return [{
+                "page_content": doc.page_content,
+                "metadata": {
+                    "source": url,
+                    "chunk_index": i,
+                    "total_chunks": len(limited_docs),
+                    "timestamp": timestamp
+                }
+            } for i, doc in enumerate(limited_docs)]
+
+        except Exception as e:
+            logger.error(f"Error scraping {url}: {str(e)}")
+            return []
+
+    async def ingest(self, urls: List[str], max_chunks_per_url: int = 100) -> None:
+        """Ingest data from URLs into the vector database."""
+        from langchain_community.vectorstores import Pinecone as LangchainPinecone
+        from tqdm import tqdm
+
+        # Create empty list to store documents
+        all_docs = []
+
+        # Scrape and process each URL with progress bar
+        for url in tqdm(urls, desc="Scraping URLs"):
+            chunks = await self.scrape(url, max_chunks=max_chunks_per_url)
+            all_docs.extend(chunks)
+
+        # Create or update vector database
+        total_docs = len(all_docs)
+        print(f"\nCreating vector database with {total_docs} documents...")
+        texts = [doc["page_content"] for doc in all_docs]
+        metadatas = [doc["metadata"] for doc in all_docs]
+
+        print("Starting embedding generation and uploading to Pinecone (this might take several minutes)...")
+        self.vectordb = LangchainPinecone.from_texts(
+            texts=texts,
+            embedding=self.embeddings,
+            index_name=self.index_name,
+            metadatas=metadatas,
+            text_key="text"
+        )
+
+        print("✅ Documents successfully uploaded to Pinecone!")
+
+    async def ask_question(self, question: str) -> Dict[str, Any]:
+        """Ask a question and get a response using RAG."""
+        if not self.vectordb:
+            return {"answer": "Error: Vector database not initialized. Please ingest data first.", "sources": []}
+
+        try:
+            # Retrieve relevant documents with similarity search
+            retriever = self.vectordb.as_retriever(
+                search_type="similarity",
+                search_kwargs={"k": 5}
+            )
+
+            # Get relevant documents
+            docs = retriever.get_relevant_documents(question)
+
+            if not docs:
+                return {
+                    "answer": "I couldn't find any relevant information in my knowledge base. Please try a different question or ingest more relevant data.",
+                    "sources": []
+                }
+
+            # Format context from documents
+            context = "\n\n".join([f"Document {i+1}: {doc.page_content}" for i, doc in enumerate(docs)])
+
+            # Create prompt for the LLM
+            prompt = f"""
+            Answer the question based on the provided context. Include relevant citations using [1], [2], etc.
+            If you're unsure or if the context doesn't contain the information, acknowledge the uncertainty.
+
+            Context:
+            {context}
+
+            Question: {question}
+
+            Answer with citations:
+            """
+
+            # Get response from LLM
+            response_text = ""
+            if hasattr(self.llm, "__call__"):  # Direct inference client wrapped function
+                response_text = self.llm(prompt)
+                # Debug response
+                logger.info(f"Raw LLM response type: {type(response_text)}")
+                if not response_text or response_text.strip() == "":
+                    logger.error("Empty response from LLM")
+                    response_text = "I apologize, but I couldn't generate a response. This might be due to an issue with the language model."
+            else:  # LangChain LLM
+                response_text = self.llm.invoke(prompt)
+
+            # Format sources
+            sources = [{
+                "url": doc.metadata["source"],
+                "chunk_index": doc.metadata.get("chunk_index", 0),
+                "timestamp": doc.metadata.get("timestamp", "")
+            } for doc in docs]
+
+            # Format response
+            formatted_response = {
+                "answer": response_text,
+                "sources": sources
+            }
+
+            return formatted_response
+
+        except Exception as e:
+            logger.error(f"Error processing question: {str(e)}")
+            return {
+                "answer": f"I apologize, but I encountered an error while processing your question: {str(e)}",
+                "sources": []
+            }
+
+async def main():
+    """Main function to run the application."""
+    import asyncio
+
+    parser = argparse.ArgumentParser(description="F1-AI: RAG Application for Formula 1 information")
+    subparsers = parser.add_subparsers(dest="command", help="Command to run")
+
+    # Ingest command
+    ingest_parser = subparsers.add_parser("ingest", help="Ingest data from URLs")
+    ingest_parser.add_argument("--urls", nargs="+", required=True, help="URLs to scrape")
+    ingest_parser.add_argument("--max-chunks", type=int, default=100, help="Maximum chunks per URL")
+
+    # Ask command
+    ask_parser = subparsers.add_parser("ask", help="Ask a question")
+    ask_parser.add_argument("question", help="Question to ask")
+
+    # Provider for LLM and embeddings (applies to both commands)
+    parser.add_argument("--provider", choices=["ollama", "huggingface", "huggingface-openai"], default="huggingface",
+                        help="Provider for LLM and embeddings (default: huggingface)")
+
+    args = parser.parse_args()
+
+    f1_ai = F1AI(llm_provider=args.provider)
+
+    if args.command == "ingest":
+        await f1_ai.ingest(args.urls, max_chunks_per_url=args.max_chunks)
+    elif args.command == "ask":
+        response = await f1_ai.ask_question(args.question)
+        console.print("\n[bold green]Answer:[/bold green]")
+        # Format as markdown to make it prettier
+        console.print(Markdown(response['answer']))
+
+        console.print("\n[bold yellow]Sources:[/bold yellow]")
+        for i, source in enumerate(response['sources']):
+            console.print(f"[{i+1}] {source['url']}")
+    else:
+        parser.print_help()
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(main())
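Note: besides the CLI entry point above, the F1AI class can be driven programmatically. A minimal sketch, assuming PINECONE_API_KEY and HUGGINGFACE_API_KEY are already set in the environment; the URL and question are illustrative:

    # Programmatic use of F1AI (sketch; not part of the commit).
    import asyncio
    from f1_ai import F1AI

    async def demo():
        f1 = F1AI(llm_provider="huggingface")
        # Ingest one page, capped at 50 chunks, then query the index.
        await f1.ingest(["https://en.wikipedia.org/wiki/Formula_One"], max_chunks_per_url=50)
        result = await f1.ask_question("What is DRS and when is it allowed?")
        print(result["answer"])
        for source in result["sources"]:
            print("-", source["url"])

    if __name__ == "__main__":
        asyncio.run(demo())

The equivalent CLI calls are `python f1_ai.py ingest --urls <url>` and `python f1_ai.py ask "<question>"`, with `--provider` selecting the backend.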
llm_manager.py
ADDED
@@ -0,0 +1,195 @@
+import os
+from typing import List, Dict, Any
+from huggingface_hub import InferenceClient
+from langchain_ollama import OllamaEmbeddings, OllamaLLM
+from dotenv import load_dotenv
+import numpy as np
+import logging
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Load environment variables
+load_dotenv()
+
+class LLMManager:
+    """
+    Manager class for handling different LLM and embedding models.
+    Uses HuggingFace's InferenceClient directly for HuggingFace models.
+    """
+    def __init__(self, provider: str = "huggingface"):
+        """
+        Initialize the LLM Manager.
+
+        Args:
+            provider (str): The provider for LLM and embeddings.
+                Options: "ollama", "huggingface", "huggingface-openai"
+        """
+        self.provider = provider
+        self.llm_client = None
+        self.embedding_client = None
+
+        # Initialize models based on the provider
+        if provider == "ollama":
+            self._init_ollama()
+        elif provider == "huggingface" or provider == "huggingface-openai":
+            self._init_huggingface()
+        else:
+            raise ValueError(f"Unsupported provider: {provider}. Choose 'ollama', 'huggingface', or 'huggingface-openai'")
+
+    def _init_ollama(self):
+        """Initialize Ollama models."""
+        self.llm = OllamaLLM(model="phi4-mini:3.8b")
+        self.embeddings = OllamaEmbeddings(model="mxbai-embed-large:latest")
+
+    def _init_huggingface(self):
+        """Initialize HuggingFace models using InferenceClient directly."""
+        # Get API key from environment
+        api_key = os.getenv("HUGGINGFACE_API_KEY")
+        if not api_key:
+            raise ValueError("HuggingFace API key not found. Set HUGGINGFACE_API_KEY in environment variables.")
+
+        llm_endpoint = "mistralai/Mixtral-8x7B-Instruct-v0.1"
+        embedding_endpoint = "sentence-transformers/all-MiniLM-L6-v2"
+
+        # Initialize InferenceClient for LLM
+        self.llm_client = InferenceClient(
+            model=llm_endpoint,
+            token=api_key
+        )
+
+        # Initialize InferenceClient for embeddings
+        self.embedding_client = InferenceClient(
+            model=embedding_endpoint,
+            token=api_key
+        )
+
+        # Store generation parameters
+        self.generation_kwargs = {
+            "temperature": 0.7,
+            "max_new_tokens": 512,  # Reduced to avoid potential token limit issues
+            "repetition_penalty": 1.1,
+            "do_sample": True,
+            "top_k": 50,
+            "top_p": 0.9,
+            "return_full_text": False  # Only return the generated text, not the prompt
+        }
+
+    # LLM methods for compatibility with LangChain
+    def get_llm(self):
+        """
+        Return a callable object that mimics the LangChain LLM interface.
+        For huggingface providers, this returns a function that calls the InferenceClient.
+        """
+        if self.provider == "ollama":
+            return self.llm
+        else:
+            # Return a function that wraps the InferenceClient for LLM
+            def llm_function(prompt, **kwargs):
+                params = {**self.generation_kwargs, **kwargs}
+                try:
+                    logger.info(f"Sending prompt to HuggingFace (length: {len(prompt)})")
+                    response = self.llm_client.text_generation(
+                        prompt,
+                        details=True,  # Get detailed response
+                        **params
+                    )
+                    # Extract generated text from the detailed response
+                    if isinstance(response, dict) and 'generated_text' in response:
+                        response = response['generated_text']
+                    elif hasattr(response, 'generated_text'):
+                        # With details=True the client returns a response object rather than a plain string
+                        response = response.generated_text
+                    logger.info(f"Received response from HuggingFace (length: {len(response) if response else 0})")
+
+                    # Ensure we get a valid string response
+                    if not response or not isinstance(response, str) or response.strip() == "":
+                        logger.warning("Empty or invalid response from HuggingFace, using fallback")
+                        return "I couldn't generate a proper response based on the available information."
+
+                    return response
+                except Exception as e:
+                    logger.error(f"Error during LLM inference: {str(e)}")
+                    return f"Error generating response: {str(e)}"
+
+            # Add async capability
+            async def allm_function(prompt, **kwargs):
+                params = {**self.generation_kwargs, **kwargs}
+                try:
+                    # InferenceClient.text_generation is synchronous, so it is not awaited here;
+                    # a truly non-blocking call would require AsyncInferenceClient.
+                    response = self.llm_client.text_generation(
+                        prompt,
+                        **params,
+                        stream=False
+                    )
+
+                    # Ensure we get a valid string response
+                    if not response or not isinstance(response, str) or response.strip() == "":
+                        logger.warning("Empty or invalid response from HuggingFace async, using fallback")
+                        return "I couldn't generate a proper response based on the available information."
+
+                    return response
+                except Exception as e:
+                    logger.error(f"Error during async LLM inference: {str(e)}")
+                    return f"Error generating response: {str(e)}"
+
+            llm_function.ainvoke = allm_function
+            return llm_function
+
+    # Embeddings methods for compatibility with LangChain
+    def get_embeddings(self):
+        """
+        Return a callable object that mimics the LangChain Embeddings interface.
+        For huggingface providers, this returns an object with embed_documents and embed_query methods.
+        """
+        if self.provider == "ollama":
+            return self.embeddings
+        else:
+            # Create a wrapper object that has the expected methods
+            class EmbeddingsWrapper:
+                def __init__(self, client):
+                    self.client = client
+
+                def embed_documents(self, texts: List[str]) -> List[List[float]]:
+                    """Embed multiple documents."""
+                    embeddings = []
+                    # Process in batches to avoid overwhelming the API
+                    batch_size = 8
+
+                    for i in range(0, len(texts), batch_size):
+                        batch = texts[i:i+batch_size]
+                        try:
+                            batch_embeddings = self.client.feature_extraction(batch)
+                            # Convert to standard Python list format
+                            batch_results = [list(map(float, embedding)) for embedding in batch_embeddings]
+                            embeddings.extend(batch_results)
+                        except Exception as e:
+                            logger.error(f"Error embedding batch {i}: {str(e)}")
+                            # Return zero vectors as fallback
+                            for _ in range(len(batch)):
+                                embeddings.append([0.0] * 384)  # Use correct dimension
+
+                    return embeddings
+
+                def embed_query(self, text: str) -> List[float]:
+                    """Embed a single query."""
+                    try:
+                        embedding = self.client.feature_extraction(text)
+                        if isinstance(embedding, list) and len(embedding) > 0:
+                            # If it returns a batch (list of embeddings) for a single input
+                            return list(map(float, embedding[0]))
+                        # If it returns a single embedding
+                        return list(map(float, embedding))
+                    except Exception as e:
+                        logger.error(f"Error embedding query: {str(e)}")
+                        # Return zero vector as fallback
+                        return [0.0] * 384  # Use correct dimension
+
+                # Make the class callable to fix the TypeError
+                def __call__(self, texts):
+                    """Make the object callable for compatibility with LangChain."""
+                    if isinstance(texts, str):
+                        return self.embed_query(texts)
+                    elif isinstance(texts, list):
+                        return self.embed_documents(texts)
+                    else:
+                        raise ValueError(f"Unsupported input type: {type(texts)}")
+
+            return EmbeddingsWrapper(self.embedding_client)
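Note: LLMManager can also be exercised on its own, which is useful for checking the HuggingFace credentials before wiring it into the RAG pipeline. A minimal sketch, assuming HUGGINGFACE_API_KEY is set; the prompt and query text are illustrative:

    # Standalone check of the LLM and embedding wrappers (sketch; not part of the commit).
    from llm_manager import LLMManager

    manager = LLMManager(provider="huggingface")
    llm = manager.get_llm()                # callable: llm(prompt) -> str
    embeddings = manager.get_embeddings()  # exposes embed_query / embed_documents

    vector = embeddings.embed_query("Formula 1 pit stop strategy")
    print(len(vector))  # 384 for sentence-transformers/all-MiniLM-L6-v2
    print(llm("In one sentence, what is a Formula 1 sprint race?"))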
packages.txt
ADDED
@@ -0,0 +1,3 @@
+chromium
+wget
+ca-certificates
requirements.txt
ADDED
@@ -0,0 +1,15 @@
+langchain==0.1.0
+langchain-community==0.0.10
+langchain-core==0.1.0
+langchain-pinecone==0.0.1
+langchain-ollama==0.0.1
+pinecone-client==3.0.1
+huggingface-hub==0.20.1
+streamlit==1.29.0
+playwright==1.40.0
+beautifulsoup4==4.12.2
+tqdm==4.66.1
+python-dotenv==1.0.0
+typing-extensions==4.8.0
+rich==13.7.0
+# asyncio is not pinned here because it ships with the Python standard library
setup.sh
ADDED
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Install playwright browsers
+pip install playwright
+playwright install chromium