Upload 3 files
- Dockerfile +25 -0
- main.py +191 -0
- requirements.txt +10 -0
Dockerfile
ADDED
@@ -0,0 +1,25 @@
FROM python:3.11-slim

WORKDIR /code

# Install system dependencies
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Create necessary directories
RUN mkdir -p static

# Expose the port
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
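Note that the image bakes in no secrets: OPENAI_API_KEY and PINECONE_API_KEY must be supplied at runtime (as Space secrets, or via docker run -e ... locally). Once the container is up with port 7860 published, a quick smoke test against the root route confirms the app is serving; the localhost URL in this sketch assumes a local run and the requests library is a client-side assumption:

# smoke_test.py -- assumes the container is running locally with -p 7860:7860
import requests

resp = requests.get("http://localhost:7860/")
print(resp.status_code)   # 200 if the app started and index.html was found
print(resp.text[:200])    # beginning of the served page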
main.py
ADDED
@@ -0,0 +1,191 @@
# main.py
import os
import uuid
from typing import List
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
import pinecone
import openai
from dotenv import load_dotenv
import PyPDF2
import io

# Load environment variables from .env file
load_dotenv()

# Initialize FastAPI app
app = FastAPI()

# Mount static files
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")

# Pinecone configuration
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")
INDEX_NAME = "main"
VECTOR_DIM = 3072  # Dimension for 'text-embedding-3-large' embeddings

# Initialize Pinecone
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
if INDEX_NAME not in pc.list_indexes().names():
    pc.create_index(
        name=INDEX_NAME,
        dimension=VECTOR_DIM,
        metric='cosine',
        spec=pinecone.ServerlessSpec(cloud='aws', region='us-east-1')  # required by the v3 client; cloud/region are assumptions
    )
index = pc.Index(INDEX_NAME)

# In-memory store for bot metadata (for demonstration)
bots = {}

def generate_gpt4o_mini_response(context: str, query: str) -> str:
    """
    Generate a response using OpenAI's GPT-4o-mini model
    via the chat completions API.
    """
    client = openai.OpenAI()
    messages = [
        {"role": "system", "content": "You are a helpful assistant that answers questions based on the given context."},
        {"role": "user", "content": f"Context: {context}\n\nQuestion: {query}"}
    ]

    response = client.chat.completions.create(
        model="gpt-4o-mini",  # You can also use "gpt-4" if you have access
        messages=messages,
        max_tokens=150,
        temperature=0.7
    )
    return response.choices[0].message.content.strip()

@app.post("/upload-documents")
async def upload_documents(files: List[UploadFile] = File(...)):
    """
    Accepts file uploads, processes PDFs and other text documents,
    generates embeddings using OpenAI, stores them in Pinecone,
    and returns a unique botid.
    """
    client = openai.OpenAI()
    botid = str(uuid.uuid4())
    bots[botid] = {"vectors": []}

    for file in files:
        # Read file content
        content = await file.read()

        # Process different file types
        if file.filename.lower().endswith('.pdf'):
            # Handle PDF files
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
            text = ""
            for page in pdf_reader.pages:
                text += page.extract_text() + "\n"
        else:
            # Handle other text files
            text = content.decode('utf-8', errors='ignore')

        # Generate embedding using OpenAI
        embedding_response = client.embeddings.create(
            input=text,
            model="text-embedding-3-large"
        )
        vector = embedding_response.data[0].embedding

        # Create a unique ID for this vector
        vector_id = f"{botid}_{file.filename}_{uuid.uuid4()}"

        # Upsert the vector into Pinecone with metadata including the text content
        index.upsert(vectors=[(vector_id, vector, {
            "botid": botid,
            "filename": file.filename,
            "text": text
        })])
        bots[botid]["vectors"].append(vector_id)

    return {"botid": botid}

@app.post("/query")
async def query_endpoint(botid: str = Form(...), query: str = Form(...)):
    """
    Accepts a botid and user query, retrieves relevant vectors from Pinecone,
    and returns a response generated with GPT-4o-mini.
    """
    client = openai.OpenAI()

    # Generate embedding for the query using OpenAI
    query_embedding_response = client.embeddings.create(
        input=query,
        model="text-embedding-3-large"
    )
    query_vector = query_embedding_response.data[0].embedding

    # Query Pinecone for similar vectors associated with the given botid
    response = index.query(
        vector=query_vector,
        top_k=5,
        filter={"botid": {"$eq": botid}},
        include_metadata=True
    )

    # Process the response matches
    matches = response.matches if hasattr(response, 'matches') else []

    # If no matches are found, the bot doesn't exist or has no content
    if not matches:
        return JSONResponse(status_code=404, content={"error": "No content found for this bot"})

    results = []
    relevant_texts = []

    for match in matches:
        if hasattr(match, 'metadata') and match.metadata:
            filename = match.metadata.get('filename', 'Unknown file')
            text = match.metadata.get('text', '')
            score = match.score if hasattr(match, 'score') else 0.0

            results.append({
                "filename": filename,
                "score": score
            })
            if text:
                relevant_texts.append(text)

    # Create context from available results and texts
    context = ""
    if results:
        context += "Relevant files: " + ", ".join([r["filename"] for r in results]) + "\n\n"
    if relevant_texts:
        context += "Content from relevant documents:\n" + "\n---\n".join(relevant_texts)
    else:
        context = "No relevant content found"

    # Use GPT-4o-mini to generate an answer
    answer = generate_gpt4o_mini_response(context, query)

    return {"response": answer, "matches": results}

@app.get("/", response_class=HTMLResponse)
async def root():
    with open("index.html") as f:
        return f.read()

@app.get("/embed-code")
async def generate_embed_code(botid: str):
    """
    Generates and returns a dynamic embed code snippet for the provided botid.
    """
    embed_snippet = f"""
    <div id="smartlyq-chatbot-container"></div>
    <script>
    (function() {{
        var botid = '{botid}';
        var script = document.createElement('script');
        script.src = 'https://huggingface.co/path-to-your-hosted-js-file.js?botid=' + botid;
        document.head.appendChild(script);
    }})();
    </script>
    """
    return {"embed_code": embed_snippet}
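One caveat in the upload flow above: each file's full text is embedded as a single input, and the OpenAI embeddings API rejects inputs beyond the model's 8,191-token limit, so large PDFs will fail outright. Below is a minimal chunking sketch that could stand in for the single embeddings.create call; the 800-word chunk size is an illustrative assumption, not a tuned value.

# chunked_upsert.py -- a sketch under the assumptions above, not the app's actual code
import uuid

def chunk_text(text: str, words_per_chunk: int = 800) -> list[str]:
    """Split text into word-count chunks small enough for the embedding model."""
    words = text.split()
    return [" ".join(words[i:i + words_per_chunk])
            for i in range(0, len(words), words_per_chunk)]

def embed_and_upsert_chunks(index, client, botid: str, filename: str, text: str) -> list[str]:
    """Embed each chunk separately and upsert one vector per chunk,
    carrying the same botid/filename metadata as the single-vector version."""
    vector_ids = []
    for chunk in chunk_text(text):
        emb = client.embeddings.create(input=chunk, model="text-embedding-3-large")
        vector_id = f"{botid}_{filename}_{uuid.uuid4()}"
        index.upsert(vectors=[(vector_id, emb.data[0].embedding,
                               {"botid": botid, "filename": filename, "text": chunk})])
        vector_ids.append(vector_id)
    return vector_ids

Retrieval at query time needs no change: the metadata filter on botid matches per-chunk vectors the same way it matches per-file ones.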
requirements.txt
ADDED
@@ -0,0 +1,10 @@
fastapi>=0.104.1
uvicorn>=0.24.0
python-multipart>=0.0.6
openai>=1.3.0
pinecone-client>=3.0.0
python-dotenv>=1.0.0
pydantic>=2.5.0
typing-extensions>=4.8.0
# uuid is in the Python standard library; the PyPI "uuid" package is an obsolete backport
PyPDF2>=3.0.0
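For completeness, a client-side sketch of the two-step flow the app exposes: upload documents to obtain a botid, then query against it. The base URL and file name are illustrative, and the requests library is a client-side assumption (requirements.txt above only covers the server).

# client_example.py -- illustrative client for the two endpoints above
import requests

BASE_URL = "http://localhost:7860"  # assumption: app running locally

# 1. Upload one or more documents; the server returns a botid.
with open("handbook.pdf", "rb") as f:
    upload = requests.post(
        f"{BASE_URL}/upload-documents",
        files=[("files", ("handbook.pdf", f, "application/pdf"))],
    )
botid = upload.json()["botid"]

# 2. Ask a question scoped to that bot's documents (form-encoded, per the endpoint).
answer = requests.post(
    f"{BASE_URL}/query",
    data={"botid": botid, "query": "What is the vacation policy?"},
)
print(answer.json()["response"])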