poemsforaphrodite committed on
Commit
58249fe
·
verified ·
1 Parent(s): 9e0da7d

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +25 -0
  2. main.py +191 -0
  3. requirements.txt +10 -0
Dockerfile ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
FROM python:3.11-slim

# Unbuffered stdout/stderr so container logs appear in real time (and are
# not lost on a crash); skip .pyc generation to keep the image lean.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /code

# Install system dependencies needed to build wheels for any Python
# packages that ship no prebuilt binary for this platform.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
    build-essential \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first for better layer caching
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Create the directory main.py mounts at /static
RUN mkdir -p static

# Expose the port (Hugging Face Spaces convention)
EXPOSE 7860

# Command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
main.py ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# main.py
import os
import uuid
from typing import List
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse, HTMLResponse
from fastapi.staticfiles import StaticFiles
import pinecone
import openai
from dotenv import load_dotenv
import PyPDF2
import io

# Load environment variables from .env file
load_dotenv()

# Initialize FastAPI app
app = FastAPI()

# Mount static files (the Dockerfile creates the static/ directory)
app.mount("/static", StaticFiles(directory="static"), name="static")

# Configure OpenAI
openai.api_key = os.getenv("OPENAI_API_KEY")

# Pinecone configuration
PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
PINECONE_ENV = os.getenv("PINECONE_ENV")
INDEX_NAME = "main"
# 3072 is the output dimension of 'text-embedding-3-large', the model this
# app actually calls below. (The previous comment wrongly attributed this
# number to 'text-embedding-ada-002', whose dimension is 1536.)
VECTOR_DIM = 3072

# Initialize Pinecone
pc = pinecone.Pinecone(api_key=PINECONE_API_KEY)
if INDEX_NAME not in pc.list_indexes().names():
    # NOTE(review): pinecone-client v3+ requires a `spec=` argument
    # (e.g. pinecone.ServerlessSpec(cloud=..., region=...)) on create_index;
    # confirm the installed client version accepts this call as written.
    pc.create_index(
        name=INDEX_NAME,
        dimension=VECTOR_DIM,
        metric='cosine'
    )
index = pc.Index(INDEX_NAME)

# In-memory store for bot metadata (lost on restart; for demonstration only)
bots = {}
+
def generate_gpt4o_mini_response(context: str, query: str) -> str:
    """
    Answer *query* against *context* via OpenAI's chat-completions API.

    Builds a two-message conversation (system prompt + combined
    context/question) and returns the model's reply with surrounding
    whitespace stripped.
    """
    client = openai.OpenAI()
    system_msg = "You are a helpful assistant that answers questions based on the given context."
    user_msg = f"Context: {context}\n\nQuestion: {query}"

    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # "gpt-4" also works for accounts with access
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        max_tokens=150,
        temperature=0.7,
    )
    return completion.choices[0].message.content.strip()
63
+
# Conservative character cap per embedding request so very large documents
# stay under the embedding model's input token limit.
_MAX_EMBED_CHARS = 8000


def _chunk_text(text: str, size: int = _MAX_EMBED_CHARS) -> List[str]:
    """Split *text* into consecutive chunks of at most *size* characters."""
    return [text[i:i + size] for i in range(0, len(text), size)]


@app.post("/upload-documents")
async def upload_documents(files: List[UploadFile] = File(...)):
    """
    Accepts file uploads, processes PDFs and other text documents,
    generates embeddings using OpenAI, stores them in Pinecone,
    and returns a unique botid.

    Fixes over the previous version:
    - `page.extract_text()` may return None; coalesce to "" so joining
      cannot raise TypeError.
    - Files with no extractable text (e.g. image-only PDFs) are skipped,
      since the embeddings API rejects empty input.
    - Long documents are embedded in bounded chunks instead of one call
      that would exceed the model's input limit; each chunk becomes its
      own vector (the response shape is unchanged).
    """
    client = openai.OpenAI()
    botid = str(uuid.uuid4())
    bots[botid] = {"vectors": []}

    for file in files:
        # Read file content
        content = await file.read()

        # Process different file types
        if file.filename.lower().endswith('.pdf'):
            pdf_reader = PyPDF2.PdfReader(io.BytesIO(content))
            # extract_text() can return None for pages with no text layer
            text = "\n".join((page.extract_text() or "") for page in pdf_reader.pages)
        else:
            # Handle other text files
            text = content.decode('utf-8', errors='ignore')

        if not text.strip():
            # Nothing embeddable in this file — skip it.
            continue

        for chunk in _chunk_text(text):
            # Generate embedding using OpenAI
            embedding_response = client.embeddings.create(
                input=chunk,
                model="text-embedding-3-large"
            )
            vector = embedding_response.data[0].embedding

            # Create a unique ID for this vector
            vector_id = f"{botid}_{file.filename}_{uuid.uuid4()}"

            # Upsert into Pinecone with metadata including the chunk text
            index.upsert(vectors=[(vector_id, vector, {
                "botid": botid,
                "filename": file.filename,
                "text": chunk
            })])
            bots[botid]["vectors"].append(vector_id)

    return {"botid": botid}
109
+
@app.post("/query")
async def query_endpoint(botid: str = Form(...), query: str = Form(...)):
    """
    Accepts a botid and user query, retrieves relevant vectors from Pinecone,
    and returns a response generated using GPT-4o-mini proxy.
    """
    client = openai.OpenAI()

    # Embed the question so it can be compared against stored document vectors.
    embedded = client.embeddings.create(
        input=query,
        model="text-embedding-3-large"
    )
    qvec = embedded.data[0].embedding

    # Top-5 nearest vectors, restricted to this bot's documents.
    pine_resp = index.query(
        vector=qvec,
        top_k=5,
        filter={"botid": {"$eq": botid}},
        include_metadata=True
    )

    hits = getattr(pine_resp, 'matches', [])
    if not hits:
        # No vectors for this botid: unknown bot or a bot with no content.
        return JSONResponse(status_code=404, content={"error": "No content found for this bot"})

    summaries = []
    snippets = []
    for hit in hits:
        meta = getattr(hit, 'metadata', None)
        if not meta:
            continue
        summaries.append({
            "filename": meta.get('filename', 'Unknown file'),
            "score": getattr(hit, 'score', 0.0),
        })
        snippet = meta.get('text', '')
        if snippet:
            snippets.append(snippet)

    # Assemble the model context. When no document text is available, the
    # fallback message replaces everything (including any file list).
    context = ""
    if summaries:
        context += "Relevant files: " + ", ".join(s["filename"] for s in summaries) + "\n\n"
    if snippets:
        context += "Content from relevant documents:\n" + "\n---\n".join(snippets)
    else:
        context = "No relevant content found"

    # Use GPT-4o-mini proxy to generate an answer
    answer = generate_gpt4o_mini_response(context, query)

    return {"response": answer, "matches": summaries}
169
+
@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve the single-page frontend from index.html."""
    # Explicit UTF-8: the default encoding is locale-dependent, which could
    # mangle the page inside a minimal container image.
    with open("index.html", encoding="utf-8") as f:
        return f.read()
174
+
@app.get("/embed-code")
async def generate_embed_code(botid: str):
    """
    Generates and returns a dynamic embed code snippet for the provided botid.

    The snippet injects a placeholder loader script tagged with the botid;
    the script URL is a placeholder to be replaced with the real hosted file.
    """
    snippet = f"""
<div id="smartlyq-chatbot-container"></div>
<script>
(function() {{
    var botid = '{botid}';
    var script = document.createElement('script');
    script.src = 'https://huggingface.co/path-to-your-hosted-js-file.js?botid=' + botid;
    document.head.appendChild(script);
}})();
</script>
"""
    return {"embed_code": snippet}
requirements.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
fastapi>=0.104.1
uvicorn>=0.24.0
python-multipart>=0.0.6
openai>=1.3.0
pinecone-client>=2.2.4
python-dotenv>=1.0.0
pydantic>=2.5.0
typing-extensions>=4.8.0
# NOTE: do NOT list "uuid" here. The PyPI "uuid" package is an obsolete
# Python 2 backport that shadows the stdlib uuid module main.py imports,
# and it breaks on Python 3. The stdlib module needs no installation.
PyPDF2>=3.0.0