Update app.py
app.py
CHANGED
@@ -5,6 +5,8 @@ import numpy as np
 from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
 from typing import List, Tuple
+from fastapi import FastAPI, Query
+import uvicorn
 
 # Default settings
 class ChatConfig:
@@ -40,39 +42,29 @@ def search_relevant_text(query):
     _, closest_idx = index.search(np.array(query_embedding, dtype=np.float32), k=3)
     return "\n".join([documents[i] for i in closest_idx[0]])
 
-def generate_response(
-    message: str,
-    history: List[Tuple[str, str]],
-    system_message: str = ChatConfig.DEFAULT_SYSTEM_MSG,
-    max_tokens: int = ChatConfig.DEFAULT_MAX_TOKENS,
-    temperature: float = ChatConfig.DEFAULT_TEMP,
-    top_p: float = ChatConfig.DEFAULT_TOP_P
-) -> str:
+def generate_response_sync(message: str) -> str:
+    """Generates response synchronously for FastAPI"""
     if not documents:
         return "Please upload a PDF first."
 
     context = search_relevant_text(message)  # Get relevant content from PDF
-    messages = [{"role": "system", "content": system_message}]
-
-    for user_msg, bot_msg in history:
-        if user_msg:
-            messages.append({"role": "user", "content": user_msg})
-        if bot_msg:
-            messages.append({"role": "assistant", "content": bot_msg})
-
-    messages.append({"role": "user", "content": f"Context: {context}\nQuestion: {message}"})
+    messages = [
+        {"role": "system", "content": ChatConfig.DEFAULT_SYSTEM_MSG},
+        {"role": "user", "content": f"Context: {context}\nQuestion: {message}"}
+    ]
 
     response = ""
     for chunk in client.chat_completion(
         messages,
-        max_tokens=max_tokens,
+        max_tokens=ChatConfig.DEFAULT_MAX_TOKENS,
         stream=True,
-        temperature=temperature,
-        top_p=top_p,
+        temperature=ChatConfig.DEFAULT_TEMP,
+        top_p=ChatConfig.DEFAULT_TOP_P,
     ):
         token = chunk.choices[0].delta.content or ""
         response += token
-        yield response
+
+    return response
 
 def handle_upload(pdf_file):
     """Handles PDF upload and creates vector DB"""
@@ -100,13 +92,31 @@ def create_interface() -> gr.Blocks:
 
     # Chat function
    send_button.click(
-        generate_response,
-        inputs=[user_input
+        generate_response_sync,
+        inputs=[user_input],
        outputs=[output]
     )
 
     return interface
 
+# FastAPI Integration
+app = FastAPI()
+
+@app.get("/chat")
+def chat_with_pdf(msg: str = Query(..., title="User Message")):
+    """API endpoint to receive a message and return AI response"""
+    response = generate_response_sync(msg)
+    return {"response": response}
+
 if __name__ == "__main__":
-    interface = create_interface()
-    interface.launch()
+    import threading
+
+    # Start Gradio UI in a separate thread
+    def run_gradio():
+        gradio_app = create_interface()
+        gradio_app.launch(server_name="0.0.0.0", server_port=7860, share=True)
+
+    threading.Thread(target=run_gradio).start()
+
+    # Start FastAPI
+    uvicorn.run(app, host="0.0.0.0", port=8000)
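For anyone trying the commit locally, the new endpoint takes the user message as a msg query parameter and returns JSON. A minimal client sketch, assuming the server is up on port 8000 as configured in __main__, that the requests package is installed, and that a PDF has already been uploaded through the UI (the URL and sample question are illustrative):

import requests

# Hypothetical smoke test for the new /chat endpoint; the question text
# is only an example. A PDF must already be indexed, otherwise the
# endpoint returns "Please upload a PDF first." inside the JSON payload.
resp = requests.get(
    "http://localhost:8000/chat",
    params={"msg": "What is this document about?"},
    timeout=60,
)
print(resp.json())  # e.g. {"response": "..."}

Note that generate_response_sync still streams from client.chat_completion internally but only returns once the stream is fully consumed, so the HTTP response arrives in one piece rather than token by token.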
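Launching the Blocks UI on a background thread while uvicorn runs in the main thread works, but it leaves two servers on two ports and a non-daemon thread that keeps the process alive after uvicorn exits. If the second port is unwanted, Gradio's mount_gradio_app helper can serve the UI from the same FastAPI app. A sketch of that alternative, assuming a Gradio version that ships the helper (the /ui mount path is arbitrary):

import gradio as gr
import uvicorn

# Alternative to the threading setup above: mount the Gradio Blocks UI
# onto the existing FastAPI `app`, so a single uvicorn process serves
# both the /chat endpoint and the UI at /ui.
gradio_ui = create_interface()  # the Blocks factory defined in app.py
app = gr.mount_gradio_app(app, gradio_ui, path="/ui")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)

This drops the share=True tunnel, so it fits a deployment where both the API and the UI are reachable on the same host.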