Spaces:
Build error
Build error
Commit
·
2b5105c
1
Parent(s):
2eb3e22
test MarkitDown
Browse files- app/main.py +32 -12
app/main.py
CHANGED
@@ -10,11 +10,13 @@ from typing import Dict, List
|
|
10 |
from prometheus_client import Counter, Histogram, start_http_server
|
11 |
from pydantic import BaseModel, ValidationError
|
12 |
from app.services.message import generate_reply, send_reply
|
13 |
-
import logging
|
14 |
from datetime import datetime
|
15 |
from sentence_transformers import SentenceTransformer
|
16 |
from app.search.rag_pipeline import RAGSystem
|
17 |
from contextlib import asynccontextmanager
|
|
|
|
|
18 |
# from app.db.database import create_indexes, init_db
|
19 |
from app.services.webhook_handler import verify_webhook
|
20 |
from app.handlers.message_handler import MessageHandler
|
@@ -26,6 +28,7 @@ from app.api.api_prompt import prompt_router
|
|
26 |
from app.api.api_file import file_router
|
27 |
from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
|
28 |
|
|
|
29 |
from markitdown import MarkItDown
|
30 |
|
31 |
# Configure logging
|
@@ -152,17 +155,34 @@ async def webhook(request: Request):
|
|
152 |
app.get("/webhook")(verify_webhook)
|
153 |
|
154 |
@app.post("/load_file")
|
155 |
-
async def load_file_with_markitdown(file_path:str, llm_client:str=None, model:str=None):
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
166 |
# Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
|
167 |
@app.get("/metrics")
|
168 |
async def metrics():
|
|
|
10 |
from prometheus_client import Counter, Histogram, start_http_server
|
11 |
from pydantic import BaseModel, ValidationError
|
12 |
from app.services.message import generate_reply, send_reply
|
13 |
+
import logging, httpx
|
14 |
from datetime import datetime
|
15 |
from sentence_transformers import SentenceTransformer
|
16 |
from app.search.rag_pipeline import RAGSystem
|
17 |
from contextlib import asynccontextmanager
|
18 |
+
|
19 |
+
from httpx import ConnectTimeout, ReadTimeout, HTTPStatusError
|
20 |
# from app.db.database import create_indexes, init_db
|
21 |
from app.services.webhook_handler import verify_webhook
|
22 |
from app.handlers.message_handler import MessageHandler
|
|
|
28 |
from app.api.api_file import file_router
|
29 |
from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
|
30 |
|
31 |
+
|
32 |
from markitdown import MarkItDown
|
33 |
|
34 |
# Configure logging
|
|
|
155 |
app.get("/webhook")(verify_webhook)
|
156 |
|
157 |
@app.post("/load_file")
|
158 |
+
async def load_file_with_markitdown(file_path: str, llm_client: str = None, model: str = None):
    """Download a remote document and convert it to Markdown with MarkItDown.

    Args:
        file_path: Absolute ``http``/``https`` URL of the document to fetch.
        llm_client: Optional LLM client forwarded to MarkItDown. It is only
            used when ``model`` is also provided.
            NOTE(review): this arrives as a plain string from the request,
            while MarkItDown presumably expects a client object -- confirm
            how callers are meant to supply it.
        model: Optional LLM model name forwarded to MarkItDown as ``llm_model``.

    Returns:
        A dict ``{"documents": <markdown text>}`` with the converted content.

    Raises:
        HTTPException: 400 if ``file_path`` is not an http(s) URL; 504 if the
            remote server times out; 502 if it cannot be reached; the remote
            status code if it answers with an error status; 500 for any other
            failure (including conversion errors).
    """
    # Local imports keep this handler self-contained; none of these names are
    # guaranteed to be imported at module level.
    import io
    from pathlib import PurePosixPath
    from urllib.parse import urlsplit

    # SECURITY: the URL comes straight from the request. Only the scheme is
    # checked here; restricting the reachable hosts (SSRF hardening) still has
    # to be decided at the deployment level.
    parsed_url = urlsplit(file_path)
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        raise HTTPException(status_code=400, detail="file_path must be an absolute http(s) URL.")

    try:
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(file_path)
            response.raise_for_status()
            # Keep the raw bytes: decoding to text would corrupt binary
            # formats such as PDF, DOCX or XLSX.
            payload = response.content

        # The LLM options are keyword arguments of the MarkItDown constructor.
        if llm_client and model:
            markitdown = MarkItDown(llm_client=llm_client, llm_model=model)
        else:
            markitdown = MarkItDown()

        # Pass the URL's file extension (when there is one) as a hint so the
        # converter does not have to rely on content sniffing alone.
        convert_kwargs = {}
        extension = PurePosixPath(parsed_url.path).suffix
        if extension:
            convert_kwargs["file_extension"] = extension

        result = markitdown.convert_stream(io.BytesIO(payload), **convert_kwargs)
        markdown_text = result.text_content

        # Log a size summary instead of printing the whole document.
        logging.getLogger(__name__).info(
            "Converted %s to Markdown (%d characters)", file_path, len(markdown_text or "")
        )
        return {"documents": markdown_text}

    except ConnectTimeout as e:
        raise HTTPException(status_code=504, detail="Connection to the external server timed out.") from e
    except ReadTimeout as e:
        raise HTTPException(status_code=504, detail="The external server took too long to respond.") from e
    except HTTPStatusError as e:
        raise HTTPException(status_code=e.response.status_code, detail=e.response.text) from e
    except httpx.TimeoutException as e:
        # Remaining timeout kinds (write / pool) are still gateway timeouts.
        raise HTTPException(status_code=504, detail="The request to the external server timed out.") from e
    except httpx.RequestError as e:
        # DNS failures, refused connections, etc.: the upstream is unreachable.
        raise HTTPException(status_code=502, detail=f"Could not fetch the file: {str(e)}") from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}") from e
|
185 |
+
|
186 |
# Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
|
187 |
@app.get("/metrics")
|
188 |
async def metrics():
|