ariansyahdedy committed on
Commit
2b5105c
·
1 Parent(s): 2eb3e22

test MarkitDown

Browse files
Files changed (1) hide show
  1. app/main.py +32 -12
app/main.py CHANGED
@@ -10,11 +10,13 @@ from typing import Dict, List
10
  from prometheus_client import Counter, Histogram, start_http_server
11
  from pydantic import BaseModel, ValidationError
12
  from app.services.message import generate_reply, send_reply
13
- import logging
14
  from datetime import datetime
15
  from sentence_transformers import SentenceTransformer
16
  from app.search.rag_pipeline import RAGSystem
17
  from contextlib import asynccontextmanager
 
 
18
  # from app.db.database import create_indexes, init_db
19
  from app.services.webhook_handler import verify_webhook
20
  from app.handlers.message_handler import MessageHandler
@@ -26,6 +28,7 @@ from app.api.api_prompt import prompt_router
26
  from app.api.api_file import file_router
27
  from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
28
 
 
29
  from markitdown import MarkItDown
30
 
31
  # Configure logging
@@ -152,17 +155,34 @@ async def webhook(request: Request):
152
  # Register verify_webhook as the GET handler for the /webhook path.
  app.get("/webhook")(verify_webhook)
153
 
154
  @app.post("/load_file")
155
async def load_file_with_markitdown(file_path:str, llm_client:str=None, model:str=None):
    """Convert the document at ``file_path`` with MarkItDown and return the result.

    Args:
        file_path: Location of the document, passed unchanged to
            ``MarkItDown.convert``.
        llm_client: Optional LLM client forwarded to MarkItDown. It is only
            used when ``model`` is also provided.
        model: Optional LLM model name forwarded to MarkItDown as ``llm_model``.

    Returns:
        The object returned by ``MarkItDown.convert``.
    """
    # The LLM settings must be passed by keyword: positionally they do not
    # bind to MarkItDown's llm_client / llm_model options.
    # NOTE(review): llm_client arrives as a plain string from the request,
    # while MarkItDown presumably expects a client object - confirm with callers.
    if llm_client and model:
        markitdown = MarkItDown(llm_client=llm_client, llm_model=model)
    else:
        markitdown = MarkItDown()

    # Both configurations convert the same way, so the call is made once.
    documents = markitdown.convert(file_path)

    print(f"documents: {documents}")
    return documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  # Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
167
  @app.get("/metrics")
168
  async def metrics():
 
10
  from prometheus_client import Counter, Histogram, start_http_server
11
  from pydantic import BaseModel, ValidationError
12
  from app.services.message import generate_reply, send_reply
13
+ import logging, httpx
14
  from datetime import datetime
15
  from sentence_transformers import SentenceTransformer
16
  from app.search.rag_pipeline import RAGSystem
17
  from contextlib import asynccontextmanager
18
+
19
+ from httpx import ConnectTimeout, ReadTimeout, HTTPStatusError
20
  # from app.db.database import create_indexes, init_db
21
  from app.services.webhook_handler import verify_webhook
22
  from app.handlers.message_handler import MessageHandler
 
28
  from app.api.api_file import file_router
29
  from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
30
 
31
+
32
  from markitdown import MarkItDown
33
 
34
  # Configure logging
 
155
  # Register verify_webhook as the GET handler for the /webhook path.
  app.get("/webhook")(verify_webhook)
156
 
157
  @app.post("/load_file")
158
async def load_file_with_markitdown(file_path: str, llm_client: str = None, model: str = None):
    """Download a remote document and convert it with MarkItDown.

    Args:
        file_path: HTTP(S) URL of the document to download.
        llm_client: Optional LLM client forwarded to MarkItDown. It is only
            used when ``model`` is also provided.
        model: Optional LLM model name forwarded to MarkItDown as ``llm_model``.

    Returns:
        A dict of the form ``{"documents": <MarkItDown conversion result>}``.

    Raises:
        HTTPException: 400 when ``file_path`` is not an http(s) URL, 504 when
            the download times out, the upstream status code when the remote
            server answers with an error, and 500 for any other failure.
    """
    # Function-scope imports keep the new stdlib names local to this handler.
    import io
    import os
    from urllib.parse import urlparse

    # file_path comes straight from the request, so reject anything that is
    # not a plain http(s) URL before making a network call. This check sits
    # outside the try block so the 400 is not rewrapped as a 500.
    # NOTE(review): any reachable host is still fetched (SSRF risk) - consider
    # an allow-list if this endpoint is exposed publicly.
    parsed_url = urlparse(file_path)
    if parsed_url.scheme not in ("http", "https") or not parsed_url.netloc:
        raise HTTPException(status_code=400, detail="file_path must be an http(s) URL.")

    try:
        async with httpx.AsyncClient(timeout=10) as client:
            response = await client.get(file_path)
            response.raise_for_status()
            # Keep the raw bytes: response.text decodes the payload and would
            # corrupt binary formats such as PDF or DOCX.
            payload = response.content

        # The LLM settings must be passed by keyword: positionally they do not
        # bind to MarkItDown's llm_client / llm_model options.
        # NOTE(review): llm_client arrives as a plain string from the request,
        # while MarkItDown presumably expects a client object - confirm.
        if llm_client and model:
            markitdown = MarkItDown(llm_client=llm_client, llm_model=model)
        else:
            markitdown = MarkItDown()

        # MarkItDown has no convert_content(); convert_stream() takes a binary
        # stream. The URL's extension is passed as a hint for picking the
        # right converter.
        extension = os.path.splitext(parsed_url.path)[1] or None
        documents = markitdown.convert_stream(io.BytesIO(payload), file_extension=extension)

        print(f"documents: {documents}")
        return {"documents": documents}

    except ConnectTimeout as exc:
        raise HTTPException(status_code=504, detail="Connection to the external server timed out.") from exc
    except ReadTimeout as exc:
        raise HTTPException(status_code=504, detail="The external server took too long to respond.") from exc
    except HTTPStatusError as exc:
        # Relay the upstream status code and body to the caller.
        raise HTTPException(status_code=exc.response.status_code, detail=exc.response.text) from exc
    except Exception as exc:
        # Boundary handler: report any other failure as a 500 and keep the cause chained.
        raise HTTPException(status_code=500, detail=f"Internal server error: {str(exc)}") from exc
185
+
186
  # Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
187
  @app.get("/metrics")
188
  async def metrics():