ariansyahdedy committed on
Commit
9282958
·
1 Parent(s): 319a5e7

test markitdown

Browse files
Files changed (1) hide show
  1. app/main.py +12 -64
app/main.py CHANGED
@@ -10,13 +10,11 @@ from typing import Dict, List
10
  from prometheus_client import Counter, Histogram, start_http_server
11
  from pydantic import BaseModel, ValidationError
12
  from app.services.message import generate_reply, send_reply
13
- import logging, httpx
14
  from datetime import datetime
15
  from sentence_transformers import SentenceTransformer
16
  from app.search.rag_pipeline import RAGSystem
17
  from contextlib import asynccontextmanager
18
-
19
- from httpx import ConnectTimeout, ReadTimeout, HTTPStatusError
20
  # from app.db.database import create_indexes, init_db
21
  from app.services.webhook_handler import verify_webhook
22
  from app.handlers.message_handler import MessageHandler
@@ -28,7 +26,6 @@ from app.api.api_prompt import prompt_router
28
  from app.api.api_file import file_router
29
  from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
30
 
31
-
32
  from markitdown import MarkItDown
33
 
34
  # Configure logging
@@ -154,67 +151,18 @@ async def webhook(request: Request):
154
 
155
  app.get("/webhook")(verify_webhook)
156
 
157
-
158
- @app.get("/test_connection")
159
- async def test_connection():
160
- url = "https://sswalfa.surabaya.go.id/info/detail/pengganti-ipt"
161
- try:
162
- async with httpx.AsyncClient(timeout=10) as client:
163
- response = await client.get(url)
164
- response.raise_for_status()
165
- return {"status": "success", "data": response.text[:200]} # Return first 200 chars
166
- except httpx.ConnectTimeout:
167
- raise HTTPException(status_code=504, detail="Connection timed out.")
168
- except httpx.HTTPStatusError as e:
169
- raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
170
- except Exception as e:
171
- raise HTTPException(status_code=500, detail=str(e))
172
-
173
- @app.get("/test_other_connection")
174
- async def test_other_connection():
175
- url = "https://www.google.com"
176
- try:
177
- async with httpx.AsyncClient(timeout=10) as client:
178
- response = await client.get(url)
179
- response.raise_for_status()
180
- return {"status": "success", "data": response.text[:200]} # Return first 200 chars
181
- except httpx.ConnectTimeout:
182
- raise HTTPException(status_code=504, detail="Connection timed out.")
183
- except httpx.HTTPStatusError as e:
184
- raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
185
- except Exception as e:
186
- raise HTTPException(status_code=500, detail=str(e))
187
-
188
  @app.post("/load_file")
189
- async def load_file_with_markitdown(file_path: str, llm_client: str = None, model: str = None):
190
- try:
191
- async with httpx.AsyncClient(timeout=10) as client:
192
- response = await client.get(file_path)
193
- response.raise_for_status()
194
- print(f"response: {response.json()}")
195
- content = response.json()
196
-
197
- # Initialize MarkItDown
198
- if llm_client and model:
199
- markitdown = MarkItDown(llm_client, model)
200
- else:
201
- markitdown = MarkItDown()
202
-
203
- # Convert the fetched content
204
- documents = markitdown.convert(content) # Assuming a method that accepts raw content
205
-
206
- print(f"documents: {documents}")
207
- return {"documents": documents}
208
-
209
- except ConnectTimeout:
210
- raise HTTPException(status_code=504, detail="Connection to the external server timed out.")
211
- except ReadTimeout:
212
- raise HTTPException(status_code=504, detail="The external server took too long to respond.")
213
- except HTTPStatusError as e:
214
- raise HTTPException(status_code=e.response.status_code, detail=e.response.text)
215
- except Exception as e:
216
- raise HTTPException(status_code=500, detail=f"Internal server error: {str(e)}")
217
-
218
  # Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
219
  @app.get("/metrics")
220
  async def metrics():
 
10
  from prometheus_client import Counter, Histogram, start_http_server
11
  from pydantic import BaseModel, ValidationError
12
  from app.services.message import generate_reply, send_reply
13
+ import logging
14
  from datetime import datetime
15
  from sentence_transformers import SentenceTransformer
16
  from app.search.rag_pipeline import RAGSystem
17
  from contextlib import asynccontextmanager
 
 
18
  # from app.db.database import create_indexes, init_db
19
  from app.services.webhook_handler import verify_webhook
20
  from app.handlers.message_handler import MessageHandler
 
26
  from app.api.api_file import file_router
27
  from app.utils.load_env import ACCESS_TOKEN, WHATSAPP_API_URL, GEMINI_API
28
 
 
29
  from markitdown import MarkItDown
30
 
31
  # Configure logging
 
151
 
152
  app.get("/webhook")(verify_webhook)
153
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  @app.post("/load_file")
155
+ async def load_file_with_markitdown(file_path:str, llm_client:str=None, model:str=None):
156
+
157
+ if llm_client and model:
158
+ markitdown = MarkItDown(llm_client, model)
159
+ documents = markitdown.convert(file_path)
160
+ else:
161
+ markitdown = MarkItDown()
162
+ documents = markitdown.convert(file_path)
163
+
164
+ print(f"documents: {documents}")
165
+ return documents
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  # Add a route for Prometheus metrics (optional, if not using a separate Prometheus server)
167
  @app.get("/metrics")
168
  async def metrics():