Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -9,6 +9,74 @@ from pathlib import Path
|
|
9 |
import re
|
10 |
import uuid
|
11 |
import pymupdf
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
# os.system('pip install -U magic-pdf==0.10.5')
|
14 |
os.system('pip uninstall -y magic-pdf')
|
@@ -211,6 +279,10 @@ def to_pdf(file_path):
|
|
211 |
|
212 |
|
213 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
214 |
with gr.Blocks() as demo:
|
215 |
gr.HTML(header)
|
216 |
with gr.Row():
|
|
|
9 |
import re
|
10 |
import uuid
|
11 |
import pymupdf
|
12 |
+
import telebot
|
13 |
+
from threading import Thread
|
14 |
+
import requests
|
15 |
+
from io import BytesIO
|
16 |
+
|
17 |
+
# The Telegram bot token is read from the BOT_TOKEN environment variable;
# configure it as a secret in the Hugging Face Space settings.
BOT_TOKEN = os.getenv('BOT_TOKEN')
if not BOT_TOKEN:
    # Surface the misconfiguration explicitly instead of leaving only an
    # opaque failure from the Telegram client further down the line.
    print("WARNING: BOT_TOKEN environment variable is not set; "
          "the Telegram bot will not be able to authenticate.")
bot = telebot.TeleBot(BOT_TOKEN)
|
20 |
+
|
21 |
+
def download_file_from_telegram(file_id):
    """Download a file from Telegram's file endpoint into memory.

    Args:
        file_id: Telegram file identifier (e.g. ``message.document.file_id``).

    Returns:
        BytesIO: In-memory buffer holding the raw bytes of the file.

    Raises:
        requests.RequestException: If the HTTP download fails, times out, or
            Telegram answers with a non-success status. The message is
            deliberately sanitised (see below).
    """
    file_info = bot.get_file(file_id)
    file_url = f'https://api.telegram.org/file/bot{BOT_TOKEN}/{file_info.file_path}'
    try:
        # Bound the request so a stalled connection cannot hang the caller.
        response = requests.get(file_url, timeout=60)
        # Fail loudly instead of returning an error body as file content.
        response.raise_for_status()
    except requests.RequestException as exc:
        # The URL embeds the bot token and requests includes the URL in its
        # exception messages; re-raise without it so the token cannot end up
        # in logs or user-facing error replies.
        status = getattr(getattr(exc, 'response', None), 'status_code', None)
        detail = f"HTTP {status}" if status is not None else type(exc).__name__
        raise requests.RequestException(
            f"Failed to download file from Telegram ({detail})"
        ) from None
    return BytesIO(response.content)
|
27 |
+
|
28 |
+
@bot.message_handler(content_types=['document'])
def handle_docs(message):
    """Convert a PDF or image sent to the bot and reply with the results.

    The document is downloaded, written to a private temporary directory,
    passed to ``to_markdown`` and the resulting archive plus the markdown
    text are sent back to the originating chat.

    Args:
        message: Incoming Telegram message carrying a ``document``.

    Returns:
        None. All outcomes (including failures) are reported to the chat.
    """
    # Function-scope imports keep this handler self-contained.
    import logging
    import shutil
    import tempfile

    work_dir = None
    try:
        # Get file from telegram
        file_id = message.document.file_id
        # The name is client-supplied and may be missing; keep only the final
        # path component so it cannot escape the temporary directory.
        file_name = os.path.basename(message.document.file_name or "")

        if not file_name.lower().endswith(('.pdf', '.png', '.jpg', '.jpeg')):
            bot.reply_to(message, "Please send only PDF or image files.")
            return

        # Download file
        file_data = download_file_from_telegram(file_id)

        # Save into a per-request directory so concurrent uploads with the
        # same file name cannot overwrite each other.
        work_dir = tempfile.mkdtemp(prefix="tg_upload_")
        temp_path = os.path.join(work_dir, file_name)
        with open(temp_path, 'wb') as f:
            f.write(file_data.getvalue())

        # Process file using the existing conversion function
        md_content, _txt_content, archive_zip_path, _new_pdf_path = to_markdown(
            temp_path,
            end_pages=10,  # default value
            is_ocr=False,  # default value
            layout_mode="doclayout_yolo",  # default value
            formula_enable=True,  # default value
            table_enable=True,  # default value
            language='auto'  # default value
        )

        # Send back results
        with open(archive_zip_path, 'rb') as zip_file:
            bot.send_document(message.chat.id, zip_file)

        # Send markdown content in chunks if it's too long
        max_length = 4096
        for i in range(0, len(md_content), max_length):
            chunk = md_content[i:i + max_length]
            # Whitespace-only text is presumably rejected by Telegram as an
            # empty message; skip it rather than abort the whole reply.
            if chunk.strip():
                bot.send_message(message.chat.id, chunk)

    except Exception:
        # Keep the details in the server log; raw exception text can contain
        # internals (file paths, request URLs) that must not reach the chat.
        logging.getLogger(__name__).exception("Failed to process Telegram document")
        bot.reply_to(message, "Error processing document. Please try again later.")
    finally:
        # Cleanup runs on success and on failure alike.
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)
|
73 |
+
|
74 |
+
@bot.message_handler(commands=['start', 'help'])
def send_welcome(message):
    """Answer the /start and /help commands with a short usage hint."""
    greeting = "Hello! Send me a PDF or image file and I'll process it for you."
    bot.reply_to(message, greeting)
|
77 |
+
|
78 |
+
def run_bot():
    """Start the Telegram polling loop.

    Used as the target of the background thread started from the script
    entry point, so the bot runs alongside the rest of the application.
    """
    bot.infinity_polling()
|
80 |
|
81 |
# os.system('pip install -U magic-pdf==0.10.5')
|
82 |
os.system('pip uninstall -y magic-pdf')
|
|
|
279 |
|
280 |
|
281 |
if __name__ == "__main__":
|
282 |
+
# Start bot in a separate thread
|
283 |
+
bot_thread = Thread(target=run_bot)
|
284 |
+
bot_thread.start()
|
285 |
+
|
286 |
with gr.Blocks() as demo:
|
287 |
gr.HTML(header)
|
288 |
with gr.Row():
|