dmitrynovikov2121 committed on
Commit
3dc8630
·
verified ·
1 Parent(s): a8c5952

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -0
app.py CHANGED
@@ -9,6 +9,74 @@ from pathlib import Path
9
  import re
10
  import uuid
11
  import pymupdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
 
13
  # os.system('pip install -U magic-pdf==0.10.5')
14
  os.system('pip uninstall -y magic-pdf')
@@ -211,6 +279,10 @@ def to_pdf(file_path):
211
 
212
 
213
  if __name__ == "__main__":
 
 
 
 
214
  with gr.Blocks() as demo:
215
  gr.HTML(header)
216
  with gr.Row():
 
9
  import re
10
  import uuid
11
  import pymupdf
12
+ import telebot
13
+ from threading import Thread
14
+ import requests
15
+ from io import BytesIO
16
+
17
+ # The Telegram bot token is read from the BOT_TOKEN environment variable; set it as a secret in the Hugging Face Space settings.
18
+ BOT_TOKEN = os.getenv('BOT_TOKEN')
19
+ bot = telebot.TeleBot(BOT_TOKEN)
20
+
21
def download_file_from_telegram(file_id, timeout=60):
    """Download a file from Telegram and return its content in memory.

    Args:
        file_id: Telegram file identifier, e.g. ``message.document.file_id``.
        timeout: Seconds to wait for the HTTP download before giving up.

    Returns:
        A ``BytesIO`` holding the downloaded bytes.

    Raises:
        RuntimeError: If the HTTP download fails or Telegram answers with a
            non-success status. The message never contains the download URL.
    """
    file_info = bot.get_file(file_id)
    file_url = f'https://api.telegram.org/file/bot{BOT_TOKEN}/{file_info.file_path}'
    try:
        # A timeout keeps a stalled connection from blocking the bot thread.
        response = requests.get(file_url, timeout=timeout)
        # Without this check an error response body would be returned as if
        # it were the requested file.
        response.raise_for_status()
    except requests.RequestException as exc:
        # The URL embeds the bot token and requests puts the URL into its
        # error messages, so raise a scrubbed error that is safe to show.
        status = getattr(getattr(exc, 'response', None), 'status_code', None)
        reason = type(exc).__name__ if status is None else f"HTTP {status}"
        raise RuntimeError(f"Failed to download file from Telegram ({reason})") from None
    return BytesIO(response.content)
27
+
28
@bot.message_handler(content_types=['document'])
def handle_docs(message):
    """Convert a PDF or image sent to the bot and reply with the results.

    The document is downloaded, written into a private temporary directory,
    passed to ``to_markdown``, and the resulting archive plus the markdown
    text are sent back to the originating chat. Failures are reported to the
    user as a reply instead of being raised.

    Args:
        message: Incoming Telegram message that carries a document.
    """
    # Imported locally so this handler does not depend on file-level imports
    # that may not be present.
    import shutil
    import tempfile

    work_dir = None
    try:
        file_id = message.document.file_id
        # Telegram does not guarantee a file name; an empty name simply fails
        # the extension check below instead of raising on ``None.lower()``.
        file_name = message.document.file_name or ""

        if not file_name.lower().endswith(('.pdf', '.png', '.jpg', '.jpeg')):
            bot.reply_to(message, "Please send only PDF or image files.")
            return

        file_data = download_file_from_telegram(file_id)

        # The name is user-controlled: keep only its final path component and
        # write it inside a fresh directory, so it cannot escape the temp area
        # or collide with another upload that uses the same name.
        work_dir = tempfile.mkdtemp(prefix="tg_upload_")
        temp_path = os.path.join(work_dir, os.path.basename(file_name))
        with open(temp_path, 'wb') as f:
            f.write(file_data.getvalue())

        # Same fixed settings the original handler passed to to_markdown.
        md_content, _txt_content, archive_zip_path, _new_pdf_path = to_markdown(
            temp_path,
            end_pages=10,
            is_ocr=False,
            layout_mode="doclayout_yolo",
            formula_enable=True,
            table_enable=True,
            language='auto',
        )

        with open(archive_zip_path, 'rb') as zip_file:
            bot.send_document(message.chat.id, zip_file)

        # Telegram caps a text message at 4096 characters, so long markdown
        # is sent as consecutive slices.
        max_length = 4096
        for start in range(0, len(md_content), max_length):
            bot.send_message(message.chat.id, md_content[start:start + max_length])

    except Exception as e:
        # Network errors can quote request URLs that embed the bot token;
        # mask it before echoing the error text back to the chat.
        detail = str(e)
        if BOT_TOKEN:
            detail = detail.replace(BOT_TOKEN, "***")
        bot.reply_to(message, f"Error processing document: {detail}")
    finally:
        # Runs on success, early return and failure alike, and only after all
        # replies were sent, so the upload never lingers on disk.
        if work_dir is not None:
            shutil.rmtree(work_dir, ignore_errors=True)
73
+
74
@bot.message_handler(commands=['start', 'help'])
def send_welcome(message):
    """Answer the /start and /help commands with a short usage hint."""
    greeting = "Hello! Send me a PDF or image file and I'll process it for you."
    bot.reply_to(message, greeting)
77
+
78
def run_bot():
    """Start the Telegram bot's polling loop via ``bot.infinity_polling()``."""
    bot.infinity_polling()
80
 
81
  # os.system('pip install -U magic-pdf==0.10.5')
82
  os.system('pip uninstall -y magic-pdf')
 
279
 
280
 
281
  if __name__ == "__main__":
282
+ # Start bot in a separate thread
283
+ bot_thread = Thread(target=run_bot)
284
+ bot_thread.start()
285
+
286
  with gr.Blocks() as demo:
287
  gr.HTML(header)
288
  with gr.Row():