Pavan147 committed on
Commit
8e81891
·
verified ·
1 Parent(s): 4e8d812

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -17
app.py CHANGED
@@ -62,23 +62,30 @@
62
  # )
63
 
64
  # demo.launch()
 
65
  import gradio as gr
66
  from transformers import AutoProcessor, AutoModelForImageTextToText
67
  from PIL import Image
68
- import re
69
 
70
  # Load model & processor once at startup
71
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
72
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
73
 
74
def extract_numbers_from_docling(docling_text):
    """Return every numeric literal found in a docling string as a flat list.

    Args:
        docling_text: Decoded model output, possibly containing markup such
            as ``<fcel>``, ``<nl>`` and ``<loc_N>`` tags.

    Returns:
        A list of numbers in order of appearance. Tokens containing a
        decimal point become ``float``; all others become ``int``. An
        optional leading ``+`` or ``-`` is kept for both forms.
    """
    # Location tags embed digits that are coordinates, not document data.
    # Replace them with a space so the neighbouring text is not fused.
    text = re.sub(r"<loc_\d+>", " ", docling_text)

    # The alternation is grouped so the optional sign applies to integers as
    # well as decimals; ungrouped, "-5" was matched as "5".
    numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", text)

    def convert_num(s):
        # A signed integer such as "-5" fails str.isdigit(), so decide by
        # the presence of a decimal point instead.
        return float(s) if "." in s else int(s)

    return [convert_num(num) for num in numbers]
 
 
 
 
 
 
 
82
 
83
  def smoldocling_readimage(image, prompt_text):
84
  messages = [
@@ -89,11 +96,10 @@ def smoldocling_readimage(image, prompt_text):
89
  outputs = model.generate(**inputs, max_new_tokens=1024)
90
  prompt_length = inputs.input_ids.shape[1]
91
  generated = outputs[:, prompt_length:]
92
- result = processor.batch_decode(generated, skip_special_tokens=False)[0]
93
- clean_result = result.replace("<end_of_utterance>", "").strip()
94
-
95
- numbers = extract_numbers_from_docling(clean_result)
96
- return numbers
97
 
98
  # Gradio UI
99
  demo = gr.Interface(
@@ -102,9 +108,9 @@ demo = gr.Interface(
102
  gr.Image(type="pil", label="Upload Image"),
103
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
104
  ],
105
- outputs=gr.JSON(),
106
- title="SmolDocling Web App - Extract Numbers",
107
- description="Upload a document image and extract numeric values as a list."
108
  )
109
 
110
  demo.launch()
 
62
  # )
63
 
64
  # demo.launch()
65
+ import re
66
  import gradio as gr
67
  from transformers import AutoProcessor, AutoModelForImageTextToText
68
  from PIL import Image
 
69
 
70
  # Load model & processor once at startup
71
  processor = AutoProcessor.from_pretrained("ds4sd/SmolDocling-256M-preview")
72
  model = AutoModelForImageTextToText.from_pretrained("ds4sd/SmolDocling-256M-preview")
73
 
74
def extract_values(docling_text):
    """Parse docling table markup into rows of floats.

    Rows are separated by ``<nl>`` and each cell is introduced by an
    ``<fcel>`` tag. ``<loc_N>`` tags are discarded before parsing.

    Args:
        docling_text: Decoded model output containing the table markup.

    Returns:
        A list with one entry per non-blank row, each entry being the list
        of cell values that parse as ``float``. Cells that are not numeric
        are skipped, so a row may come back shorter or empty.
    """
    # Remove all <loc_*> tags so their digits are not mistaken for cell data.
    cleaned = re.sub(r"<loc_\d+>", "", docling_text)

    result = []
    for row in cleaned.split("<nl>"):
        if not row.strip():
            continue

        float_values = []
        # A cell runs from its <fcel> tag to the next tag (or end of row).
        # Matching only the opening tag keeps one cell from consuming the
        # next cell's delimiter, which used to drop every other cell and
        # always the last one.
        for cell in re.findall(r"<fcel>([^<]*)", row):
            try:
                float_values.append(float(cell))
            except ValueError:
                # Non-numeric cell (text, blank): skip it rather than abort
                # the whole extraction.
                continue
        result.append(float_values)
    return result
89
 
90
  def smoldocling_readimage(image, prompt_text):
91
  messages = [
 
96
  outputs = model.generate(**inputs, max_new_tokens=1024)
97
  prompt_length = inputs.input_ids.shape[1]
98
  generated = outputs[:, prompt_length:]
99
+ raw_result = processor.batch_decode(generated, skip_special_tokens=False)[0]
100
+ # Clean and extract numeric values
101
+ values_array = extract_values(raw_result)
102
+ return str(values_array)
 
103
 
104
  # Gradio UI
105
  demo = gr.Interface(
 
108
  gr.Image(type="pil", label="Upload Image"),
109
  gr.Textbox(lines=1, placeholder="Enter prompt (e.g. Convert to docling)", label="Prompt"),
110
  ],
111
+ outputs="text",
112
+ title="SmolDocling Web App",
113
+ description="Upload a document image and convert it to structured docling format."
114
  )
115
 
116
  demo.launch()