Dileep7729 committed on
Commit
96f8896
·
verified ·
1 Parent(s): cbb30bc

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -66
app.py DELETED
@@ -1,66 +0,0 @@
1
- import gradio as gr
2
- import torch
3
- from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
4
- import pytesseract
5
-
6
# Tesseract configuration.
# pytesseract runs the executable named by `tesseract_cmd`; its default is the
# bare command "tesseract" resolved through PATH.  Only override it with the
# stock Windows install location when that binary is really there, so the app
# still starts on Linux/macOS hosts and on Windows machines with a custom
# install.
import os  # needed only for the existence check below

_WINDOWS_TESSERACT = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
if os.path.isfile(_WINDOWS_TESSERACT):
    pytesseract.pytesseract.tesseract_cmd = _WINDOWS_TESSERACT

# Load the model and processor from the same checkpoint so that the tokenizer
# vocabulary and the label map stay in sync.
_CHECKPOINT = "quadranttechnologies/Table_OCR"
processor = LayoutLMv3Processor.from_pretrained(_CHECKPOINT)
model = LayoutLMv3ForTokenClassification.from_pretrained(_CHECKPOINT)
model.eval()  # inference only: switch the module to evaluation mode

# Prefer the GPU when one is available; inputs are moved to this device too.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
15
-
16
def process_image(image):
    """Classify the tokens of a document image and return them as JSON-ready data.

    Args:
        image: The uploaded picture as a PIL image (the Gradio input is
            declared with ``type="pil"``), or ``None`` when nothing was
            uploaded.

    Returns:
        On success, a list of dicts, one per non-empty token, with the keys
        ``"word"`` (decoded token text), ``"bounding_box"`` (the token's entry
        from the processor's ``"bbox"`` output) and ``"label"`` (the predicted
        class name from ``model.config.id2label``).
        On failure, a dict ``{"error": <message>}`` so the JSON output
        component always has something to render.
    """
    if image is None:
        return {"error": "No image provided."}

    try:
        # Preprocess the image using the processor.  The sequence is padded
        # and truncated to a fixed 512 tokens.
        # NOTE(review): converting to RGB mirrors the usual LayoutLMv3 usage;
        # confirm the checkpoint's image processor expects 3-channel input.
        encoding = processor(
            image.convert("RGB"),
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=512,
        )

        # Move inputs to the same device as the model.
        encoding = {key: val.to(device) for key, val in encoding.items()}

        # Perform inference without tracking gradients.
        with torch.no_grad():
            outputs = model(**encoding)
        predictions = torch.argmax(outputs.logits, dim=-1)

        # A single image yields a batch of one; index it explicitly instead of
        # squeeze(), which would also collapse a length-1 sequence dimension.
        token_ids = encoding["input_ids"][0].tolist()
        boxes = encoding["bbox"][0].tolist()
        label_ids = predictions[0].tolist()

        id2label = model.config.id2label
        decode = processor.tokenizer.decode

        structured_output = []
        for token_id, box, label_id in zip(token_ids, boxes, label_ids):
            # skip_special_tokens turns padding and sequence-boundary tokens
            # into "", so the emptiness check below filters them out instead
            # of reporting them as words.
            word = decode([token_id], skip_special_tokens=True).strip()
            if not word:
                continue
            structured_output.append({
                "word": word,
                "bounding_box": box,
                "label": id2label[label_id],  # label ID -> label name
            })

        return structured_output

    except Exception as e:
        # UI boundary: report the failure in the JSON panel rather than
        # letting the request crash.
        return {"error": str(e)}
50
-
51
# Gradio front end: one PIL image in, the structure returned by
# process_image rendered as JSON out.
interface = gr.Interface(
    process_image,
    gr.Image(type="pil"),
    "json",
    title="Table OCR",
    description="Upload an image (e.g., receipt or document) to extract structured information in JSON format.",
)

# Start the web server only when this file is executed as a script;
# share=True additionally asks Gradio for a public share link.
if __name__ == "__main__":
    interface.launch(share=True)
63
-
64
-
65
-
66
-