DeepDiveDev committed
Commit 9164d6d · verified · 1 Parent(s): c6111b8

Update app.py

Files changed (1)
  1. app.py +12 -43
app.py CHANGED
@@ -1,58 +1,27 @@
-!pip install transformers torch torchvision timm easyocr pytesseract gradio datasets huggingface_hub
-
 import gradio as gr
-import torch
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel, pipeline
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
 from PIL import Image
 import requests
 
-# ✅ Load TrOCR model (Pretrained on Handwritten OCR)
-MODEL_NAME = "microsoft/trocr-base-handwritten"
-
-# ✅ Check if GPU is available
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load your model from Hugging Face
+processor = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
+model = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")
 
-# ✅ Cache the model to prevent reloading on every request
-processor = TrOCRProcessor.from_pretrained(MODEL_NAME)
-model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME).to(device)
-
-# ✅ Function to extract text
+# Function to extract text
 def extract_text(image):
     image = Image.open(image).convert("RGB")
-
-    # Convert Image to Model Format
-    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
-
-    # Generate Text from Model
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values
     generated_ids = model.generate(pixel_values)
     extracted_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
-
     return extracted_text
 
-# ✅ Load NLP Pipeline for Structuring
-nlp_pipeline = pipeline("ner", model="dslim/bert-base-NER")
-
-# ✅ Function to Structure Extracted Text
-def structure_text(text):
-    ner_results = nlp_pipeline(text)
-    structured_output = []
-    for entity in ner_results:
-        structured_output.append(f"{entity['word']} ({entity['entity']})")
-    return " ".join(structured_output)
-
-# ✅ Function to process document (OCR + NLP)
-def process_document(image):
-    extracted_text = extract_text(image)
-    structured_text = structure_text(extracted_text)
-    return extracted_text, structured_text
-
-# ✅ Launch Gradio App
+# Gradio Interface
 iface = gr.Interface(
-    fn=process_document,
+    fn=extract_text,
     inputs="image",
-    outputs=["text", "text"],
-    title="TransformoDocs - AI Document Processor",
-    description="Upload a scanned document or handwritten note. The AI will extract and structure the text.",
+    outputs="text",
+    title="TransformoDocs - AI OCR",
+    description="Upload a handwritten document and get the extracted text.",
 )
 
-iface.launch(share=True) # ✅ Use 'share=True' for public link
+iface.launch()
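
A note on the new version, not part of the commit itself: with the shorthand inputs="image", Gradio's default image component passes the function a NumPy array (type="numpy" in recent 3.x/4.x releases), while extract_text calls Image.open(image), which expects a file path or file-like object. The sketch below is one way to keep the committed function body working by requesting a filepath explicitly; the Gradio version and its default behavior are assumptions, not something stated in this commit.

# Hypothetical follow-up sketch, assuming Gradio 4.x (not part of this commit).
# Same app as the new app.py, but with an explicit filepath image input so that
# Image.open() receives a path rather than a NumPy array.
import gradio as gr
from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

processor = TrOCRProcessor.from_pretrained("DeepDiveDev/transformodocs-ocr")
model = VisionEncoderDecoderModel.from_pretrained("DeepDiveDev/transformodocs-ocr")

def extract_text(image_path):
    # Gradio hands over a temp-file path because of type="filepath" below.
    image = Image.open(image_path).convert("RGB")
    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    return processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

iface = gr.Interface(
    fn=extract_text,
    inputs=gr.Image(type="filepath"),  # default "numpy" would break Image.open()
    outputs="text",
    title="TransformoDocs - AI OCR",
    description="Upload a handwritten document and get the extracted text.",
)

iface.launch()

Alternatively, gr.Image(type="pil") would hand the function a PIL image directly, making the Image.open() call unnecessary; either choice avoids relying on the array default.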