la04 committed on
Commit
c545813
·
verified ·
1 Parent(s): bd2041d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -4
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  from langchain.vectorstores import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from transformers import LayoutLMv3Processor, AutoModelForSeq2SeqLM
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
@@ -11,15 +11,19 @@ import os
11
  class LayoutLMv3OCR:
12
  def __init__(self):
13
  self.processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
14
- self.model = AutoModelForSeq2SeqLM.from_pretrained("microsoft/layoutlmv3-base")
 
15
 
16
  def extract_text(self, pdf_path):
17
  images = convert_from_path(pdf_path)
18
  text_pages = []
19
  for image in images:
 
20
  inputs = self.processor(images=image, return_tensors="pt")
21
- outputs = self.model.generate(**inputs)
22
- text = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]
 
 
23
  text_pages.append(text)
24
  return text_pages
25
 
 
2
  from langchain.vectorstores import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from transformers import LayoutLMv3Processor, AutoModelForTokenClassification
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
 
11
  class LayoutLMv3OCR:
12
  def __init__(self):
13
  self.processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
14
+ # Change AutoModelForSeq2SeqLM to AutoModelForTokenClassification
15
+ self.model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")
16
 
17
  def extract_text(self, pdf_path):
18
  images = convert_from_path(pdf_path)
19
  text_pages = []
20
  for image in images:
21
+ # Images are prepared for the OCR processing
22
  inputs = self.processor(images=image, return_tensors="pt")
23
+ # The model is used for text extraction
24
+ outputs = self.model(**inputs)
25
+ # The decoded text is extracted here
26
+ text = self.processor.batch_decode(outputs.logits, skip_special_tokens=True)[0]
27
  text_pages.append(text)
28
  return text_pages
29