la04 committed on
Commit
026a204
·
verified ·
1 Parent(s): b6d30d1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -27
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import gradio as gr
2
- from langchain.vectorstores import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from transformers import LayoutLMv3Processor, AutoModelForSeq2SeqLM
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
@@ -10,16 +10,20 @@ import os
10
 
11
  class LayoutLMv3OCR:
12
  def __init__(self):
 
13
  self.processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
14
- self.model = AutoModelForSeq2SeqLM.from_pretrained("microsoft/layoutlmv3-base")
15
 
16
  def extract_text(self, pdf_path):
17
  images = convert_from_path(pdf_path)
18
  text_pages = []
19
  for image in images:
 
20
  inputs = self.processor(images=image, return_tensors="pt")
21
- outputs = self.model.generate(**inputs)
22
- text = self.processor.batch_decode(outputs, skip_special_tokens=True)[0]
 
 
23
  text_pages.append(text)
24
  return text_pages
25
 
@@ -48,25 +52,4 @@ def chatbot_response(pdf, question):
48
  with open(pdf_path, "wb") as f:
49
  f.write(pdf.read())
50
 
51
- extracted_text = ocr_tool.extract_text(pdf_path)
52
- answer = process_pdf_and_query(pdf_path, question)
53
-
54
- # Lösche die gespeicherte PDF-Datei nach der Verarbeitung
55
- os.remove(pdf_path)
56
-
57
- return answer
58
-
59
- pdf_input = gr.File(label="PDF-Datei hochladen")
60
- question_input = gr.Textbox(label="Frage eingeben")
61
- response_output = gr.Textbox(label="Antwort")
62
-
63
- interface = gr.Interface(
64
- fn=chatbot_response,
65
- inputs=[pdf_input, question_input],
66
- outputs=response_output,
67
- title="RAG Chatbot mit PDF-Unterstützung",
68
- description="Lade eine PDF-Datei hoch und stelle Fragen zu ihrem Inhalt."
69
- )
70
-
71
- if __name__ == "__main__":
72
- interface.launch(share=True)
 
1
  import gradio as gr
2
+ from langchain_community.vectorstores import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
+ from transformers import LayoutLMv3Processor, AutoModelForTokenClassification
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
 
10
 
class LayoutLMv3OCR:
    """Extract per-page text from a PDF via the LayoutLMv3 processor.

    Each PDF page is rendered to an image with ``convert_from_path`` and
    passed through ``LayoutLMv3Processor``. The text of a page is recovered
    by decoding the token ids the processor produced for that page.

    NOTE(review): this relies on the processor running its own OCR step on
    the image (expected to be the default for this checkpoint and to require
    Tesseract/pytesseract at runtime) -- confirm in the deployment
    environment.
    """

    def __init__(self):
        # Load the LayoutLMv3 processor and the token-classification model.
        self.processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
        # Kept as an attribute so existing code that accesses ``.model`` keeps
        # working. ``extract_text`` does not call it: a token-classification
        # head returns per-token class scores, not text.
        self.model = AutoModelForTokenClassification.from_pretrained("microsoft/layoutlmv3-base")

    def extract_text(self, pdf_path):
        """Return a list with one decoded text string per page of the PDF.

        Args:
            pdf_path: Path of the PDF file to render and read.

        Returns:
            A list of strings, in page order.
        """
        text_pages = []
        for image in convert_from_path(pdf_path):
            # Turn the page image into model inputs (token ids and more).
            inputs = self.processor(images=image, return_tensors="pt")
            # Decode the token ids themselves. The previous code decoded
            # ``outputs.logits``, which are float scores rather than token
            # ids and therefore cannot be turned back into text.
            text = self.processor.batch_decode(
                inputs["input_ids"], skip_special_tokens=True
            )[0]
            text_pages.append(text)
        return text_pages
29
 
 
52
  with open(pdf_path, "wb") as f:
53
  f.write(pdf.read())
54
 
55
+ extracted_text = ocr_tool.extr