la04 committed on
Commit
c106446
·
verified ·
1 Parent(s): 692eafb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -5
app.py CHANGED
@@ -1,12 +1,12 @@
1
  import gradio as gr
 
2
  from langchain.vectorstores import Chroma
3
  from langchain_community.document_loaders import PyPDFLoader
4
  from langchain_community.embeddings import HuggingFaceEmbeddings
5
- from transformers import LayoutLMv3Processor, AutoModelForTokenClassification
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
9
- import os
10
 
11
  class LayoutLMv3OCR:
12
  def __init__(self):
@@ -41,10 +41,12 @@ def process_pdf_and_query(pdf_path, question):
41
  return response
42
 
43
  def chatbot_response(pdf, question):
44
- # Wir speichern die hochgeladene PDF-Datei als temporäre Datei
45
  pdf_path = "/tmp/uploaded_pdf.pdf"
 
 
46
  with open(pdf_path, "wb") as f:
47
- f.write(pdf.read()) # PDF-Inhalt als Byte-Stream speichern
48
 
49
  # OCR-Text extrahieren
50
  extracted_text = ocr_tool.extract_text(pdf_path)
@@ -57,7 +59,7 @@ def chatbot_response(pdf, question):
57
 
58
  return answer
59
 
60
- # Ändere 'inputs' und 'outputs' zur neuen Gradio API
61
  pdf_input = gr.File(label="PDF-Datei hochladen")
62
  question_input = gr.Textbox(label="Frage eingeben")
63
  response_output = gr.Textbox(label="Antwort")
 
1
  import gradio as gr
2
+ import os
3
  from langchain.vectorstores import Chroma
4
  from langchain_community.document_loaders import PyPDFLoader
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
 
6
  from langchain.chains import RetrievalQA
7
  from langchain.prompts import PromptTemplate
8
  from pdf2image import convert_from_path
9
+ from transformers import LayoutLMv3Processor, AutoModelForTokenClassification
10
 
11
  class LayoutLMv3OCR:
12
  def __init__(self):
 
41
  return response
42
 
43
  def chatbot_response(pdf, question):
44
+ # Gradio gibt uns das PDF als NamedString, wir speichern es als temporäre Datei
45
  pdf_path = "/tmp/uploaded_pdf.pdf"
46
+
47
+ # Speichern des Byte-Streams von der Datei
48
  with open(pdf_path, "wb") as f:
49
+ f.write(pdf) # pdf ist bereits als Byte-Stream verfügbar
50
 
51
  # OCR-Text extrahieren
52
  extracted_text = ocr_tool.extract_text(pdf_path)
 
59
 
60
  return answer
61
 
62
+ # Gradio Interface
63
  pdf_input = gr.File(label="PDF-Datei hochladen")
64
  question_input = gr.Textbox(label="Frage eingeben")
65
  response_output = gr.Textbox(label="Antwort")