mgokg committed on
Commit
96ff633
·
verified ·
1 Parent(s): 65ab811

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -0
app.py CHANGED
@@ -6,6 +6,50 @@ import time
6
  client = chromadb.PersistentClient(path="./")
7
  collection = client.get_or_create_collection(name="code")
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # Function to extract text from PDF file
10
  def extract_text_from_pdf(file_path):
11
  try:
 
6
  client = chromadb.PersistentClient(path="./")
7
  collection = client.get_or_create_collection(name="code")
8
 
9
def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        file_path: Path of the PDF to open with PyMuPDF (``fitz``).

    Returns:
        The text of all pages joined in page order. If opening or reading
        the file fails, a German error message describing the exception is
        returned instead of the exception being raised.
    """
    try:
        # The context manager closes the document even when reading a page
        # fails, so the file handle is not leaked.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Best-effort contract: callers receive a message string rather
        # than an exception.
        return f"Fehler beim Lesen der PDF-Datei: {e}"
19
+
20
def process_pdf(uploaded_file, prompt):
    """Extract the text of an uploaded PDF, store it in the collection, return it.

    Args:
        uploaded_file: The value delivered by the file input: either an
            object exposing the file path as ``.name`` or the path itself.
            ``None`` means nothing was uploaded.
        prompt: Text entered by the user. It is not used here; the
            parameter exists because the interface supplies two inputs.

    Returns:
        Whatever ``extract_text_from_pdf`` returned for the file (page text
        or its error message), or ``None`` when no file was uploaded.
    """
    if uploaded_file is None:
        return None

    # The file input is declared with type="filepath", so the value may be
    # a plain path string; fall back to it when there is no ``.name``.
    pdf_path = getattr(uploaded_file, "name", uploaded_file)
    pdf_text = extract_text_from_pdf(pdf_path)

    if pdf_text:
        # NOTE(review): extract_text_from_pdf reports failures as a
        # non-empty message string, so that message is stored as a
        # document too -- confirm whether that is intended.
        collection.add(
            documents=[pdf_text],
            # Document IDs are strings; the raw float timestamp is not.
            ids=[str(time.time())],
        )
        print(pdf_text)
    return pdf_text
32
+
33
def main():
    """Build the PDF chatbot interface and launch it.

    The interface routes a file upload and a text box to ``process_pdf``
    and shows the returned value as text.
    """
    pdf_upload = gr.File(type="filepath", label="PDF-Datei hochladen")
    question_box = gr.Textbox(lines=2, placeholder="Stellen Sie eine Frage")

    app = gr.Interface(
        fn=process_pdf,
        inputs=[pdf_upload, question_box],
        outputs="text",
        title="PDF-Chatbot",
        description="Laden Sie eine PDF-Datei hoch und stellen Sie Fragen zu ihrem Inhalt.",
    )
    app.launch()
42
+
43
+ if __name__ == "__main__":
44
+ main()
45
+ import gradio as gr
46
+ import chromadb
47
+ import fitz # PyMuPDF
48
+ import time
49
+
50
+ client = chromadb.PersistentClient(path="./")
51
+ collection = client.get_or_create_collection(name="code")
52
+
53
  # Function to extract text from PDF file
54
  def extract_text_from_pdf(file_path):
55
  try: