File size: 1,390 Bytes
c29df11
3b67edb
c29df11
3b67edb
6bc3e62
43cd7fa
3b67edb
d189514
96ff633
 
 
 
 
 
 
 
 
 
 
 
 
0eeaa38
96ff633
 
0eeaa38
96ff633
 
 
 
 
 
6d1a446
96ff633
 
f0be5df
723e467
1245333
 
723e467
f0be5df
657d295
 
 
 
 
 
 
6d1a446
96ff633
657d295
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
import chromadb
import fitz  # PyMuPDF
import time

# Chroma client created with the current directory ("./") as its path.
client = chromadb.PersistentClient(path="./")
# Fetch the collection named "code", creating it if it does not exist yet.
collection = client.get_or_create_collection(name="code") 

def extract_text_from_pdf(file_path):
    """Return the concatenated text of every page of a PDF file.

    Args:
        file_path: Path of the PDF, passed unchanged to ``fitz.open``.

    Returns:
        The text of all pages joined in page order. If opening or reading
        the file raises, no exception propagates; instead a message starting
        with ``"Fehler beim Lesen der PDF-Datei: "`` followed by the
        exception text is returned.
    """
    try:
        # The context manager closes the document even when reading a page
        # fails, so the file handle is not leaked.
        with fitz.open(file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        # Callers expect a string in every case, so failures are reported
        # as text rather than raised.
        return f"Fehler beim Lesen der PDF-Datei: {e}"

def process_pdf(uploaded_file, prompt=None):
    """Extract the text of an uploaded PDF, store it in the collection, return it.

    Args:
        uploaded_file: The upload to read. Either a path string or an object
            whose ``name`` attribute holds the path. ``None`` means nothing
            was uploaded.
        prompt: Not used by this function yet. Optional so the function can
            be called with the file alone.

    Returns:
        The string produced by ``extract_text_from_pdf`` (the page text, or
        its error message when reading failed), or ``None`` when
        ``uploaded_file`` is ``None``.
    """
    if uploaded_file is None:
        return None

    # Accept both a plain path string and a file-like object exposing `.name`.
    file_path = getattr(uploaded_file, "name", uploaded_file)

    # Extract the text from the uploaded PDF file.
    pdf_text = extract_text_from_pdf(file_path)

    # extract_text_from_pdf reports failures as a message with this prefix;
    # such messages must not be stored as if they were document content.
    extraction_failed = pdf_text.startswith("Fehler beim Lesen der PDF-Datei:")
    if pdf_text and not extraction_failed:
        collection.add(
            documents=[pdf_text],
            # IDs are passed as strings; the nanosecond timestamp keeps
            # successive uploads from sharing an ID.
            ids=[str(time.time_ns())],
        )
    return pdf_text


def suchen(inputs):
    """Placeholder search handler: ignores ``inputs`` and returns ``"hallo"``."""
    return "hallo"
  

# Build the UI: a text input, a text output, a PDF upload, and a Run button.
with gr.Blocks() as demo:
    gr.Markdown("Start typing below and then click **Run** to see the output.")
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
        # No trailing comma after the call: it would bind file_inp to a
        # one-element tuple instead of the File component itself.
        file_inp = gr.File(type="filepath", label="PDF-Datei hochladen")
    btn = gr.Button("Run")
    # process_pdf takes (uploaded_file, prompt), so the file component is
    # passed first and the textbox second.
    btn.click(fn=process_pdf, inputs=[file_inp, inp], outputs=out)

demo.launch()