import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
import speech_recognition as sr
import groq
import os

# Get your API key at groq.com. It's free!
api_key = os.getenv('groq')
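# A minimal sketch of providing the key (placeholder value, not a real credential):
#   export groq="gsk_your_key_here"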

# Initialize ChromaDB
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)
# Use ChromaDB's built-in embeddings
embedding_function = embedding_functions.DefaultEmbeddingFunction()
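# Note: at the time of writing, DefaultEmbeddingFunction runs a small local
# all-MiniLM-L6-v2 model (via ONNX), so no additional API key is required here.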

client = groq.Client(api_key=api_key)

# Use the Mixtral 8x7B model served by Groq for answering
def update(message):   
    try:        
        completion = client.chat.completions.create(
            model="Mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
            ],
        )       
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
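
# update() is reused by both the voice path (transcribe_audio) and the
# retrieval path (ask_llm) below.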

# Function to transcribe audio data to text
def transcribe_audio(audio):
    # The change event also fires when the recording is cleared; skip empty input
    if audio is None:
        return ""
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
        try:
            text = recognizer.recognize_google(audio_data, language="de-DE")
            result = update(text)
            # Return the plain answer string; the output Textbox expects text
            return result

        except sr.UnknownValueError:
            return "Speech recognition could not understand the audio."
        except sr.RequestError as e:
            return f"Could not request results from Google Speech Recognition service; {e}"

def ask_llm(llm_prompt_input):
    # Create an embedding for the prompt
    query_embedding = embedding_function([llm_prompt_input])[0]
    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )
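    # n_results=3 retrieves the three stored documents closest to the prompt
    # embedding (using the collection's default distance metric).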
    
    # Format the results
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")
    
    # Append the formatted results to the prompt
    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
    result = update(enriched_prompt)
    # Return the plain answer string; llm_output is a Textbox component
    return result
 
def process_pdf(file):
    # Read the PDF content
    pdf_reader = PdfReader(file.name)
    text = ""
    for page in pdf_reader.pages:
        text += page.extract_text() or ""  # extract_text() may return None for image-only pages
    
    # Create the embedding explicitly and store it alongside the full text
    embeddings = embedding_function([text])
    
    # Store the entire text in ChromaDB
    collection.add(
        documents=[text],
        embeddings=embeddings,
        metadatas=[{"filename": file.name}],
        ids=[file.name]  # Use the filename as the unique ID
    )
    
    return f"PDF '{file.name}' wurde erfolgreich in ChromaDB gespeichert."
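
# Note: each PDF is stored as one document. For long PDFs, splitting the text
# into smaller chunks before adding it would likely improve retrieval quality;
# a rough sketch (chunk size chosen arbitrarily):
#   chunks = [text[i:i + 1000] for i in range(0, len(text), 1000)]
#   collection.add(documents=chunks,
#                  embeddings=embedding_function(chunks),
#                  metadatas=[{"filename": file.name}] * len(chunks),
#                  ids=[f"{file.name}-{i}" for i in range(len(chunks))])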

def search_similar_documents(prompt):
    # Create an embedding for the prompt
    query_embedding = embedding_function([prompt])[0]
    
    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=1
    )
    
    # Format the results
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"{filename}:\n{doc}\n")

    # Return plain text; search_output is a Textbox component
    ergebnis = "".join(formatted_results)
    return ergebnis


with gr.Blocks() as chat:
    gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
    with gr.Row():       
        llm_output = gr.Textbox(label="LLM Answer")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
        llm_submit_button = gr.Button("send")  
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

with gr.Blocks() as upload:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)

with gr.Blocks() as suche:
    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")        
    with gr.Row():
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)

# Optional: voice input
with gr.Blocks() as speech:
    gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath") 
    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
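
# Note: the optional speech tab above is not included in the TabbedInterface
# below; one possible way to add it (the tab label is just a suggestion):
#   gr.TabbedInterface([chat, upload, suche, speech],
#                      ["Chat", "Upload", "Suche", "Sprache"])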

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche],
        ["Chat", "Upload", "Suche"]
    )

# Launch the Gradio application
demo.launch()