import gradio as gr
import chromadb
from chromadb.utils import embedding_functions
from PyPDF2 import PdfReader
import speech_recognition as sr
import groq
import os

# Get your API key at groq.com. It's free!
api_key = os.getenv('groq')
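# Suggested pip packages, inferred from the imports above (not an official
# requirements list): gradio, chromadb, PyPDF2, SpeechRecognition, groq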
# Initialize ChromaDB
client_chroma = chromadb.Client()
collection_name = "pdf_collection"
collection = client_chroma.get_or_create_collection(name=collection_name)

# Use ChromaDB's built-in default embedding function
embedding_function = embedding_functions.DefaultEmbeddingFunction()

client = groq.Client(api_key=api_key)
# Query the Groq API (Mixtral 8x7B) for an answer
def update(message):
    try:
        completion = client.chat.completions.create(
            model="Mixtral-8x7b-32768",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                # The appended instruction tells the model to always answer in German
                {"role": "user", "content": f"{message}. antworte immer auf deutsch"}
            ],
        )
        return completion.choices[0].message.content
    except Exception as e:
        return f"Error in response generation: {str(e)}"
# Transcribe recorded audio to text and answer with the LLM
def transcribe_audio(audio):
    recognizer = sr.Recognizer()
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        text = recognizer.recognize_google(audio_data, language="de-DE")
        # Return the plain answer string so it renders in the Textbox output
        return update(text)
    except sr.UnknownValueError:
        return "Speech recognition could not understand the audio."
    except sr.RequestError as e:
        return f"Could not request results from Google Speech Recognition service; {e}"
def ask_llm(llm_prompt_input):
    # Create an embedding for the prompt
    query_embedding = embedding_function([llm_prompt_input])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=3
    )

    # Format the retrieved documents
    formatted_results = []
    for i, doc in enumerate(results["documents"][0]):
        metadata = results["metadatas"][0][i]
        filename = metadata["filename"]
        formatted_results.append(f"### Dokument {i+1} (Dateiname: {filename})\n{doc}\n")

    # Append the formatted results to the prompt and ask the LLM
    enriched_prompt = f"{llm_prompt_input}\n\n### Verwandte Informationen:\n{''.join(formatted_results)}"
    return update(enriched_prompt)
def process_pdf(file):
    # Read the PDF content
    pdf_reader = PdfReader(file.name)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None for pages without extractable text
        text += page.extract_text() or ""

    # Embed the full text and store it in ChromaDB
    embeddings = embedding_function([text])
    collection.add(
        documents=[text],
        embeddings=embeddings,
        metadatas=[{"filename": file.name}],
        ids=[file.name]  # Use the filename as the unique ID
    )
    return "PDF wurde erfolgreich in ChromaDB gespeichert."
def search_similar_documents(prompt):
    # Create an embedding for the prompt
    query_embedding = embedding_function([prompt])[0]

    # Run the similarity search
    results = collection.query(
        query_embeddings=[query_embedding],
        n_results=1
    )

    # Format the results and return plain text for the Textbox output
    formatted_results = [f"{doc}\n" for doc in results["documents"][0]]
    return "".join(formatted_results)
# Chat tab: ask the LLM, enriched with retrieved documents
with gr.Blocks() as chat:
    gr.Markdown("### Ask the RKI Files", elem_classes="tab-header")
    with gr.Row():
        llm_output = gr.Textbox(label="LLM Answer")
    with gr.Row():
        llm_prompt_input = gr.Textbox(label="Frage an das LLM", placeholder="Gib eine Frage ein")
        llm_submit_button = gr.Button("send")
    llm_submit_button.click(ask_llm, inputs=llm_prompt_input, outputs=llm_output)

# Upload tab: store PDFs in the ChromaDB collection
with gr.Blocks() as upload:
    gr.Markdown("### File upload", elem_classes="tab-header")
    with gr.Row():
        file_input = gr.File(label="Wähle eine PDF-Datei aus", type="filepath")
        upload_output = gr.Textbox(label="Upload Status")
    with gr.Row():
        submit_button = gr.Button("upload")
    submit_button.click(process_pdf, inputs=file_input, outputs=upload_output)

# Search tab: query the collection directly
with gr.Blocks() as suche:
    gr.Markdown("### Datenbank durchsuchen", elem_classes="tab-header")
    with gr.Row():
        prompt_input = gr.Textbox(label="Suche nach ähnlichen Dokumenten", placeholder="Gib einen Suchbegriff ein")
    with gr.Row():
        search_output = gr.Textbox(label="Ähnliche Dokumente")
    with gr.Row():
        search_button = gr.Button("Suchen")
    search_button.click(search_similar_documents, inputs=prompt_input, outputs=search_output)

# Optional: voice input tab
with gr.Blocks() as speech:
    gr.Markdown("### Highspeed Voicebot", elem_classes="tab-header")
    with gr.Row():
        sr_outputs = gr.Textbox(label="Antwort")
    with gr.Row():
        sr_inputs = gr.Microphone(type="filepath")
    sr_inputs.change(transcribe_audio, inputs=sr_inputs, outputs=sr_outputs)
# Build the Gradio interface
with gr.Blocks() as demo:
    gr.TabbedInterface(
        [chat, upload, suche],
        ["Chat", "Upload", "Suche"]
    )

# Start the Gradio app
demo.launch()
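# The optional voice tab ("speech") is defined above but not wired into the
# interface. A minimal sketch of how it could be included (assumption, untested):
#     gr.TabbedInterface(
#         [chat, upload, suche, speech],
#         ["Chat", "Upload", "Suche", "Voice"]
#     )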