Ankitajadhav
committed on
Update app.py
app.py
CHANGED
@@ -86,3 +86,112 @@ collection_text.add(
     ids=loaded_ids
 )
 
+# Initialize the transcriber
+transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-base.en", device='cuda')
+
+# Preload TTS models
+preload_models()
+
+image_path = "dom_bremen.jpg"
+absolute_path = os.path.abspath(image_path)
+
+def transcribe(audio):
+    sr, y = audio
+    y = y.astype(np.float32)
+    y /= np.max(np.abs(y))
+    return transcriber({"sampling_rate": sr, "raw": y})["text"]
+
+fixed_prompt = "en_speaker_5"
+
+def generate_audio_output(text):
+    audio_arr = generate_audio(text, history_prompt=fixed_prompt)
+    audio_arr = (audio_arr * 32767).astype(np.int16)
+    return (SAMPLE_RATE, audio_arr)
+
+# Function to retrieve and generate text based on input query
+def generate_text(message, max_tokens=150, temperature=0.2, top_p=0.9):
+    try:
+        # Retrieve context and image from vector store
+        retrieved_image = collection_images.query(query_texts=message, include=['data'], n_results=1)
+        context_text = collection_text.query(query_texts=message, n_results=1)
+
+        context = context_text['documents'][0] if context_text else "No relevant context found."
+        image_data = retrieved_image['uris'][0] if retrieved_image else None
+        image_url = image_data if image_data else None
+
+        # Log the image URL for debugging
+        print(f"Retrieved image URL: {image_url}")
+
+        # Create prompt template for LLM
+        prompt_template = (
+            f"Context: {context}\n\n"
+            f"Question: {message}\n\n"
+            f"You are a guide to city of Bremen from Germany, generate response based on context."
+        )
+
+        # Generate text using the language model
+        output = llm(
+            prompt_template,
+            temperature=temperature,
+            top_p=top_p,
+            top_k=50,
+            repeat_penalty=1.1,
+            max_tokens=max_tokens,
+        )
+
+        # Process the output
+        input_string = output['choices'][0]['text'].strip()
+        cleaned_text = input_string.strip("[]'").replace('\\n', '\n')
+        continuous_text = '\n'.join(cleaned_text.split('\n'))
+
+        return continuous_text, image_url[0]
+    except Exception as e:
+        return f"Error: {str(e)}", None
+
+# Function to load and display an image from a file path
+def load_image_from_path(file_path):
+    try:
+        img = Image.open(file_path)
+        return img
+    except Exception as e:
+        print(f"Error loading image: {str(e)}")
+        return None
+
+def process_audio(audio):
+    # Transcribe the audio
+    transcribed_text = transcribe(audio)
+    text_output, image_path = generate_text(transcribed_text)
+    if image_path:
+        image_output = load_image_from_path(image_path)
+    else:
+        image_output = None  # Handle cases where no image is retrieved
+    # return text_output, image_output
+    # Generate audio output
+    audio_output = generate_audio_output(text_output)
+    return text_output, audio_output, image_output
+
+def gen_tts(text):
+    audio_arr = generate_audio(text, history_prompt=fixed_prompt)
+    audio_arr = (audio_arr * 32767).astype(np.int16)
+    return (SAMPLE_RATE, audio_arr)
+
+# Define the Gradio interface
+# with gr.Blocks() as app:
+demo = gr.Interface(
+    fn=process_audio,
+    inputs=gr.Audio(sources=["microphone"], label="Input Audio"),
+    outputs=[
+        gr.Textbox(label="Generated Text"),
+        gr.Audio(label="Generated Audio"),
+        gr.Image(label="Retrieved Image")  # New output component for the image
+    ],
+    title="moinBremen - Your Personal Tour Guide for our City of Bremen",
+    description="Ask your question about Bremen by speaking into the microphone. The system will transcribe your question, generate a response, and read it out loud.",
+    css=""".gradio-container {
+        background: url('file=/content/dom_bremen.jpg') no-repeat center center fixed;
+        background-size: cover;
+    }""",
+    cache_examples=False,
+)
+demo.launch(allowed_paths=[absolute_path])
+
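
Note: the added lines are only the runtime half of the Space; they assume that the unchanged, earlier part of app.py already defines llm, collection_images and collection_text, plus the imports used above. As orientation, here is a minimal sketch of those assumed prerequisites together with a hypothetical local smoke test of process_audio. The exact import sources, the chromadb/llama-cpp setup, and the file name question.wav are guesses for illustration, not part of the commit.

# Not part of the commit: objects the added code appears to rely on,
# inferred from the calls above and presumably defined earlier in app.py.
import os
import numpy as np
from PIL import Image
import gradio as gr
from transformers import pipeline                              # Whisper ASR pipeline
from bark import preload_models, generate_audio, SAMPLE_RATE   # Bark TTS helpers

# `llm`, `collection_images` and `collection_text` are also assumed to exist,
# e.g. a llama-cpp-python model and two chromadb collections built in the
# unchanged part of the file (see the `collection_text.add(...)` context above).

# Hypothetical smoke test: run the pipeline on a local WAV file without the UI.
from scipy.io import wavfile

if __name__ == "__main__":
    sr, samples = wavfile.read("question.wav")         # any short spoken question
    text, audio, image = process_audio((sr, samples))  # same (rate, array) tuple Gradio sends
    print(text)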