barghavani committed on
Commit
2225d19
·
verified ·
1 Parent(s): 5086bbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -17
app.py CHANGED
@@ -13,10 +13,33 @@ import whisper
13
 
14
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
 
16
- def transcribe_audio(audio_file):
17
- model = whisper.load_model("large")
18
- result = model.transcribe(audio_file, language="en", fp16=False)
19
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def get_pdf_text(pdf_docs):
21
  text=""
22
  for pdf in pdf_docs:
@@ -82,22 +105,20 @@ def main():
82
  st.set_page_config("Chat PDF")
83
  st.header("QnA with Multiple PDF files💁")
84
 
 
 
 
 
 
85
  with st.sidebar:
86
  st.title("Menu:")
87
- audio_query = st.file_uploader("Upload your Audio Query", type=['mp3', 'wav'])
88
- pdf_docs = st.file_uploader("Upload your PDF Files", accept_multiple_files=True)
89
  if st.button("Submit & Process"):
90
- with st.spinner("Processing Audio and PDFs..."):
91
- if audio_query is not None:
92
- user_question = transcribe_audio(audio_query)
93
- raw_text = get_pdf_text(pdf_docs)
94
- text_chunks = get_text_chunks(raw_text)
95
- get_vector_store(text_chunks)
96
- response = user_input(user_question)
97
- st.success("Done")
98
- st.write("Reply: ", response)
99
- else:
100
- st.error("Please upload an audio file for the query.")
101
 
102
 
103
 
 
13
 
14
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
 
16
+ model = whisper.load_model("small")
17
+ def transcribe(audio):
18
+ # Load audio and pad/trim it to fit 30 seconds
19
+ audio = whisper.load_audio(audio)
20
+ audio = whisper.pad_or_trim(audio)
21
+
22
+ # Make log-Mel spectrogram and move to the same device as the model
23
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
24
+
25
+ # Detect the spoken language
26
+ _, probs = model.detect_language(mel)
27
+ detected_language = max(probs, key=probs.get)
28
+ print(f"Detected language: {detected_language}")
29
+
30
+ # Decode the audio
31
+ options = whisper.DecodingOptions(fp16=False)
32
+ result = whisper.decode(model, mel, options)
33
+
34
+ # Check if the detected language is English; if not, translate the text
35
+ if detected_language != "en":
36
+ # Initialize the translation model; specify source and target languages as needed
37
+ translator = pipeline("translation_xx_to_yy", model="Helsinki-NLP/opus-mt-xx-en")
38
+ translated_text = translator(result.text, max_length=512)[0]['translation_text']
39
+ return translated_text
40
+
41
+ return result.text
42
+
43
  def get_pdf_text(pdf_docs):
44
  text=""
45
  for pdf in pdf_docs:
 
105
  st.set_page_config("Chat PDF")
106
  st.header("QnA with Multiple PDF files💁")
107
 
108
+ user_question = st.text_input(result.text)
109
+
110
+ if user_question:
111
+ user_input(user_question)
112
+
113
  with st.sidebar:
114
  st.title("Menu:")
115
+ pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
 
116
  if st.button("Submit & Process"):
117
+ with st.spinner("Processing..."):
118
+ raw_text = get_pdf_text(pdf_docs)
119
+ text_chunks = get_text_chunks(raw_text)
120
+ get_vector_store(text_chunks)
121
+ st.success("Done")
 
 
 
 
 
 
122
 
123
 
124