barghavani committed on
Commit
2225d19
·
verified ·
1 Parent(s): 5086bbb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -17
app.py CHANGED
@@ -13,10 +13,33 @@ import whisper
13
 
14
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
 
16
- def transcribe_audio(audio_file):
17
- model = whisper.load_model("large")
18
- result = model.transcribe(audio_file, language="en", fp16=False)
19
- return result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  def get_pdf_text(pdf_docs):
21
  text=""
22
  for pdf in pdf_docs:
@@ -82,22 +105,20 @@ def main():
82
  st.set_page_config("Chat PDF")
83
  st.header("QnA with Multiple PDF files💁")
84
 
 
 
 
 
 
85
  with st.sidebar:
86
  st.title("Menu:")
87
- audio_query = st.file_uploader("Upload your Audio Query", type=['mp3', 'wav'])
88
- pdf_docs = st.file_uploader("Upload your PDF Files", accept_multiple_files=True)
89
  if st.button("Submit & Process"):
90
- with st.spinner("Processing Audio and PDFs..."):
91
- if audio_query is not None:
92
- user_question = transcribe_audio(audio_query)
93
- raw_text = get_pdf_text(pdf_docs)
94
- text_chunks = get_text_chunks(raw_text)
95
- get_vector_store(text_chunks)
96
- response = user_input(user_question)
97
- st.success("Done")
98
- st.write("Reply: ", response)
99
- else:
100
- st.error("Please upload an audio file for the query.")
101
 
102
 
103
 
 
13
 
14
  genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
15
 
16
+ model = whisper.load_model("small")
17
+ def transcribe(audio):
18
+ # Load audio and pad/trim it to fit 30 seconds
19
+ audio = whisper.load_audio(audio)
20
+ audio = whisper.pad_or_trim(audio)
21
+
22
+ # Make log-Mel spectrogram and move to the same device as the model
23
+ mel = whisper.log_mel_spectrogram(audio).to(model.device)
24
+
25
+ # Detect the spoken language
26
+ _, probs = model.detect_language(mel)
27
+ detected_language = max(probs, key=probs.get)
28
+ print(f"Detected language: {detected_language}")
29
+
30
+ # Decode the audio
31
+ options = whisper.DecodingOptions(fp16=False)
32
+ result = whisper.decode(model, mel, options)
33
+
34
+ # Check if the detected language is English; if not, translate the text
35
+ if detected_language != "en":
36
+ # Initialize the translation model; specify source and target languages as needed
37
+ translator = pipeline("translation_xx_to_yy", model="Helsinki-NLP/opus-mt-xx-en")
38
+ translated_text = translator(result.text, max_length=512)[0]['translation_text']
39
+ return translated_text
40
+
41
+ return result.text
42
+
43
  def get_pdf_text(pdf_docs):
44
  text=""
45
  for pdf in pdf_docs:
 
105
  st.set_page_config("Chat PDF")
106
  st.header("QnA with Multiple PDF files💁")
107
 
108
+ user_question = st.text_input(result.text)
109
+
110
+ if user_question:
111
+ user_input(user_question)
112
+
113
  with st.sidebar:
114
  st.title("Menu:")
115
+ pdf_docs = st.file_uploader("Upload your PDF Files and Click on the Submit & Process Button", accept_multiple_files=True)
 
116
  if st.button("Submit & Process"):
117
+ with st.spinner("Processing..."):
118
+ raw_text = get_pdf_text(pdf_docs)
119
+ text_chunks = get_text_chunks(raw_text)
120
+ get_vector_store(text_chunks)
121
+ st.success("Done")
 
 
 
 
 
 
122
 
123
 
124