DrishtiSharma committed (verified)
Commit af77c7a · Parent(s): 130b915

Update app.py

Files changed (1):
  1. app.py (+33, -42)
app.py CHANGED
@@ -10,9 +10,7 @@ from langchain_groq import ChatGroq
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from PyPDF2 import PdfReader
-from gtts import gTTS
-from pydub import AudioSegment
-from pydub.playback import play
+from groq import Groq
 
 # Clear ChromaDB cache to fix tenant issue
 chromadb.api.client.SharedSystemClient.clear_system_cache()
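The retained cache-clearing call above is the usual workaround for Chroma's "Could not connect to tenant default_tenant" error when a Space re-runs the script. SharedSystemClient is an internal chromadb API, so a guarded wrapper is worth considering; a minimal sketch, not part of this commit:

    import chromadb

    def reset_chroma_cache() -> None:
        # Best-effort reset of chromadb's process-wide client cache.
        # clear_system_cache() is internal API; guard against upstream renames
        # so the app doesn't crash at import time.
        try:
            chromadb.api.client.SharedSystemClient.clear_system_cache()
        except AttributeError:
            pass  # internal API moved; let Chroma manage its own state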
@@ -23,6 +21,10 @@ if not GROQ_API_KEY:
     st.error("GROQ_API_KEY is not set. Please configure it in Hugging Face Spaces secrets.")
     st.stop()
 
+# Initialize Groq Client for transcription and LLM
+groq_client = Groq(api_key=GROQ_API_KEY)
+llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
+
 # Function to process PDFs and set up the vectorstore
 def process_and_store_pdfs(uploaded_files):
     texts = []
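Hoisting groq_client and llm to module scope creates both handles once per script run and shares them across the transcription and chat paths. Streamlit re-executes the whole script on every interaction, though, so the common idiom is to cache the handles. A sketch using st.cache_resource (the decorator is standard Streamlit; applying it here is an assumption, not something this commit does):

    import streamlit as st
    from groq import Groq
    from langchain_groq import ChatGroq

    @st.cache_resource
    def get_clients(api_key: str):
        # Build the Groq SDK client (Whisper) and the LangChain LLM handle
        # once per process instead of on every Streamlit rerun.
        groq_client = Groq(api_key=api_key)
        llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0,
                       groq_api_key=api_key)
        return groq_client, llm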
@@ -37,7 +39,6 @@ def process_and_store_pdfs(uploaded_files):
 
 # Function to set up the chat chain
 def chat_chain(vectorstore):
-    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0, groq_api_key=GROQ_API_KEY)
     retriever = vectorstore.as_retriever()
     memory = ConversationBufferMemory(output_key="answer", memory_key="chat_history", return_messages=True)
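With this hunk, chat_chain closes over the module-level llm instead of constructing its own on each call; the chain assembly elided between this hunk and the next is untouched. For orientation only, a typical ConversationalRetrievalChain wiring consistent with the visible pieces (a retriever, memory keyed on "answer", a returned chain) looks like the sketch below. This is a hedged guess, not the file's elided code:

    from langchain.chains import ConversationalRetrievalChain
    from langchain.memory import ConversationBufferMemory
    from langchain_groq import ChatGroq

    llm = ChatGroq(model="llama-3.1-70b-versatile", temperature=0)  # reads GROQ_API_KEY from env

    def chat_chain(vectorstore):
        retriever = vectorstore.as_retriever()
        memory = ConversationBufferMemory(
            output_key="answer", memory_key="chat_history", return_messages=True
        )
        chain = ConversationalRetrievalChain.from_llm(
            llm=llm,                       # the shared module-level handle
            retriever=retriever,
            memory=memory,
            return_source_documents=True,  # assumption; pairs with output_key="answer"
        )
        return chain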
 
@@ -51,7 +52,7 @@ def chat_chain(vectorstore):
     )
     return chain
 
-# Function to record audio using JavaScript
+# JavaScript for recording audio
 RECORD_JS = """
 const sleep = time => new Promise(resolve => setTimeout(resolve, time));
 const b2text = blob => new Promise(resolve => {
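The body of RECORD_JS is elided in this hunk; it resolves a browser recording to a base64 data: URL, which the Python side must strip and decode. (st.experimental_js, used below to invoke it, is not part of Streamlit's public API as far as I can tell; the pattern resembles Colab's eval_js.) The decode step in isolation, with a fabricated sample payload:

    from base64 import b64decode

    def data_url_to_bytes(data_url: str) -> bytes:
        # Split 'data:<mime>;base64,<payload>' and decode the payload.
        header, payload = data_url.split(",", 1)
        if not header.endswith("base64"):
            raise ValueError("expected a base64 data URL")
        return b64decode(payload)

    sample = "data:audio/wav;base64,UklGRg=="  # fabricated payload, not a real recording
    print(data_url_to_bytes(sample))           # -> b'RIFF'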
@@ -77,70 +78,60 @@ var record = time => new Promise(async resolve => {
 
 def record_audio(seconds=5):
     """Record audio via JavaScript and save it as a .wav file."""
-    st.write("Recording...")
+    st.write("Recording audio...")
     from streamlit.components.v1 import html
-    html(f'<script>{RECORD_JS}</script>', height=0)
-    b64_audio = st.experimental_js("record", seconds * 1000)
-    audio_bytes = b64decode(b64_audio.split(",")[1])
-    with open("recorded_audio.wav", "wb") as f:
+    audio_b64 = st.experimental_js("record", seconds * 1000)
+    audio_bytes = b64decode(audio_b64.split(",")[1])
+    audio_file_path = "recorded_audio.wav"
+    with open(audio_file_path, "wb") as f:
         f.write(audio_bytes)
-    st.success("Audio recorded and saved!")
-    return "recorded_audio.wav"
+    return audio_file_path
 
-# Transcribe audio using Groq Whisper
-from groq import Groq
-def transcribe_audio(filepath):
-    client = Groq(api_key=GROQ_API_KEY)
-    with open(filepath, "rb") as file:
-        transcription = client.audio.transcriptions.create(
-            file=(filepath, file.read()),
+def transcribe_audio(file_path):
+    """Transcribe audio using Groq Whisper."""
+    with open(file_path, "rb") as file:
+        transcription = groq_client.audio.transcriptions.create(
+            file=(file_path, file.read()),
             model="distil-whisper-large-v3-en",
             response_format="json",
             language="en"
         )
-    return transcription.text
-
-# Text-to-Speech Function
-def text_to_speech(response):
-    tts = gTTS(text=response, lang='en')
-    tts.save("response.mp3")
-    sound = AudioSegment.from_file("response.mp3")
-    play(sound)
+    return transcription['text']
 
 # Streamlit UI
 st.title("Chat with PDFs via Audio 🎙️📚")
 
 uploaded_files = st.file_uploader("Upload PDF Files", accept_multiple_files=True, type=["pdf"])
+
 if uploaded_files:
     vectorstore = process_and_store_pdfs(uploaded_files)
     chain = chat_chain(vectorstore)
     st.success("PDFs processed! Ready to chat.")
 
-    # User options for input
-    input_mode = st.radio("Choose input method:", ["Text", "Audio"])
+    input_method = st.radio("Choose Input Method", ["Text Input", "Audio Input"])
 
-    # Text input
-    if input_mode == "Text":
-        user_input = st.text_input("Ask your question:")
-        if user_input:
+    # Text Input Mode
+    if input_method == "Text Input":
+        query = st.text_input("Ask your question:")
+        if query:
             with st.spinner("Thinking..."):
-                response = chain({"question": user_input})["answer"]
+                response = chain({"question": query})["answer"]
             st.write(f"**Response:** {response}")
-            text_to_speech(response)
 
-    # Audio input
-    elif input_mode == "Audio":
+    # Audio Input Mode
+    elif input_method == "Audio Input":
         if st.button("Record Audio"):
             audio_file = record_audio(5)
             st.audio(audio_file)
 
+            # Transcription
             st.write("Transcribing audio...")
-            question = transcribe_audio(audio_file)
-            st.write(f"**You said:** {question}")
+            transcription = transcribe_audio(audio_file)
+            st.write(f"**You said:** {transcription}")
 
-            with st.spinner("Thinking..."):
-                response = chain({"question": question})["answer"]
+            # Generate Response
+            with st.spinner("Generating response..."):
+                response = chain({"question": transcription})["answer"]
             st.write(f"**Response:** {response}")
 else:
     st.info("Please upload PDF files to start chatting.")
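The new transcribe_audio sends the recorded file to Groq's hosted Whisper model. The same call can be exercised standalone outside Streamlit; note that with response_format="json" the published groq Python SDK returns a Transcription object whose text lives on an attribute, so the commit's transcription['text'] subscript would likely need to be the attribute form shown here:

    from groq import Groq

    def transcribe_wav(path: str, client: Groq) -> str:
        # Upload a local .wav file to Groq's Whisper endpoint, return plain text.
        with open(path, "rb") as f:
            result = client.audio.transcriptions.create(
                file=(path, f.read()),
                model="distil-whisper-large-v3-en",
                response_format="json",
                language="en",
            )
        return result.text  # attribute access on the SDK's Transcription object

    # Usage sketch (key and file name are assumptions):
    # client = Groq(api_key="...")
    # print(transcribe_wav("recorded_audio.wav", client))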
 