Shahadbal committed
Commit a9c0113 · verified · 1 Parent(s): 312b3d1

Update app.py

Files changed (1)
  1. app.py +12 -66
app.py CHANGED
@@ -12,17 +12,6 @@ from gtts import gTTS
 from moviepy.editor import VideoFileClip
 import yt_dlp
 
-# Check if the question_generation directory exists; if not, clone the repository
-if not os.path.exists("question_generation"):
-    subprocess.call(["git", "clone", "https://github.com/patil-suraj/question_generation.git"])
-
-
-# Assuming 'question_generation' is the folder where pipelines.py is located
-sys.path.append(os.path.join(os.path.dirname(__file__), 'question_generation'))
-
-# Download the punkt tokenizer
-nltk.download('punkt')
-
 # Set logging verbosity
 logging.set_verbosity_error()
 
@@ -44,16 +33,6 @@ languages = {
 # Load QA pipeline
 qa_pipeline = pipeline(task="question-answering", model="deepset/roberta-base-squad2")
 
-# Load question generator
-from pipelines import pipeline
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-
-# Load the tokenizer and model
-# tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-prepend", use_fast=False)  # Disable the fast tokenizer
-# model = AutoModelForSeq2SeqLM.from_pretrained("valhalla/t5-small-qg-prepend")
-tokenizer = AutoTokenizer.from_pretrained("valhalla/t5-small-qg-prepend", use_fast=False)
-
-question_generator = pipeline("question-generation", model="valhalla/t5-small-qg-prepend", qg_format="prepend")
 
 # Function to download audio from YouTube
 def download_audio_from_youtube(youtube_url, output_path="downloaded_audio.mp3"):
@@ -119,22 +98,16 @@ def transcribe_content(content_type, audio_path, youtube_link, video):
         return whispermodel.transcribe(audio_file)["text"]
     return None
 
-def generate_summary_and_qna(summarize, qna, number):
+def generate_summary(summarize):
     summary_text = None
-    extracted_data = None
 
     if summarize:
        summary = summarizer(transcription, min_length=10, max_length=150)
        summary_text = summary[0]['summary_text']
 
-    if qna:
-        questions = question_generator(transcription)
-        extracted_data = [{'question': item['question'], 'answer': item['answer'].replace('<pad> ', '')} for item in questions]
-        extracted_data = extracted_data[:number] if len(extracted_data) > number else extracted_data
-
-    return summary_text, extracted_data
+    return summary_text
 
-def translator_text(summary, data, language):
+def translator_text(summary, language):
     if language == 'English':
         return summary, data
 
@@ -146,22 +119,7 @@ def translator_text(summary, data, language):
     else:
         translated_summary = "No summary requested."
 
-    if data is not None:
-        for item in data:
-            question = item.get('question', '')
-            answer = item.get('answer', '')
-
-            translated_question = translator(question, src_lang=languages["English"], tgt_lang=languages[language])[0]['translation_text'] if question else ''
-            translated_answer = translator(answer, src_lang=languages["English"], tgt_lang=languages[language])[0]['translation_text'] if answer else ''
-
-            translated_data.append({
-                'question': translated_question,
-                'answer': translated_answer
-            })
-    else:
-        translated_data = "No Q&A requested."
-
-    return translated_summary, translated_data
+    return translated_summary
 
 def create_audio_summary(summary, language):
     if summary and summary != 'No summary requested.':
@@ -171,7 +129,7 @@ def create_audio_summary(summary, language):
         return audio_path
     return None
 
-def main(content_type, audio_path, youtube_link, video, language, summarize, qna, number):
+def main(content_type, audio_path, youtube_link, video, language, summarize):
     global transcription, languageG
     languageG = language
 
@@ -184,25 +142,18 @@ def main(content_type, audio_path, youtube_link, video, language, summarize, qna
     if input_language != 'English':
         transcription = translator(transcription, src_lang=languages[input_language], tgt_lang=languages['English'])[0]['translation_text']
 
-    summary_text, generated_qna = generate_summary_and_qna(summarize, qna, number)
-    summary, qna = translator_text(summary_text, generated_qna, language)
+    summary_text = generate_summary(summarize)
+    summary = translator_text(summary_text, language)
     audio_path = create_audio_summary(summary, language)
-
-    qna_output = (
-        "\n\n".join(
-            f"**Question:** {item['question']}\n**Answer:** {item['answer']}"
-            for item in qna
-        ) if qna else "No Q&A requested."
-    )
 
-    return summary, qna_output, audio_path
+    return summary, audio_path
 
 # Gradio interface
 with gr.Blocks() as demo:
     gr.Markdown(
         """
         # Student Helper App
-        This app assists students by allowing them to upload audio, video, or YouTube links for automatic transcription.
+        This app allows students to upload audio, video, or YouTube links for automatic transcription.
         It can translate content, summarize it, and generate Q&A questions to help with studying.
         The app is ideal for students who want to review lectures, study materials, or any educational content more efficiently.
         """
@@ -220,8 +171,6 @@ with gr.Blocks() as demo:
 
     language = gr.Radio(choices=["Arabic", "English"], label="Preferred Language", value="English")
     summarize = gr.Checkbox(label="Summarize the content?")
-    qna = gr.Checkbox(label="Generate Q&A about the content?")
-    number = gr.Number(label="How many questions do you want at maximum?", value=5)
 
     examples = [
         ["Audio Upload", "audio-example.mp3", None, None, "English", True, True, 5],
@@ -230,7 +179,7 @@ with gr.Blocks() as demo:
     ]
     gr.Examples(
         examples=examples,
-        inputs=[content_type, file_input, youtube_input, video_input, language, summarize, qna, number],
+        inputs=[content_type, file_input, youtube_input, video_input, language, summarize],
        label="Try These Examples"
    )
 
@@ -238,9 +187,6 @@ with gr.Blocks() as demo:
         summary_output = gr.Textbox(label="Summary", interactive=False)
         audio_output = gr.Audio(label="Audio Summary")
 
-    with gr.Tab("Q&A"):
-        qna_output = gr.Markdown(label="Q&A Request")
-
     with gr.Tab("Interactive Q&A"):
         user_question = gr.Textbox(label="Ask a Question", placeholder="Enter your question here...")
         qa_button = gr.Button("Get Answer")
@@ -250,7 +196,7 @@ with gr.Blocks() as demo:
 
     content_type.change(content_input_update, inputs=[content_type], outputs=[file_input, youtube_input, video_input])
     submit_btn = gr.Button("Submit")
-    submit_btn.click(main, inputs=[content_type, file_input, youtube_input, video_input, language, summarize, qna, number],
-                     outputs=[summary_output, qna_output, audio_output])
+    submit_btn.click(main, inputs=[content_type, file_input, youtube_input, video_input, language, summarize],
+                     outputs=[summary_output, audio_output])
 
     demo.launch(share=True)
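
Note on what remains after this commit: the automatic question-generation path (the cloned question_generation repo and the valhalla/t5-small-qg-prepend pipeline) is removed, while the "Interactive Q&A" tab and the deepset/roberta-base-squad2 pipeline are kept. The handler behind qa_button sits outside the changed hunks and is not visible in this diff; the sketch below is only an assumption of how such a handler could use the retained qa_pipeline against the transcribed text. The function name answer_question and the wiring comment are illustrative, not taken from app.py.

# Minimal sketch (assumed, not part of this diff): extractive Q&A over the transcription.
from transformers import pipeline

qa_pipeline = pipeline(task="question-answering", model="deepset/roberta-base-squad2")

def answer_question(user_question, transcription):
    # Hypothetical handler for the "Interactive Q&A" tab; the real one is not shown above.
    if not transcription:
        return "Please transcribe some content first."
    result = qa_pipeline(question=user_question, context=transcription)
    return result["answer"]

# Illustrative wiring, mirroring the submit_btn.click pattern used in app.py;
# the actual qa_button.click call and its output component are not part of this diff.
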