JaganathC committed on
Commit
0d27e5e
·
verified ·
1 Parent(s): ca92cc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -51,18 +51,15 @@ def download_youtube_audio(url):
51
  'preferredcodec': 'wav',
52
  'preferredquality': '192',
53
  }],
54
- 'outtmpl': output_path,
55
  }
56
 
57
  try:
58
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
59
  ydl.download([url])
60
- if os.path.exists(output_path + ".wav"):
61
- os.rename(output_path + ".wav", output_path) # Ensure correct naming
62
  except Exception as e:
63
  return f"Error downloading audio: {str(e)}"
64
-
65
- return output_path if os.path.exists(output_path) else "Download Failed"
66
 
67
  def transcribe_audio(file_path):
68
  """Transcribes audio using `insanely-fast-whisper` and handles large files efficiently."""
@@ -96,9 +93,8 @@ def transcribe_audio(file_path):
96
  result = []
97
  try:
98
  with open(output_file, "r") as f:
99
- for line in f:
100
- chunk = json.loads(line.strip()) # Read JSON line by line
101
- result.append(chunk.get("text", ""))
102
  except Exception as e:
103
  return f"Error reading transcription file: {str(e)}"
104
 
@@ -110,18 +106,24 @@ def transcribe_audio(file_path):
110
 
111
  def generate_summary_stream(transcription):
112
  """Summarizes the transcription efficiently to avoid memory overflow."""
 
 
 
113
  detected_language = langdetect.detect(transcription[:1000]) # Detect using a smaller portion
114
 
115
  # Use smaller chunks for processing
116
- chunk_size = 2000
117
  transcript_chunks = [transcription[i:i+chunk_size] for i in range(0, len(transcription), chunk_size)]
118
  summary_result = []
119
 
120
- for chunk in transcript_chunks[:3]: # Process only the first 3 chunks to avoid OOM
121
  prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
122
- input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
123
- output_ids = model.generate(input_ids, max_length=300) # Limit output size
124
- response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
 
 
 
125
  summary_result.append(response)
126
 
127
  return "\n\n".join(summary_result)
@@ -167,4 +169,4 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
167
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
168
  summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
169
 
170
- demo.launch()
 
51
  'preferredcodec': 'wav',
52
  'preferredquality': '192',
53
  }],
54
+ 'outtmpl': output_path[:-4] # Remove .wav to prevent duplication
55
  }
56
 
57
  try:
58
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
59
  ydl.download([url])
60
+ return output_path if os.path.exists(output_path) else "Download Failed"
 
61
  except Exception as e:
62
  return f"Error downloading audio: {str(e)}"
 
 
63
 
64
  def transcribe_audio(file_path):
65
  """Transcribes audio using `insanely-fast-whisper` and handles large files efficiently."""
 
93
  result = []
94
  try:
95
  with open(output_file, "r") as f:
96
+ data = json.load(f) # Load full JSON safely
97
+ result = [chunk.get("text", "") for chunk in data]
 
98
  except Exception as e:
99
  return f"Error reading transcription file: {str(e)}"
100
 
 
106
 
107
  def generate_summary_stream(transcription):
108
  """Summarizes the transcription efficiently to avoid memory overflow."""
109
+ if not transcription:
110
+ return "No transcription available."
111
+
112
  detected_language = langdetect.detect(transcription[:1000]) # Detect using a smaller portion
113
 
114
  # Use smaller chunks for processing
115
+ chunk_size = 1000 # Reduce chunk size
116
  transcript_chunks = [transcription[i:i+chunk_size] for i in range(0, len(transcription), chunk_size)]
117
  summary_result = []
118
 
119
+ for chunk in transcript_chunks[:5]: # Process only the first 5 chunks
120
  prompt = f"""Summarize the following video transcription in 150-300 words in {detected_language}:\n{chunk}"""
121
+ try:
122
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
123
+ output_ids = model.generate(input_ids, max_length=300) # Limit output size
124
+ response = tokenizer.decode(output_ids[0], skip_special_tokens=True)
125
+ except Exception as e:
126
+ response = f"Error generating summary: {str(e)}"
127
  summary_result.append(response)
128
 
129
  return "\n\n".join(summary_result)
 
169
  url_button.click(process_youtube, inputs=[url_input], outputs=[transcription_output, summary_output])
170
  summary_button.click(generate_summary_stream, inputs=[transcription_output], outputs=[summary_output])
171
 
172
+ demo.launch(share=True, debug=True, queue=True)