Spaces:
Sleeping
Sleeping
commit
Browse files
app.py
CHANGED
@@ -1,12 +1,23 @@
|
|
1 |
import streamlit as st
|
2 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
3 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4 |
|
5 |
# Load BART model and tokenizer
|
6 |
model_name = 'facebook/bart-large-cnn'
|
7 |
tokenizer = BartTokenizer.from_pretrained(model_name)
|
8 |
model = BartForConditionalGeneration.from_pretrained(model_name)
|
9 |
|
|
|
10 |
@st.cache
|
11 |
def get_transcript(url):
|
12 |
try:
|
@@ -17,8 +28,9 @@ def get_transcript(url):
|
|
17 |
transcript_text += item['text'] + "\n"
|
18 |
return transcript_text
|
19 |
except Exception as e:
|
20 |
-
return
|
21 |
|
|
|
22 |
@st.cache
|
23 |
def summarize_transcript(transcript):
|
24 |
input_ids = tokenizer.encode("summarize: " + transcript, return_tensors="pt", max_length=1024, truncation=True)
|
@@ -26,19 +38,18 @@ def summarize_transcript(transcript):
|
|
26 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
27 |
return summary
|
28 |
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
st.subheader("Summary:")
|
41 |
-
st.write(summary)
|
42 |
|
|
|
43 |
if __name__ == "__main__":
|
44 |
-
|
|
|
1 |
import streamlit as st
|
2 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
3 |
from youtube_transcript_api import YouTubeTranscriptApi
|
4 |
+
from fastapi import FastAPI, Request
|
5 |
+
from fastapi.responses import JSONResponse
|
6 |
+
import uvicorn
|
7 |
+
|
8 |
+
# Initialize Streamlit app
|
9 |
+
st.title("YouTube Video Transcription Summarizer")
|
10 |
+
video_url = st.text_input("Enter YouTube Video URL:")
|
11 |
+
|
12 |
+
# Initialize FastAPI app
|
13 |
+
app = FastAPI()
|
14 |
|
15 |
# Load BART model and tokenizer
|
16 |
model_name = 'facebook/bart-large-cnn'
|
17 |
tokenizer = BartTokenizer.from_pretrained(model_name)
|
18 |
model = BartForConditionalGeneration.from_pretrained(model_name)
|
19 |
|
20 |
+
# Function to fetch transcript from YouTube URL
|
21 |
@st.cache
|
22 |
def get_transcript(url):
|
23 |
try:
|
|
|
28 |
transcript_text += item['text'] + "\n"
|
29 |
return transcript_text
|
30 |
except Exception as e:
|
31 |
+
return None
|
32 |
|
33 |
+
# Function to summarize transcript
|
34 |
@st.cache
|
35 |
def summarize_transcript(transcript):
|
36 |
input_ids = tokenizer.encode("summarize: " + transcript, return_tensors="pt", max_length=1024, truncation=True)
|
|
|
38 |
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
|
39 |
return summary
|
40 |
|
41 |
+
# API endpoint to summarize transcript
|
42 |
+
@app.post("/summarize_transcript")
|
43 |
+
async def summarize_transcript_api(request: Request):
|
44 |
+
data = await request.json()
|
45 |
+
video_url = data.get('video_url')
|
46 |
+
transcript = get_transcript(video_url)
|
47 |
+
if transcript:
|
48 |
+
summary = summarize_transcript(transcript)
|
49 |
+
return JSONResponse(content={"summary": summary})
|
50 |
+
else:
|
51 |
+
return JSONResponse(content={"error": "Error fetching transcript."}, status_code=400)
|
|
|
|
|
52 |
|
53 |
+
# Run the FastAPI app with uvicorn (serves the /summarize_transcript endpoint on port 8000)
|
54 |
if __name__ == "__main__":
|
55 |
+
uvicorn.run(app, host="0.0.0.0", port=8000)
|