Illia56 committed on
Commit
3b29bee
·
1 Parent(s): d29f9a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -67
app.py CHANGED
@@ -15,21 +15,22 @@ import streamlit as st
15
  from pytube import YouTube
16
  # import replicate
17
 
18
-
19
-
20
-
21
-
22
- models = '''| Model | Llama2 | Llama2-hf | Llama2-chat | Llama2-chat-hf |
23
- |---|---|---|---|---|
24
- | 70B | [Link](https://huggingface.co/meta-llama/Llama-2-70b) | [Link](https://huggingface.co/meta-llama/Llama-2-70b-hf) | [Link](https://huggingface.co/meta-llama/Llama-2-70b-chat) | [Link](https://huggingface.co/meta-llama/Llama-2-70b-chat-hf) |
25
- ---'''
26
-
 
27
 
28
  DESCRIPTION = """
29
- Welcome to the **YouTube Video Chatbot** powered by the state-of-the-art Llama-2-70b model. Here's what you can do:
30
  - **Transcribe & Understand**: Provide any YouTube video URL, and our system will transcribe it. Our advanced NLP model will then understand the content, ready to answer your questions.
31
  - **Ask Anything**: Based on the video's content, ask any question, and get instant, context-aware answers.
32
- To get started, simply paste a YouTube video URL in the sidebar and start chatting with the model about the video's content. Enjoy the experience!
33
  """
34
  st.title("YouTube Video Chatbot")
35
  st.markdown(DESCRIPTION)
@@ -37,10 +38,9 @@ st.markdown(DESCRIPTION)
37
  def get_video_title(youtube_url: str) -> str:
38
  yt = YouTube(youtube_url)
39
  embed_url = f"https://www.youtube.com/embed/{yt.video_id}"
40
- embed_html = f'<iframe src="{embed_url}" frameborder="0" allowfullscreen></iframe>'
41
  return yt.title, embed_html
42
 
43
-
44
  def transcribe_video(youtube_url: str, path: str) -> List[Document]:
45
  """
46
  Transcribe a video and return its content as a Document.
@@ -48,23 +48,16 @@ def transcribe_video(youtube_url: str, path: str) -> List[Document]:
48
  logging.info(f"Transcribing video: {youtube_url}")
49
  client = Client("https://sanchit-gandhi-whisper-jax.hf.space/")
50
  result = client.predict(youtube_url, "translate", True, fn_index=7)
51
- return [Document(page_content=result[1], metadata=dict(page=1))]
52
 
53
- def predict(message: str, system_prompt: str = '', temperature: float = 0.7, max_new_tokens: int = 4096,
54
- topp: float = 0.5, repetition_penalty: float = 1.2) -> Any:
 
55
  """
56
  Predict a response using a client.
57
  """
58
- client = Client("https://ysharma-explore-llamav2-with-tgi.hf.space/")
59
- response = client.predict(
60
- message,
61
- system_prompt,
62
- temperature,
63
- max_new_tokens,
64
- topp,
65
- repetition_penalty,
66
- api_name="/chat_1"
67
- )
68
  return response
69
 
70
  PATH = os.path.join(os.path.expanduser("~"), "Data")
@@ -72,16 +65,20 @@ PATH = os.path.join(os.path.expanduser("~"), "Data")
72
  def initialize_session_state():
73
  if "youtube_url" not in st.session_state:
74
  st.session_state.youtube_url = ""
75
- if "setup_done" not in st.session_state: # Initialize the setup_done flag
 
 
76
  st.session_state.setup_done = False
77
  if "doneYoutubeurl" not in st.session_state:
78
  st.session_state.doneYoutubeurl = ""
79
 
80
  def sidebar():
81
  with st.sidebar:
82
- st.markdown("Enter the YouTube Video URL below🔗\n")
83
  st.session_state.youtube_url = st.text_input("YouTube Video URL:")
84
 
 
 
85
 
86
  if st.session_state.youtube_url:
87
  # Get the video title
@@ -89,17 +86,7 @@ def sidebar():
89
  st.markdown(f"### {video_title}")
90
 
91
  # Embed the video
92
- st.markdown(
93
- embed_html,
94
- unsafe_allow_html=True
95
- )
96
-
97
- # system_promptSide = st.text_input("Optional system prompt:")
98
- # temperatureSide = st.slider("Temperature", min_value=0.0, max_value=1.0, value=0.9, step=0.05)
99
- # max_new_tokensSide = st.slider("Max new tokens", min_value=0.0, max_value=4096.0, value=4096.0, step=64.0)
100
- # ToppSide = st.slider("Top-p (nucleus sampling)", min_value=0.0, max_value=1.0, value=0.6, step=0.05)
101
- # RepetitionpenaltySide = st.slider("Repetition penalty", min_value=0.0, max_value=2.0, value=1.2, step=0.05)
102
-
103
 
104
  sidebar()
105
  initialize_session_state()
@@ -124,9 +111,9 @@ class LlamaLLM(LLM):
124
  def _llm_type(self) -> str:
125
  return "custom"
126
 
127
- def _call(self, prompt: str, stop: Optional[List[str]] = None,
128
- run_manager: Optional[CallbackManagerForLLMRun] = None) -> str:
129
- response = predict(prompt)
130
  return response
131
 
132
  @property
@@ -134,46 +121,45 @@ class LlamaLLM(LLM):
134
  """Get the identifying parameters."""
135
  return {}
136
 
137
-
138
  # Check if a new YouTube URL is provided
139
  if st.session_state.youtube_url != st.session_state.doneYoutubeurl:
140
  st.session_state.setup_done = False
141
 
142
- if st.session_state.youtube_url and not st.session_state.setup_done :
143
  with st.status("Transcribing video..."):
144
- data = transcribe_video(st.session_state.youtube_url, PATH)
145
-
146
  with st.status("Running Embeddings..."):
147
- docs = text_splitter.split_documents(data)
148
 
149
- docsearch = FAISS.from_documents(docs, embeddings)
150
- retriever = docsearch.as_retriever()
151
- retriever.search_kwargs['distance_metric'] = 'cos'
152
- retriever.search_kwargs['k'] = 4
153
  with st.status("Running RetrievalQA..."):
154
- llama_instance = LlamaLLM()
155
- st.session_state.qa = RetrievalQA.from_chain_type(llm=llama_instance, chain_type="stuff", retriever=retriever,chain_type_kwargs={"prompt": prompt})
156
-
157
  st.session_state.doneYoutubeurl = st.session_state.youtube_url
158
  st.session_state.setup_done = True # Mark the setup as done for this URL
159
 
160
  if "messages" not in st.session_state:
161
- st.session_state.messages = []
162
 
163
  for message in st.session_state.messages:
164
- with st.chat_message(message["role"], avatar=("🧑‍💻" if message["role"] == 'human' else '🦙')):
165
  st.markdown(message["content"])
166
 
167
- textinput = st.chat_input("Ask LLama-2-70b anything about the video...")
168
 
169
  if prompt := textinput:
170
- st.chat_message("human",avatar = "🧑‍💻").markdown(prompt)
171
- st.session_state.messages.append({"role": "human", "content": prompt})
172
- with st.status("Requesting Client..."):
173
- video_title, _ = get_video_title(st.session_state.youtube_url)
174
- additional_context = f"Given the context about a video titled '{video_title}' available at '{st.session_state.youtube_url}'."
175
- response = st.session_state.qa.run( prompt)
176
- with st.chat_message("assistant", avatar='🦙'):
177
- st.markdown(response)
178
- # Add assistant response to chat history
179
- st.session_state.messages.append({"role": "assistant", "content": response})
 
15
  from pytube import YouTube
16
  # import replicate
17
 
18
+ models = {
19
+ "Llama2-70b": {
20
+ "model_link": "https://huggingface.co/meta-llama/Llama-2-70b",
21
+ "chat_link": "https://ysharma-explore-llamav2-with-tgi.hf.space/",
22
+ },
23
+ "Llama2-13b": {
24
+ "model_link": "https://huggingface.co/meta-llama/Llama-2-13b",
25
+ "chat_link": "https://huggingface-projects-llama-2-13b-chat.hf.space/",
26
+ }
27
+ }
28
 
29
  DESCRIPTION = """
30
+ Welcome to the **YouTube Video Chatbot** powered by Llama-2 models. Here's what you can do:
31
  - **Transcribe & Understand**: Provide any YouTube video URL, and our system will transcribe it. Our advanced NLP model will then understand the content, ready to answer your questions.
32
  - **Ask Anything**: Based on the video's content, ask any question, and get instant, context-aware answers.
33
+ To get started, simply paste a YouTube video URL and select a model in the sidebar, then start chatting with the model about the video's content. Enjoy the experience!
34
  """
35
  st.title("YouTube Video Chatbot")
36
  st.markdown(DESCRIPTION)
 
38
  def get_video_title(youtube_url: str) -> str:
39
  yt = YouTube(youtube_url)
40
  embed_url = f"https://www.youtube.com/embed/{yt.video_id}"
41
+ embed_html = f'<iframe src="{embed_url}" frameborder="0" allowfullscreen></iframe>'
42
  return yt.title, embed_html
43
 
 
44
  def transcribe_video(youtube_url: str, path: str) -> List[Document]:
45
  """
46
  Transcribe a video and return its content as a Document.
 
48
  logging.info(f"Transcribing video: {youtube_url}")
49
  client = Client("https://sanchit-gandhi-whisper-jax.hf.space/")
50
  result = client.predict(youtube_url, "translate", True, fn_index=7)
51
+ return [Document(page_content=result[1], metadata=dict(page=1)]
52
 
53
+ def predict(
54
+ message: str, system_prompt: str = "", model_url: str = models["Llama2-70b"]["chat_link"]
55
+ ) -> Any:
56
  """
57
  Predict a response using a client.
58
  """
59
+ client = Client(model_url)
60
+ response = client.predict(message, system_prompt, 0.7, 4096, 0.5, 1.2, api_name="/chat_1")
 
 
 
 
 
 
 
 
61
  return response
62
 
63
  PATH = os.path.join(os.path.expanduser("~"), "Data")
 
65
  def initialize_session_state():
66
  if "youtube_url" not in st.session_state:
67
  st.session_state.youtube_url = ""
68
+ if "model_choice" not in st.session_state:
69
+ st.session_state.model_choice = "Llama2-70b"
70
+ if "setup_done" not in st.session_state:
71
  st.session_state.setup_done = False
72
  if "doneYoutubeurl" not in st.session_state:
73
  st.session_state.doneYoutubeurl = ""
74
 
75
  def sidebar():
76
  with st.sidebar:
77
+ st.markdown("Enter the YouTube Video URL below🔗")
78
  st.session_state.youtube_url = st.text_input("YouTube Video URL:")
79
 
80
+ model_choice = st.radio("Choose a Model:", list(models.keys()))
81
+ st.session_state.model_choice = model_choice
82
 
83
  if st.session_state.youtube_url:
84
  # Get the video title
 
86
  st.markdown(f"### {video_title}")
87
 
88
  # Embed the video
89
+ st.markdown(embed_html, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
90
 
91
  sidebar()
92
  initialize_session_state()
 
111
  def _llm_type(self) -> str:
112
  return "custom"
113
 
114
+ def _call(self, prompt: str, stop: Optional[List[str]] = None, run_manager: Optional[CallbackManagerForLLMRun] = None) -> str:
115
+ model_link = models[st.session_state.model_choice]["chat_link"]
116
+ response = predict(prompt, model_url=model_link)
117
  return response
118
 
119
  @property
 
121
  """Get the identifying parameters."""
122
  return {}
123
 
 
124
  # Check if a new YouTube URL is provided
125
  if st.session_state.youtube_url != st.session_state.doneYoutubeurl:
126
  st.session_state.setup_done = False
127
 
128
+ if st.session_state.youtube_url and not st.session_state.setup_done:
129
  with st.status("Transcribing video..."):
130
+ data = transcribe_video(st.session_state.youtube_url, PATH)
131
+
132
  with st.status("Running Embeddings..."):
133
+ docs = text_splitter.split_documents(data)
134
 
135
+ docsearch = FAISS.from_documents(docs, embeddings)
136
+ retriever = docsearch.as_retriever()
137
+ retriever.search_kwargs["distance_metric"] = "cos"
138
+ retriever.search_kwargs["k"] = 4
139
  with st.status("Running RetrievalQA..."):
140
+ llama_instance = LlamaLLM()
141
+ st.session_state.qa = RetrievalQA.from_chain_type(llm=llama_instance, chain_type="stuff", retriever=retriever, chain_type_kwargs={"prompt": prompt})
142
+
143
  st.session_state.doneYoutubeurl = st.session_state.youtube_url
144
  st.session_state.setup_done = True # Mark the setup as done for this URL
145
 
146
  if "messages" not in st.session_state:
147
+ st.session_state.messages = []
148
 
149
  for message in st.session_state.messages:
150
+ with st.chat_message(message["role"], avatar=("🧑‍💻" if message["role"] == "human" else "🦙")):
151
  st.markdown(message["content"])
152
 
153
+ textinput = st.chat_input("Ask anything about the video...")
154
 
155
  if prompt := textinput:
156
+ st.chat_message("human", avatar="🧑‍💻").markdown(prompt)
157
+ st.session_state.messages.append({"role": "human", "content": prompt})
158
+ with st.status("Requesting Client..."):
159
+ video_title, _ = get_video_title(st.session_state.youtube_url)
160
+ additional_context = f"Given the context about a video titled '{video_title}' available at '{st.session_state.youtube_url}'."
161
+ response = st.session_state.qa.run(prompt + " " + additional_context)
162
+ with st.chat_message("assistant", avatar="🦙"):
163
+ st.markdown(response)
164
+ # Add assistant response to chat history
165
+ st.session_state.messages.append({"role": "assistant", "content": response})