Create pages/Video_Chat.py
Browse files- pages/Video_Chat.py +177 -0
pages/Video_Chat.py
ADDED
@@ -0,0 +1,177 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import os
|
3 |
+
from llama_index.core.indices.vector_store.base import VectorStoreIndex
|
4 |
+
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
5 |
+
from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
6 |
+
from llama_index.core import Settings
|
7 |
+
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
|
8 |
+
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
9 |
+
from llama_index.readers.youtube_transcript.utils import is_youtube_video
|
10 |
+
import qdrant_client
|
11 |
+
from llama_index.core.indices.query.schema import QueryBundle
|
12 |
+
from llama_index.llms.gemini import Gemini
|
13 |
+
from llama_index.embeddings.gemini import GeminiEmbedding
|
14 |
+
from llama_index.core.memory import ChatMemoryBuffer
|
15 |
+
from llama_index.readers.web import FireCrawlWebReader
|
16 |
+
from llama_index.core import SummaryIndex
|
17 |
+
import streamlit_analytics2 as streamlit_analytics
|
18 |
+
import time
|
19 |
+
import dotenv
|
20 |
+
|
21 |
+
dotenv.load_dotenv()

# Set page config
#st.set_page_config(page_title="Talk to Software Documentation", page_icon="📚", layout="wide")

# Initialize session state
# Each key is seeded only when it is absent, so values already stored in
# st.session_state are left untouched.
for state_key, initial_value in (
    ('setup_complete', False),
    ('documents', None),
    ('chat_history', []),
    ('index', None),
    ('url', ""),
    ('collection_name', ""),
    ('query', ""),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
|
40 |
+
|
41 |
+
# Re-export GOOGLE_API_KEY into the process environment exactly once.
# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises TypeError when the variable is unset; guard against None so
# the page still loads and the missing key surfaces where it is actually used.
_google_api_key = os.getenv("GOOGLE_API_KEY")
if _google_api_key is not None:
    os.environ["GOOGLE_API_KEY"] = _google_api_key
|
43 |
+
|
44 |
+
# Setup functions
|
45 |
+
def embed_setup():
    """Install the embedding model and the LLM on the global ``Settings`` object.

    Sets ``Settings.embed_model`` to a FastEmbed ``BAAI/bge-small-en-v1.5``
    embedding and ``Settings.llm`` to a Gemini model (temperature 0.1).
    Returns nothing.
    """
    embedding_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.embed_model = embedding_model

    gemini_llm = Gemini(temperature=0.1, model_name="models/gemini-pro")
    Settings.llm = gemini_llm
|
48 |
+
|
49 |
+
def qdrant_setup():
    """Build a Qdrant client from the ``QDRANT_URL`` and ``QDRANT_API_KEY`` env vars.

    Returns:
        The constructed ``qdrant_client.QdrantClient``.
    """
    qdrant_url = os.getenv("QDRANT_URL")
    qdrant_api_key = os.getenv("QDRANT_API_KEY")
    # The URL is passed positionally, exactly as the client was called before.
    return qdrant_client.QdrantClient(qdrant_url, api_key=qdrant_api_key)
|
55 |
+
|
56 |
+
def llm_setup():
    """Create the Gemini LLM, passing the ``GOOGLE_API_KEY`` env var explicitly.

    Returns:
        A ``Gemini`` instance configured with temperature 0.1 and the
        ``models/gemini-pro`` model name.
    """
    return Gemini(
        api_key=os.getenv("GOOGLE_API_KEY"),
        temperature=0.1,
        model_name="models/gemini-pro",
    )
|
59 |
+
|
60 |
+
def query_index(index, streaming=True):
    """Create a context-mode chat engine on top of ``index``.

    Args:
        index: Object exposing ``as_chat_engine`` (the app passes the index
            stored in ``st.session_state['index']``).
        streaming: Accepted for interface compatibility; it is not read
            anywhere in this function.

    Returns:
        The chat engine returned by ``index.as_chat_engine``, backed by a new
        ``ChatMemoryBuffer`` with a 4000-token limit.
    """
    # NOTE(review): the prompt contains {context_str} and {query_str}
    # placeholders; whether the context chat engine substitutes them inside
    # ``system_prompt`` is not visible from this file - confirm.
    assistant_prompt = (
        """You are an AI assistant for developers, specializing in technical documentation. Your task is to provide accurate, concise, and helpful responses based on the given documentation context.
Context information is below:
{context_str}
Always answer based on the information in the context and general knowledge and be precise
Given this context, please respond to the following user query:
{query_str}
Your response should:
Directly address the query using information from the context
Include relevant code examples or direct quotes if applicable
Mention specific sections or pages of the documentation
Highlight any best practices or potential pitfalls related to the query
After your response, suggest 3 follow-up questions based on the context that the user might find helpful for deeper understanding.
Your response:"""
    )

    # A fresh buffer is created on every call, so each engine starts with
    # empty conversation memory.
    conversation_memory = ChatMemoryBuffer.from_defaults(token_limit=4000)

    return index.as_chat_engine(
        chat_mode="context",
        memory=conversation_memory,
        system_prompt=assistant_prompt,
    )
|
82 |
+
|
83 |
+
# Document ingestion function
|
84 |
+
def ingest_documents(url):
    """Load the transcript of a YouTube video as documents.

    Args:
        url: Link to check with ``is_youtube_video`` and, if accepted, to load.

    Returns:
        The documents returned by ``YoutubeTranscriptReader.load_data`` for a
        supported link. For an unsupported link an error is shown through
        ``st.error`` and ``None`` is returned.
    """
    transcript_reader = YoutubeTranscriptReader()

    # Guard clause: reject anything the reader's own URL check does not accept.
    if not is_youtube_video(url):
        st.error("Link not supported unfortunately, the link should follow the format: <https://youtube.com/watch?v={video_id}> ")
        return None

    return transcript_reader.load_data(ytlinks=[url])
|
94 |
+
|
95 |
+
|
96 |
+
|
97 |
+
# Streamlit app
|
98 |
+
# Page header and usage instructions.
# The wording previously described a documentation crawler; this page only
# ingests YouTube transcripts, so the title and steps now say so.
st.title("Talk to Video Content")

st.markdown("""
This tool allows you to chat with Video Content. Here's how to use it:
1. Enter the URL of the YouTube video you want to chat about (optional if using an existing collection).
2. Enter the collection name for the vector store.
3. Click the "Ingest and Setup" button to load the video transcript (if URL provided) and set up the query engine.
4. Once setup is complete, enter your query in the text box.
5. Click "Search" to get a response based on the video.
6. View your chat history in the sidebar.
""")
|
109 |
+
|
110 |
+
# Seconds to wait before the single retry of a failed chat request.
RETRY_DELAY_SECONDS = 120

with streamlit_analytics.track():
    # NOTE(review): the original indentation was lost in extraction; all
    # widgets below, including the sidebar, are assumed to belong inside the
    # analytics tracking context - confirm against the deployed file.

    # URL input for document ingestion
    st.session_state['url'] = st.text_input(
        "Enter URL to crawl and ingest documents (optional):",
        value=st.session_state['url'],
    )

    # Collection name input
    st.session_state['collection_name'] = st.text_input(
        "Enter collection name for vector store:",
        value=st.session_state['collection_name'],
    )

    # Combined Ingest and Setup button
    if st.button("Ingest and Setup"):
        if not st.session_state['collection_name'].strip():
            # Without a collection name there is nothing to read from or write to.
            st.error("Please enter a collection name for the vector store")
        else:
            with st.spinner("Setting up query engine..."):
                embed_setup()
                client = qdrant_setup()
                # Actually use the LLM built with the explicit API key instead
                # of constructing it and throwing it away.
                Settings.llm = llm_setup()
                vector_store = QdrantVectorStore(
                    client=client,
                    collection_name=st.session_state['collection_name'],
                )
                storage_context = StorageContext.from_defaults(vector_store=vector_store)

                setup_succeeded = False
                if st.session_state['url']:
                    documents = ingest_documents(st.session_state['url'])
                    if documents:
                        st.session_state['documents'] = documents
                        st.session_state['index'] = VectorStoreIndex.from_documents(
                            documents,
                            vector_store=vector_store,
                            storage_context=storage_context,
                        )
                        st.success(f"Documents ingested from {st.session_state['url']} and query engine setup completed successfully!")
                        setup_succeeded = True
                    elif documents is not None:
                        # ingest_documents reports unsupported links itself
                        # (returning None); an empty result needs its own message.
                        st.error("No transcript content could be loaded from the provided URL")
                else:
                    st.session_state['index'] = VectorStoreIndex.from_vector_store(
                        vector_store=vector_store,
                        storage_context=storage_context,
                    )
                    st.success(f"Query engine setup completed successfully using existing collection: {st.session_state['collection_name']}")
                    setup_succeeded = True

                if setup_succeeded:
                    st.session_state['setup_complete'] = True
                    # A new index invalidates any chat engine built on the old one.
                    st.session_state['chat_engine'] = None

    # Query input
    st.session_state['query'] = st.text_input("Enter your query:", value=st.session_state['query'])

    # Search button
    if st.button("Search"):
        if not st.session_state['setup_complete']:
            st.error("Please complete the setup first")
        elif st.session_state['query']:
            response = None
            with st.spinner("Searching..."):
                # One initial attempt plus one delayed retry.
                for attempt in range(2):
                    try:
                        # Reuse the engine across searches so its conversation
                        # memory persists between questions.
                        chat_engine = st.session_state.get('chat_engine')
                        if chat_engine is None:
                            chat_engine = query_index(st.session_state['index'])
                            st.session_state['chat_engine'] = chat_engine
                        response = chat_engine.chat(st.session_state['query'])
                        break
                    except Exception as e:  # UI boundary: report instead of crashing the page
                        if attempt == 0:
                            st.error(f"An error occurred: {str(e)}")
                            st.info(f"Retrying in {RETRY_DELAY_SECONDS} seconds...")
                            time.sleep(RETRY_DELAY_SECONDS)
                        else:
                            st.error(f"Retry failed. Error: {str(e)}")

            # On a failed retry nothing is recorded, but the rest of the page
            # (including the sidebar) still renders.
            if response is not None:
                # Add the query and response to chat history
                st.session_state['chat_history'].append(("User", st.session_state['query']))
                st.session_state['chat_history'].append(("Assistant", str(response.response)))

                # Display the most recent response prominently
                st.subheader("Assistant's Response:")
                st.write(response.response)
        else:
            st.error("Please enter a query")

    # Sidebar for chat history
    st.sidebar.title("Chat History")

    # Clear chat history button in sidebar
    # Handled before the history is rendered so a click takes effect in the
    # same run instead of leaving the old messages on screen.
    if st.sidebar.button("Clear Chat History"):
        st.session_state['chat_history'] = []
        # Drop the cached engine too, so its conversation memory is cleared.
        st.session_state['chat_engine'] = None
        st.sidebar.success("Chat history cleared!")

    for role, message in st.session_state['chat_history']:
        st.sidebar.text(f"{role}: {message}")
|