Prat0 committed on
Commit
eebb559
·
verified ·
1 Parent(s): 1c66a46

Create pages/Video_Chat.py

Browse files
Files changed (1) hide show
  1. pages/Video_Chat.py +177 -0
pages/Video_Chat.py ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ from llama_index.core.indices.vector_store.base import VectorStoreIndex
4
+ from llama_index.vector_stores.qdrant import QdrantVectorStore
5
+ from llama_index.embeddings.fastembed import FastEmbedEmbedding
6
+ from llama_index.core import Settings
7
+ from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, StorageContext
8
+ from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
9
+ from llama_index.readers.youtube_transcript.utils import is_youtube_video
10
+ import qdrant_client
11
+ from llama_index.core.indices.query.schema import QueryBundle
12
+ from llama_index.llms.gemini import Gemini
13
+ from llama_index.embeddings.gemini import GeminiEmbedding
14
+ from llama_index.core.memory import ChatMemoryBuffer
15
+ from llama_index.readers.web import FireCrawlWebReader
16
+ from llama_index.core import SummaryIndex
17
+ import streamlit_analytics2 as streamlit_analytics
18
+ import time
19
+ import dotenv
20
+
21
+ dotenv.load_dotenv()
22
+ # Set page config
23
+ #st.set_page_config(page_title="Talk to Software Documentation", page_icon="📚", layout="wide")
24
+
25
# Initialize session state
# Each key is seeded only when it is absent, so values already stored in
# st.session_state are left untouched.
_SESSION_DEFAULTS = {
    'setup_complete': False,
    'documents': None,
    'chat_history': [],
    'index': None,
    'url': "",
    'collection_name': "",
    'query': "",
}
for _state_key, _initial_value in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
40
+
41
# dotenv.load_dotenv() has already been called, so GOOGLE_API_KEY is in
# os.environ whenever it is configured. Re-assigning the variable to itself is
# a no-op in that case and raises TypeError (os.environ rejects None) when the
# variable is missing, so check for it explicitly and report the problem.
if not os.getenv("GOOGLE_API_KEY"):
    st.error("GOOGLE_API_KEY is not set. Add it to the environment or to a .env file.")
    st.stop()
43
+
44
+ # Setup functions
45
def embed_setup():
    """Install the embedding model and LLM on the LlamaIndex ``Settings`` object.

    Sets ``Settings.embed_model`` to a FastEmbed ``BAAI/bge-small-en-v1.5``
    model and ``Settings.llm`` to a Gemini model with temperature 0.1.
    """
    embedder = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.embed_model = embedder

    gemini = Gemini(model_name="models/gemini-pro", temperature=0.1)
    Settings.llm = gemini
48
+
49
def qdrant_setup():
    """Create a Qdrant client from environment configuration.

    Reads ``QDRANT_URL`` (passed as the first positional argument) and
    ``QDRANT_API_KEY`` from the environment.

    Returns:
        The constructed ``qdrant_client.QdrantClient``.
    """
    endpoint = os.getenv("QDRANT_URL")
    secret = os.getenv("QDRANT_API_KEY")
    return qdrant_client.QdrantClient(endpoint, api_key=secret)
55
+
56
def llm_setup():
    """Create a Gemini LLM instance keyed by the ``GOOGLE_API_KEY`` environment variable.

    Returns:
        A ``Gemini`` object for ``models/gemini-pro`` with temperature 0.1.
    """
    return Gemini(
        api_key=os.getenv("GOOGLE_API_KEY"),
        temperature=0.1,
        model_name="models/gemini-pro",
    )
59
+
60
def query_index(index, streaming=True):
    """Build a context-mode chat engine on top of ``index``.

    Args:
        index: Object exposing ``as_chat_engine`` (the page passes the index
            stored in session state).
        streaming: Accepted for call compatibility; this function never reads it.

    Returns:
        The chat engine returned by ``index.as_chat_engine``, configured with a
        newly created ``ChatMemoryBuffer`` limited to 4000 tokens.
    """
    # NOTE(review): the prompt carries literal {context_str} and {query_str}
    # placeholders; confirm the context chat engine substitutes them when the
    # text is supplied as a system prompt.
    assistant_prompt = """You are an AI assistant for developers, specializing in technical documentation. Your task is to provide accurate, concise, and helpful responses based on the given documentation context.
Context information is below:
{context_str}
Always answer based on the information in the context and general knowledge and be precise
Given this context, please respond to the following user query:
{query_str}
Your response should:
Directly address the query using information from the context
Include relevant code examples or direct quotes if applicable
Mention specific sections or pages of the documentation
Highlight any best practices or potential pitfalls related to the query
After your response, suggest 3 follow-up questions based on the context that the user might find helpful for deeper understanding.
Your response:"""

    # A new buffer is created on every call, so each engine starts with empty memory.
    conversation_memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    return index.as_chat_engine(
        chat_mode="context",
        memory=conversation_memory,
        system_prompt=assistant_prompt,
    )
82
+
83
+ # Document ingestion function
84
def ingest_documents(url):
    """Load the transcript of a YouTube video as documents.

    Args:
        url: Link to validate with ``is_youtube_video`` and then load.

    Returns:
        The result of ``YoutubeTranscriptReader.load_data`` for a YouTube link;
        ``None`` for any other link, after an error is shown in the Streamlit UI.
    """
    transcript_reader = YoutubeTranscriptReader()

    # Guard clause: reject anything that is not recognised as a YouTube video link.
    if not is_youtube_video(url):
        st.error("Link not supported unfortunately, the link should follow the format: <https://youtube.com/watch?v={video_id}> ")
        return None

    return transcript_reader.load_data(ytlinks=[url])
94
+
95
+
96
+
97
+ # Streamlit app
98
# Page heading and usage instructions. The wording describes what this page
# actually ingests: transcripts of YouTube videos (see ingest_documents), not
# crawled documentation.
st.title("Chat with Video Content")

st.markdown("""
This tool allows you to chat with Video Content. Here's how to use it:
1. Enter the URL of the YouTube video you want to chat about (optional if using an existing collection).
2. Enter the collection name for the vector store.
3. Click the "Ingest and Setup" button to load the video transcript (if URL provided) and set up the query engine.
4. Once setup is complete, enter your query in the text box.
5. Click "Search" to get a response based on the video transcript.
6. View your chat history in the sidebar.
""")
109
+
110
with streamlit_analytics.track():
    # URL input for document ingestion
    st.session_state['url'] = st.text_input("Enter URL to crawl and ingest documents (optional):", value=st.session_state['url'])

    # Collection name input
    st.session_state['collection_name'] = st.text_input("Enter collection name for vector store:", value=st.session_state['collection_name'])

    # Combined Ingest and Setup button
    if st.button("Ingest and Setup"):
        if not st.session_state['collection_name'].strip():
            # The vector store is addressed by collection name, so there is
            # nothing to set up without one.
            st.error("Please enter a collection name for the vector store")
        else:
            with st.spinner("Setting up query engine..."):
                # embed_setup() installs both the embedding model and the LLM
                # on the global Settings object.
                embed_setup()
                client = qdrant_setup()
                vector_store = QdrantVectorStore(client=client, collection_name=st.session_state['collection_name'])
                storage_context = StorageContext.from_defaults(vector_store=vector_store)

                if st.session_state['url']:
                    documents = ingest_documents(st.session_state['url'])
                    st.session_state['documents'] = documents
                    if documents:
                        st.session_state['index'] = VectorStoreIndex.from_documents(documents, vector_store=vector_store, storage_context=storage_context)
                        st.session_state['setup_complete'] = True
                        st.success(f"Documents ingested from {st.session_state['url']} and query engine setup completed successfully!")
                    else:
                        # ingest_documents returns None for unsupported links;
                        # do not build an index or mark setup as complete.
                        st.warning("No documents were ingested, so setup was not completed.")
                else:
                    # No URL: attach to whatever the named collection already holds.
                    st.session_state['index'] = VectorStoreIndex.from_vector_store(vector_store=vector_store, storage_context=storage_context)
                    st.session_state['setup_complete'] = True
                    st.success(f"Query engine setup completed successfully using existing collection: {st.session_state['collection_name']}")

    # Query input
    st.session_state['query'] = st.text_input("Enter your query:", value=st.session_state['query'])

    # Search button
    if st.button("Search"):
        if not st.session_state['setup_complete']:
            st.error("Please complete the setup first")
        elif st.session_state['query']:
            max_attempts = 2
            retry_delay_seconds = 120
            response = None

            with st.spinner("Searching..."):
                for attempt in range(1, max_attempts + 1):
                    try:
                        chat_engine = query_index(st.session_state['index'])
                        response = chat_engine.chat(st.session_state['query'])
                        break
                    except Exception as e:
                        # Broad catch is deliberate at this UI boundary: any
                        # failure is shown to the user instead of crashing the page.
                        if attempt < max_attempts:
                            st.error(f"An error occurred: {str(e)}")
                            st.info(f"Retrying in {retry_delay_seconds} seconds...")
                            time.sleep(retry_delay_seconds)
                        else:
                            st.error(f"Retry failed. Error: {str(e)}")
                            # Abort this script run; nothing below can proceed without a response.
                            st.stop()

            # Add the query and response to chat history
            st.session_state['chat_history'].append(("User", st.session_state['query']))
            st.session_state['chat_history'].append(("Assistant", str(response.response)))

            # Display the most recent response prominently
            st.subheader("Assistant's Response:")
            st.write(response.response)
        else:
            st.error("Please enter a query")
168
+
169
# Sidebar for chat history
# NOTE(review): indentation was lost in the source of this file; confirm
# whether this section belongs inside the streamlit_analytics.track() block.
st.sidebar.title("Chat History")

# Reserve the history's position above the button, but fill it only after the
# button has been handled. Rendering the history first would keep showing the
# old messages for the rest of the run in which "Clear Chat History" is clicked.
history_area = st.sidebar.container()

# Clear chat history button in sidebar
if st.sidebar.button("Clear Chat History"):
    st.session_state['chat_history'] = []
    st.sidebar.success("Chat history cleared!")

for role, message in st.session_state['chat_history']:
    history_area.text(f"{role}: {message}")