Samizie committed (verified)
Commit 10d0120 · 1 parent: 007522a

Update app.py

Files changed (1): app.py (+176, -173)
app.py CHANGED
@@ -1,173 +1,176 @@
+import subprocess
 import streamlit as st
 from decouple import config
 import asyncio
 from langchain.chains import create_retrieval_chain
 from langchain.chains.combine_documents import create_stuff_documents_chain
 from langchain_groq import ChatGroq
 from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
 from langchain_core.messages import SystemMessage
 from scraper.scraper import process_urls
 from embedding.vector_store import initialize_vector_store, clear_chroma_db
 from conversation.talks import clean_input, small_talks

+subprocess.run(["playwright", "install"], check=True)
+
 #Clearing ChromaDB at startup to clean up any previous data
 clear_chroma_db()




 #Groq API Key
 groq_api = config("GROQ_API_KEY")

 #Initializing LLM with memory
 llm = ChatGroq(model="llama-3.2-1b-preview", groq_api_key=groq_api, temperature=0)



 #Ensure proper asyncio handling for Windows
 import sys
 if sys.platform.startswith("win"):
     asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

 #Async helper function
 def run_asyncio_coroutine(coro):
     loop = asyncio.new_event_loop()
     asyncio.set_event_loop(loop)
     return loop.run_until_complete(coro)

 import streamlit as st

 st.title("WebGPT 1.0 🤖")

 # URL inputs
 urls = st.text_area("Enter URLs (one per line)")
 run_scraper = st.button("Run Scraper", disabled=not urls.strip())

 # Sessions & states
 if "messages" not in st.session_state:
     st.session_state.messages = []  # Chat history
 if "history" not in st.session_state:
     st.session_state.history = ""  # Stores past Q&A for memory
 if "scraping_done" not in st.session_state:
     st.session_state.scraping_done = False
 if "vector_store" not in st.session_state:
     st.session_state.vector_store = None

 # Run scraper
 if run_scraper:
     st.write("Fetching and processing URLs... This may take a while.")
     split_docs = run_asyncio_coroutine(process_urls(urls.split("\n")))
     st.session_state.vector_store = initialize_vector_store(split_docs)
     st.session_state.scraping_done = True
     st.success("Scraping and processing completed!")

 # ✅ Clear chat button
 if st.button("Clear Chat"):
     st.session_state.messages = []  # Reset message history
     st.session_state.history = ""  # Reset history tracking
     st.success("Chat cleared!")

 # Ensuring chat only enables after scraping
 if not st.session_state.scraping_done:
     st.warning("Scrape some data first to enable chat!")
 else:
     st.write("### Chat With WebGPT 💬")

     # Display chat history
     for message in st.session_state.messages:
         role, text = message["role"], message["text"]
         with st.chat_message(role):
             st.write(text)

     # Takes in user input
     user_query = st.chat_input("Ask a question...")

     if user_query:
         st.session_state.messages.append({"role": "user", "text": user_query})
         with st.chat_message("user"):
             st.write(user_query)

         user_query_cleaned = clean_input(user_query)
         response = ""  # Default value for response
         source_url = ""  # Default value for source url

         # Check for small talk responses
         if user_query_cleaned in small_talks:
             response = small_talks[user_query_cleaned]
             source_url = "Knowledge base"  # Small talk comes from the knowledge base

         else:
             # ✅ Setup retriever (with a similarity threshold or top-k retrieval)
             retriever = st.session_state.vector_store.as_retriever(
                 search_kwargs={'k': 5}
             )

             # ✅ Retrieve context
             retrieved_docs = retriever.invoke(user_query_cleaned)
             retrieved_text = " ".join([doc.page_content for doc in retrieved_docs])

             # ✅ Define Langchain PromptTemplate properly
             system_prompt_template = PromptTemplate(
                 input_variables=["context", "query"],
                 template="""
                 You are WebGPT, an AI assistant for question-answering tasks that **only answers questions based on the provided context**.

                 - Understand the context {context} first and provide a relevant answer.
                 - If the answer is **not** found in the Context, reply with: "I can't find your request in the provided context."
                 - If the question is **unrelated** to the Context, reply with: "I can't answer that. do not generate responses."
                 - **Do not** use external knowledge, assumptions, or filler responses. Stick to the context provided.
                 - Keep responses clear, concise, and relevant to the user's query.

                 Context:
                 {context}

                 Now, answer the user's question:
                 {input}
                 """
             )

             # ✅ Generate prompt with retrieved context & user query
             final_prompt = system_prompt_template.format(
                 context=retrieved_text,
                 input=user_query_cleaned
             )

             # ✅ Create chains (ensure the prompt is correct)
             scraper_chain = create_stuff_documents_chain(llm=llm, prompt=system_prompt_template)
             llm_chain = create_retrieval_chain(retriever, scraper_chain)

             # ✅ Process response and source
             if retrieved_docs:
                 try:
                     response_data = llm_chain.invoke({"context": retrieved_text, "input": user_query_cleaned})
                     response = response_data.get("answer", "").strip()
                     source_url = retrieved_docs[0].metadata.get("source", "Unknown")

                     # Fallback if response is still empty
                     if not response:
                         response = "I can't find your request in the provided context."
                         source_url = "No source found"

                 except Exception as e:
                     response = f"Error generating response: {str(e)}"
                     source_url = "Error"

             else:
                 response = "I can't find your request in the provided context."
                 source_url = "No source found"

         # ✅ Track history & update session state
         history_text = "\n".join(
             [f"User: {msg['text']}" if msg["role"] == "user" else f"AI: {msg['text']}" for msg in st.session_state.messages]
         )
         st.session_state.history = history_text

         # ✅ Format and display response
         formatted_response = f"**Answer:** {response}"
         if response != "I can't find your request in the provided context." and source_url:
             formatted_response += f"\n\n**Source:** {source_url}"

         st.session_state.messages.append({"role": "assistant", "text": formatted_response})
         with st.chat_message("assistant"):
             st.write(formatted_response)