shukdevdatta123 committed on
Commit
11c31e8
·
verified ·
1 Parent(s): 78e4317

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -257
app.py CHANGED
@@ -1,268 +1,42 @@
1
- import gradio as gr
2
- import requests
3
- from bs4 import BeautifulSoup
4
- from urllib.parse import urljoin, urlparse
5
- import re
6
- from openai import OpenAI
7
- import time
8
- import copy
9
 
10
- # Function to check if URL belongs to the website
11
- def is_valid_url(url, base_url):
12
- parsed_url = urlparse(url)
13
- parsed_base = urlparse(base_url)
14
- return parsed_url.netloc == parsed_base.netloc
15
 
16
- # Function to scrape content from a single page
17
- def scrape_page(url):
18
- try:
19
- response = requests.get(url, timeout=10)
20
- if response.status_code == 200:
21
- soup = BeautifulSoup(response.text, 'html.parser')
22
-
23
- # Remove script, style elements and comments
24
- for element in soup(['script', 'style', 'header', 'footer', 'nav']):
25
- element.decompose()
26
-
27
- # Get text content
28
- text = soup.get_text(separator=' ', strip=True)
29
-
30
- # Clean up whitespace
31
- text = re.sub(r'\s+', ' ', text).strip()
32
-
33
- return text
34
- else:
35
- return None
36
- except Exception as e:
37
- print(f"Error scraping {url}: {e}")
38
- return None
39
 
40
- # Function to crawl website and get all links
41
- def crawl_website(base_url, max_pages=80):
42
- print(f"Starting to crawl {base_url}")
43
- visited_urls = set()
44
- urls_to_visit = [base_url]
45
- site_content = {}
46
-
47
- while urls_to_visit and len(visited_urls) < max_pages:
48
- current_url = urls_to_visit.pop(0)
49
-
50
- if current_url in visited_urls:
51
- continue
52
-
53
- print(f"Crawling: {current_url}")
54
- visited_urls.add(current_url)
55
-
56
- try:
57
- response = requests.get(current_url, timeout=10)
58
- if response.status_code == 200:
59
- # Get content of the current page
60
- content = scrape_page(current_url)
61
- if content:
62
- site_content[current_url] = content
63
-
64
- # Find all links on the page
65
- soup = BeautifulSoup(response.text, 'html.parser')
66
- for link in soup.find_all('a', href=True):
67
- href = link['href']
68
- full_url = urljoin(current_url, href)
69
-
70
- # Only follow links that are part of the same website
71
- if is_valid_url(full_url, base_url) and full_url not in visited_urls:
72
- urls_to_visit.append(full_url)
73
-
74
- # Add a small delay to be respectful
75
- time.sleep(0.5)
76
-
77
- except Exception as e:
78
- print(f"Error visiting {current_url}: {e}")
79
-
80
- print(f"Crawled {len(visited_urls)} pages and collected content from {len(site_content)} pages.")
81
- return site_content
82
 
83
- # Function that creates a context from the scraped content
84
- def create_context(site_content, max_context_length=8000):
85
- context = "Content from https://innovativeskillsbd.com website:\n\n"
86
-
87
- for url, content in site_content.items():
88
- # Add URL and a portion of its content (limited to keep context manageable)
89
- page_content = f"Page: {url}\n{content[:1000]}...\n\n"
90
-
91
- # Check if adding this would exceed max context length
92
- if len(context) + len(page_content) > max_context_length:
93
- break
94
-
95
- context += page_content
96
-
97
- return context
98
 
99
- # Function to fix URLs in text to ensure they point to the correct domain
100
- def fix_urls_in_text(text):
101
- # Look for URLs in the text
102
- url_pattern = r'https?://[^\s/$.?#].[^\s]*'
103
- urls = re.findall(url_pattern, text)
104
-
105
- for url in urls:
106
- # If the URL contains the wrong domain but appears to be an InnovativeSkills link
107
- if ('innovative-skill.com' in url or 'innovativeskill.com' in url) and 'innovativeskillsbd.com' not in url:
108
- # Create the correct URL by replacing the domain
109
- path = urlparse(url).path
110
- correct_url = f"https://innovativeskillsbd.com{path}"
111
- # Replace in the text
112
- text = text.replace(url, correct_url)
113
-
114
- return text
115
 
116
- # Function to query the DeepSeek V3 model
117
- def query_model(api_key, messages):
118
- try:
119
- client = OpenAI(
120
- base_url="https://openrouter.ai/api/v1",
121
- api_key=api_key,
122
- )
123
-
124
- completion = client.chat.completions.create(
125
- extra_headers={
126
- "HTTP-Referer": "https://innovativeskillsbd.com",
127
- "X-Title": "InnovativeSkills ChatBot",
128
- },
129
- model="deepseek/deepseek-chat-v3-0324:free",
130
- messages=messages
131
- )
132
-
133
- response = completion.choices[0].message.content
134
-
135
- # Fix any incorrect URLs - ensure all links point to the correct domain
136
- response = fix_urls_in_text(response)
137
-
138
- return response
139
- except Exception as e:
140
- return f"Error querying the model: {str(e)}"
141
 
142
- # Function to answer questions based on website content
143
- def answer_question(api_key, question, site_content, history):
144
- if not api_key:
145
- return "Please enter your OpenRouter API key.", history
146
-
147
- # Prepare the context from scraped content
148
- context = create_context(site_content)
149
-
150
- # Create system message with context
151
- system_message = {
152
- "role": "system",
153
- "content": f"""You are a helpful AI assistant for InnovativeSkills Bangladesh, a website focused on helping people learn IT skills.
154
- Use the following content from the website to answer user questions. If the question is not related to the website or the
155
- information is not available in the content, politely say so and try to provide general guidance related to InnovativeSkills.
156
-
157
- IMPORTANT: When referring to any URLs related to the website, ALWAYS use the domain 'innovativeskillsbd.com' (NOT 'innovative-skill.com' or 'innovativeskill.com').
158
- For example, use 'https://innovativeskillsbd.com/student-job-success' instead of any other domain.
159
-
160
- {context}"""
161
- }
162
-
163
- # Create user message
164
- user_message = {"role": "user", "content": question}
165
-
166
- # Create message history for the API call
167
- messages = [system_message]
168
-
169
- # Add conversation history
170
- for user_msg, assistant_msg in history:
171
- messages.append({"role": "user", "content": user_msg})
172
- messages.append({"role": "assistant", "content": assistant_msg})
173
-
174
- # Add current question
175
- messages.append(user_message)
176
-
177
- # Query the model
178
- response = query_model(api_key, messages)
179
-
180
- # Update history by adding the new exchange
181
- new_history = copy.deepcopy(history)
182
- new_history.append((question, response))
183
- return response, new_history
184
 
185
- # Scrape the website when the app starts
186
- def init_scraper(progress=gr.Progress()):
187
- base_url = "https://innovativeskillsbd.com/"
188
- progress(0, desc="Starting website crawler...")
189
- site_content = crawl_website(base_url)
190
- progress(1, desc="Finished crawling website")
191
- return site_content
192
 
193
- # Create Gradio interface
194
- def create_interface(site_content):
195
- with gr.Blocks() as app:
196
- gr.Markdown("# InnovativeSkills Bangladesh Chatbot")
197
- gr.Markdown("This chatbot uses DeepSeek V3 to answer questions about InnovativeSkills Bangladesh website.")
198
-
199
- with gr.Row():
200
- api_key_input = gr.Textbox(
201
- label="OpenRouter API Key",
202
- placeholder="Enter your OpenRouter API key",
203
- type="password"
204
- )
205
-
206
- chatbot = gr.Chatbot(height=500, show_copy_button=True)
207
- msg = gr.Textbox(label="Ask a question about InnovativeSkills Bangladesh")
208
-
209
- # Container for site content (hidden from UI)
210
- site_content_state = gr.State(site_content)
211
-
212
- # Container for chat history
213
- chat_history = gr.State([])
214
-
215
- # Button to start the conversation
216
- clear = gr.Button("Clear conversation")
217
-
218
- # Events
219
- def user_input(api_key, message, site_content, history):
220
- if not message:
221
- return "", chatbot, history
222
-
223
- # Process the response
224
- bot_response, updated_history = answer_question(api_key, message, site_content, history)
225
-
226
- # Format history for chatbot display
227
- chatbot_display = []
228
- for user_msg, bot_msg in updated_history:
229
- chatbot_display.append([user_msg, bot_msg])
230
-
231
- return "", chatbot_display, updated_history
232
-
233
- msg.submit(
234
- user_input,
235
- inputs=[api_key_input, msg, site_content_state, chat_history],
236
- outputs=[msg, chatbot, chat_history]
237
- )
238
-
239
- def clear_chat():
240
- return "", [], []
241
-
242
- clear.click(
243
- clear_chat,
244
- outputs=[msg, chatbot, chat_history]
245
- )
246
-
247
- return app
248
-
249
- # Initialize and launch the app
250
- def main():
251
- print("Starting to initialize the InnovativeSkills chatbot...")
252
-
253
- # First, scrape the website content
254
- site_content = {}
255
- try:
256
- site_content = crawl_website("https://innovativeskillsbd.com/")
257
- except Exception as e:
258
- print(f"Error during initial website crawling: {e}")
259
- print("The chatbot will still work, but without initial website content.")
260
-
261
- # Create the Gradio interface with the site content
262
- app = create_interface(site_content)
263
-
264
- # Launch the app
265
- app.launch()
266
 
267
  if __name__ == "__main__":
268
- main()
 
 
 
 
1
+ from Crypto.Cipher import AES
2
+ from Crypto.Protocol.KDF import PBKDF2
3
+ import os
4
+ import tempfile
5
+ from dotenv import load_dotenv
 
 
 
6
 
7
+ load_dotenv() # Load all environment variables
 
 
 
 
8
 
9
def unpad(data):
    """Strip PKCS#7 padding from *data* and return the unpadded bytes.

    The final byte encodes the pad length; that many bytes are removed from
    the end. The padding is validated before stripping: the original
    ``data[:-data[-1]]`` silently returned corrupted output for malformed
    padding, and a pad byte of 0 made the slice ``data[:-0]`` == ``data[:0]``,
    discarding the entire payload.

    Raises:
        ValueError: if *data* is empty or the PKCS#7 padding is invalid
            (this also catches most wrong-password decryptions early).
    """
    if not data:
        raise ValueError("cannot unpad empty data")
    pad_len = data[-1]
    # AES block size is 16, so a valid PKCS#7 pad length is 1..16 and every
    # padding byte must equal the pad length.
    if not 1 <= pad_len <= 16 or data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("invalid PKCS#7 padding")
    return data[:-pad_len]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
def decrypt_and_run():
    """Decrypt ``code.enc`` with AES-256-CBC and execute the plaintext as Python.

    Expected layout of ``code.enc``: 16-byte PBKDF2 salt, then the 16-byte
    CBC IV, then the ciphertext. The key is derived from the ``PASSWORD``
    secret (Hugging Face Secrets / ``.env`` via ``load_dotenv``).

    NOTE(review): decrypt-then-exec of hidden code is an obfuscation
    pattern — make sure the encrypted payload is trusted and auditable.

    Raises:
        ValueError: if the PASSWORD environment variable is missing, or the
            decrypted padding is invalid (wrong password / corrupt file).
        FileNotFoundError: if ``code.enc`` does not exist.
    """
    # Local imports keep the module's top-level import block untouched.
    import subprocess
    import sys

    # Get password from Hugging Face Secrets environment variable.
    password = os.getenv("PASSWORD")
    if not password:
        raise ValueError("PASSWORD secret not found in environment variables")

    password = password.encode()

    with open("code.enc", "rb") as f:
        encrypted = f.read()

    # File layout: [16-byte salt][16-byte IV][ciphertext].
    salt = encrypted[:16]
    iv = encrypted[16:32]
    ciphertext = encrypted[32:]

    # Derive a 32-byte key (AES-256); the high iteration count slows
    # brute-force attacks on the password.
    key = PBKDF2(password, salt, dkLen=32, count=1000000)
    cipher = AES.new(key, AES.MODE_CBC, iv)

    plaintext = unpad(cipher.decrypt(ciphertext))

    # Write the decrypted source to a temp file and run it with the SAME
    # interpreter. delete=False is required so a second process can open
    # the file (notably on Windows); we remove it ourselves afterwards so
    # the decrypted source does not linger on disk.
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".py", delete=False, mode='wb') as tmp:
            tmp.write(plaintext)
            tmp.flush()
            tmp_path = tmp.name
        print(f"[INFO] Running decrypted code from {tmp_path}")
        # Argument-list subprocess.run avoids shell interpretation of the
        # path; the original os.system(f"python {tmp.name}") was a shell
        # string and could invoke a different "python" than the one running.
        # check=False mirrors os.system's no-raise-on-failure behavior.
        subprocess.run([sys.executable, tmp_path], check=False)
    finally:
        if tmp_path and os.path.exists(tmp_path):
            os.unlink(tmp_path)  # don't leak the decrypted source
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
# Entry point: decrypt code.enc and execute the recovered Python script.
if __name__ == "__main__":
    decrypt_and_run()

# This script decrypts the encrypted code and runs it.
# Ensure you have the PASSWORD secret set in your Hugging Face Secrets
# (or a local .env file) before launching, or decrypt_and_run() raises.