Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,268 +1,42 @@
|
|
1 |
-
|
2 |
-
import
|
3 |
-
|
4 |
-
|
5 |
-
import
|
6 |
-
from openai import OpenAI
|
7 |
-
import time
|
8 |
-
import copy
|
9 |
|
10 |
-
#
|
11 |
-
def is_valid_url(url, base_url):
    """Return True when *url* points at the same host as *base_url*.

    Used by the crawler to stay inside the target site: only the network
    location (domain) is compared, so paths and query strings are ignored.
    """
    return urlparse(url).netloc == urlparse(base_url).netloc
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
try:
|
19 |
-
response = requests.get(url, timeout=10)
|
20 |
-
if response.status_code == 200:
|
21 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
22 |
-
|
23 |
-
# Remove script, style elements and comments
|
24 |
-
for element in soup(['script', 'style', 'header', 'footer', 'nav']):
|
25 |
-
element.decompose()
|
26 |
-
|
27 |
-
# Get text content
|
28 |
-
text = soup.get_text(separator=' ', strip=True)
|
29 |
-
|
30 |
-
# Clean up whitespace
|
31 |
-
text = re.sub(r'\s+', ' ', text).strip()
|
32 |
-
|
33 |
-
return text
|
34 |
-
else:
|
35 |
-
return None
|
36 |
-
except Exception as e:
|
37 |
-
print(f"Error scraping {url}: {e}")
|
38 |
-
return None
|
39 |
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
site_content = {}
|
46 |
-
|
47 |
-
while urls_to_visit and len(visited_urls) < max_pages:
|
48 |
-
current_url = urls_to_visit.pop(0)
|
49 |
-
|
50 |
-
if current_url in visited_urls:
|
51 |
-
continue
|
52 |
-
|
53 |
-
print(f"Crawling: {current_url}")
|
54 |
-
visited_urls.add(current_url)
|
55 |
-
|
56 |
-
try:
|
57 |
-
response = requests.get(current_url, timeout=10)
|
58 |
-
if response.status_code == 200:
|
59 |
-
# Get content of the current page
|
60 |
-
content = scrape_page(current_url)
|
61 |
-
if content:
|
62 |
-
site_content[current_url] = content
|
63 |
-
|
64 |
-
# Find all links on the page
|
65 |
-
soup = BeautifulSoup(response.text, 'html.parser')
|
66 |
-
for link in soup.find_all('a', href=True):
|
67 |
-
href = link['href']
|
68 |
-
full_url = urljoin(current_url, href)
|
69 |
-
|
70 |
-
# Only follow links that are part of the same website
|
71 |
-
if is_valid_url(full_url, base_url) and full_url not in visited_urls:
|
72 |
-
urls_to_visit.append(full_url)
|
73 |
-
|
74 |
-
# Add a small delay to be respectful
|
75 |
-
time.sleep(0.5)
|
76 |
-
|
77 |
-
except Exception as e:
|
78 |
-
print(f"Error visiting {current_url}: {e}")
|
79 |
-
|
80 |
-
print(f"Crawled {len(visited_urls)} pages and collected content from {len(site_content)} pages.")
|
81 |
-
return site_content
|
82 |
|
83 |
-
|
84 |
-
def create_context(site_content, max_context_length=8000):
    """Build a single prompt-context string from scraped page contents.

    Each page contributes its URL plus the first 1000 characters of its
    text.  Pages are added in dict order and accumulation stops before the
    context would exceed *max_context_length*.
    """
    context = "Content from https://innovativeskillsbd.com website:\n\n"

    for page_url, page_text in site_content.items():
        # URL header + truncated body, kept short so the prompt stays manageable
        snippet = f"Page: {page_url}\n{page_text[:1000]}...\n\n"

        # Stop before the running context would grow past the limit
        if len(context) + len(snippet) > max_context_length:
            break

        context += snippet

    return context
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
# Look for URLs in the text
|
102 |
-
url_pattern = r'https?://[^\s/$.?#].[^\s]*'
|
103 |
-
urls = re.findall(url_pattern, text)
|
104 |
-
|
105 |
-
for url in urls:
|
106 |
-
# If the URL contains the wrong domain but appears to be an InnovativeSkills link
|
107 |
-
if ('innovative-skill.com' in url or 'innovativeskill.com' in url) and 'innovativeskillsbd.com' not in url:
|
108 |
-
# Create the correct URL by replacing the domain
|
109 |
-
path = urlparse(url).path
|
110 |
-
correct_url = f"https://innovativeskillsbd.com{path}"
|
111 |
-
# Replace in the text
|
112 |
-
text = text.replace(url, correct_url)
|
113 |
-
|
114 |
-
return text
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
client = OpenAI(
|
120 |
-
base_url="https://openrouter.ai/api/v1",
|
121 |
-
api_key=api_key,
|
122 |
-
)
|
123 |
-
|
124 |
-
completion = client.chat.completions.create(
|
125 |
-
extra_headers={
|
126 |
-
"HTTP-Referer": "https://innovativeskillsbd.com",
|
127 |
-
"X-Title": "InnovativeSkills ChatBot",
|
128 |
-
},
|
129 |
-
model="deepseek/deepseek-chat-v3-0324:free",
|
130 |
-
messages=messages
|
131 |
-
)
|
132 |
-
|
133 |
-
response = completion.choices[0].message.content
|
134 |
-
|
135 |
-
# Fix any incorrect URLs - ensure all links point to the correct domain
|
136 |
-
response = fix_urls_in_text(response)
|
137 |
-
|
138 |
-
return response
|
139 |
-
except Exception as e:
|
140 |
-
return f"Error querying the model: {str(e)}"
|
141 |
|
142 |
-
|
143 |
-
|
144 |
-
if not api_key:
|
145 |
-
return "Please enter your OpenRouter API key.", history
|
146 |
-
|
147 |
-
# Prepare the context from scraped content
|
148 |
-
context = create_context(site_content)
|
149 |
-
|
150 |
-
# Create system message with context
|
151 |
-
system_message = {
|
152 |
-
"role": "system",
|
153 |
-
"content": f"""You are a helpful AI assistant for InnovativeSkills Bangladesh, a website focused on helping people learn IT skills.
|
154 |
-
Use the following content from the website to answer user questions. If the question is not related to the website or the
|
155 |
-
information is not available in the content, politely say so and try to provide general guidance related to InnovativeSkills.
|
156 |
-
|
157 |
-
IMPORTANT: When referring to any URLs related to the website, ALWAYS use the domain 'innovativeskillsbd.com' (NOT 'innovative-skill.com' or 'innovativeskill.com').
|
158 |
-
For example, use 'https://innovativeskillsbd.com/student-job-success' instead of any other domain.
|
159 |
-
|
160 |
-
{context}"""
|
161 |
-
}
|
162 |
-
|
163 |
-
# Create user message
|
164 |
-
user_message = {"role": "user", "content": question}
|
165 |
-
|
166 |
-
# Create message history for the API call
|
167 |
-
messages = [system_message]
|
168 |
-
|
169 |
-
# Add conversation history
|
170 |
-
for user_msg, assistant_msg in history:
|
171 |
-
messages.append({"role": "user", "content": user_msg})
|
172 |
-
messages.append({"role": "assistant", "content": assistant_msg})
|
173 |
-
|
174 |
-
# Add current question
|
175 |
-
messages.append(user_message)
|
176 |
-
|
177 |
-
# Query the model
|
178 |
-
response = query_model(api_key, messages)
|
179 |
-
|
180 |
-
# Update history by adding the new exchange
|
181 |
-
new_history = copy.deepcopy(history)
|
182 |
-
new_history.append((question, response))
|
183 |
-
return response, new_history
|
184 |
|
185 |
-
|
186 |
-
def init_scraper(progress=gr.Progress()):
    """Crawl the InnovativeSkills site once and return its page contents."""
    base_url = "https://innovativeskillsbd.com/"
    progress(0, desc="Starting website crawler...")
    pages = crawl_website(base_url)
    progress(1, desc="Finished crawling website")
    return pages
|
192 |
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
with gr.Row():
|
200 |
-
api_key_input = gr.Textbox(
|
201 |
-
label="OpenRouter API Key",
|
202 |
-
placeholder="Enter your OpenRouter API key",
|
203 |
-
type="password"
|
204 |
-
)
|
205 |
-
|
206 |
-
chatbot = gr.Chatbot(height=500, show_copy_button=True)
|
207 |
-
msg = gr.Textbox(label="Ask a question about InnovativeSkills Bangladesh")
|
208 |
-
|
209 |
-
# Container for site content (hidden from UI)
|
210 |
-
site_content_state = gr.State(site_content)
|
211 |
-
|
212 |
-
# Container for chat history
|
213 |
-
chat_history = gr.State([])
|
214 |
-
|
215 |
-
# Button to start the conversation
|
216 |
-
clear = gr.Button("Clear conversation")
|
217 |
-
|
218 |
-
# Events
|
219 |
-
def user_input(api_key, message, site_content, history):
    """Handle a chat submission: query the model and refresh the display.

    Returns (cleared textbox value, chatbot display pairs, updated history).
    An empty message is a no-op that leaves the display and history alone.
    """
    if not message:
        return "", chatbot, history

    # Get the model's answer plus the history including this exchange
    bot_response, updated_history = answer_question(api_key, message, site_content, history)

    # Rebuild the [user, bot] pair list the Chatbot component renders
    chatbot_display = [[user_msg, bot_msg] for user_msg, bot_msg in updated_history]

    return "", chatbot_display, updated_history
|
232 |
-
|
233 |
-
msg.submit(
|
234 |
-
user_input,
|
235 |
-
inputs=[api_key_input, msg, site_content_state, chat_history],
|
236 |
-
outputs=[msg, chatbot, chat_history]
|
237 |
-
)
|
238 |
-
|
239 |
-
def clear_chat():
    """Reset the textbox, the chat display, and the stored history."""
    return ("", [], [])
|
241 |
-
|
242 |
-
clear.click(
|
243 |
-
clear_chat,
|
244 |
-
outputs=[msg, chatbot, chat_history]
|
245 |
-
)
|
246 |
-
|
247 |
-
return app
|
248 |
-
|
249 |
-
# Initialize and launch the app
|
250 |
-
def main():
    """Scrape the site, build the Gradio interface, and launch it."""
    print("Starting to initialize the InnovativeSkills chatbot...")

    # Best-effort crawl: the chatbot still works without initial content
    site_content = {}
    try:
        site_content = crawl_website("https://innovativeskillsbd.com/")
    except Exception as e:
        print(f"Error during initial website crawling: {e}")
        print("The chatbot will still work, but without initial website content.")

    # Build the Gradio interface around whatever content we collected, then serve it
    app = create_interface(site_content)
    app.launch()
|
266 |
|
267 |
if __name__ == "__main__":
|
268 |
-
|
|
|
|
|
|
|
|
1 |
+
import os
import subprocess
import sys
import tempfile

from Crypto.Cipher import AES
from Crypto.Protocol.KDF import PBKDF2
from dotenv import load_dotenv
|
|
|
|
|
|
|
6 |
|
7 |
+
load_dotenv() # Load all environment variables
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
def unpad(data):
    """Strip PKCS#7 padding from *data* and return the payload bytes.

    The original `data[:-data[-1]]` trusted the last byte blindly: a wrong
    password or corrupt ciphertext silently produced garbage, and a final
    byte of 0 returned b"" regardless of input.  Validate the padding
    instead.

    Raises:
        ValueError: if *data* is empty or the padding is malformed
            (typically a wrong password or a corrupt file).
    """
    if not data:
        raise ValueError("Cannot unpad empty data")
    pad_len = data[-1]
    # Valid PKCS#7 for AES: 1..16 trailing copies of the pad-length byte
    if pad_len < 1 or pad_len > 16 or data[-pad_len:] != bytes([pad_len]) * pad_len:
        raise ValueError("Invalid PKCS#7 padding (wrong password or corrupt data?)")
    return data[:-pad_len]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
+
def decrypt_and_run():
    """Decrypt ``code.enc`` with the PASSWORD secret and execute the result.

    File layout of ``code.enc``: 16-byte PBKDF2 salt | 16-byte AES-CBC IV |
    ciphertext.  The decrypted source is written to a temporary ``.py``
    file, executed with the current interpreter, and removed afterwards.

    Fixes over the original:
      * ``subprocess.run([sys.executable, ...])`` replaces
        ``os.system(f"python {tmp.name}")`` — no shell interpretation, and
        the child uses the same interpreter as this process.
      * The child is launched after the temp file is closed (the original
        ran it while the file was still open, which fails on Windows).
      * The decrypted plaintext file is deleted in a ``finally`` block
        instead of being leaked on disk.

    Raises:
        ValueError: if the PASSWORD environment variable is not set.
    """
    # Get password from Hugging Face Secrets environment variable
    password = os.getenv("PASSWORD")
    if not password:
        raise ValueError("PASSWORD secret not found in environment variables")

    password = password.encode()

    with open("code.enc", "rb") as f:
        encrypted = f.read()

    # Split the container: salt for the KDF, IV for CBC, then the payload
    salt = encrypted[:16]
    iv = encrypted[16:32]
    ciphertext = encrypted[32:]

    # Derive the AES-256 key; the high iteration count slows brute force
    key = PBKDF2(password, salt, dkLen=32, count=1000000)
    cipher = AES.new(key, AES.MODE_CBC, iv)

    plaintext = unpad(cipher.decrypt(ciphertext))

    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(suffix=".py", delete=False, mode='wb') as tmp:
            tmp.write(plaintext)
            tmp.flush()
            tmp_path = tmp.name
        print(f"[INFO] Running decrypted code from {tmp_path}")
        # Argument list + no shell: the temp path is never shell-parsed
        subprocess.run([sys.executable, tmp_path], check=False)
    finally:
        # Don't leave the decrypted source lying around on disk
        if tmp_path and os.path.exists(tmp_path):
            os.remove(tmp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
if __name__ == "__main__":
    decrypt_and_run()

# This script decrypts the encrypted code and runs it.
# Ensure you have the PASSWORD secret set in your Hugging Face Secrets
# (Space settings -> Variables and secrets) before starting the Space.
|