Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,85 +1,152 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
from
|
|
|
|
|
|
|
5 |
from bs4 import BeautifulSoup
|
6 |
-
import time
|
7 |
|
8 |
-
|
|
|
|
|
|
|
|
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
chrome_options.add_argument("--disable-dev-shm-usage")
|
15 |
-
chrome_options.add_argument("--disable-gpu")
|
16 |
-
chrome_options.add_argument("--disable-software-rasterizer")
|
17 |
-
chrome_options.add_argument("--incognito")
|
18 |
-
chrome_options.add_argument("--remote-debugging-port=9222")
|
19 |
|
20 |
-
driver = webdriver.Chrome(options=chrome_options)
|
21 |
|
22 |
-
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
-
try:
|
25 |
-
driver.get(url)
|
26 |
-
time.sleep(3)
|
27 |
-
|
28 |
-
# Try clicking the button
|
29 |
-
try:
|
30 |
-
start_button = driver.find_element(By.XPATH, "//button[contains(text(), 'Start Challenge')]")
|
31 |
-
start_button.click()
|
32 |
-
time.sleep(3)
|
33 |
-
except Exception as e:
|
34 |
-
print("⚠️ Could not click Start Challenge button:", e)
|
35 |
-
|
36 |
-
html = driver.page_source
|
37 |
-
soup = BeautifulSoup(html, "html.parser")
|
38 |
-
|
39 |
-
# Scrape hidden elements
|
40 |
-
for tag in soup.find_all(True):
|
41 |
-
if tag.has_attr("style") and "display:none" in tag["style"]:
|
42 |
-
token_found.append(tag.get_text(strip=True))
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
token_found.append(hidden["value"])
|
47 |
|
48 |
-
if "hidden code" in html.lower():
|
49 |
-
start = html.lower().find("hidden code")
|
50 |
-
token_found.append(html[start:start+100])
|
51 |
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
except Exception as e:
|
56 |
-
|
57 |
-
return
|
58 |
-
|
59 |
-
|
60 |
-
def
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import logging
|
4 |
+
from typing import List, Dict, Any
|
5 |
+
from fastapi import FastAPI
|
6 |
+
from pydantic import BaseModel
|
7 |
+
import requests
|
8 |
from bs4 import BeautifulSoup
|
|
|
9 |
|
10 |
+
# -------------------------
# Logging
# -------------------------
# Log level is overridable via the LOG_LEVEL env var; defaults to INFO.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
logger = logging.getLogger("hackrx-round5")

# -------------------------
# FastAPI app
# -------------------------
app = FastAPI(title="HackRx Round 5 API", version="1.0.0")
|
|
|
|
|
|
|
|
|
|
20 |
|
|
|
21 |
|
22 |
+
# -------------------------
# Models
# -------------------------
class ChallengeRequest(BaseModel):
    """Request payload for POST /challenge."""

    # URL of the challenge page to scrape.
    url: str
    # Natural-language questions to answer from the scraped content.
    questions: List[str]
|
28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
+
class ChallengeResponse(BaseModel):
|
31 |
+
answers: List[str]
|
|
|
32 |
|
|
|
|
|
|
|
33 |
|
34 |
+
# -------------------------
# Scraper
# -------------------------
def scrape_with_requests(url: str) -> Dict[str, Any]:
    """Scrape a webpage and extract visible + hidden info.

    Fetches *url* with a desktop User-Agent and returns a dict with:
      - "title": page <title> text (or "No title"),
      - "visible_text": whole-page text, truncated to 6000 chars,
      - "hidden_values": up to 200 strings harvested from hidden inputs,
        display:none elements, HTML comments, and data-* attributes.
    Returns {} on any failure (network error, bad status, parse error).
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/118.0 Safari/537.36"
        }
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        html = r.text
        soup = BeautifulSoup(html, "html.parser")

        title = soup.title.get_text(strip=True) if soup.title else "No title"
        visible_text = soup.get_text(separator=" ", strip=True)[:6000]

        hidden_values: List[str] = []

        # Hidden inputs
        for inp in soup.find_all("input", {"type": "hidden"}):
            name = inp.get("name", "")
            value = inp.get("value", "")
            if value:
                hidden_values.append(f"hidden_input {name}={value}")

        # Elements with display:none
        for elem in soup.find_all(attrs={"style": re.compile(r"display\s*:\s*none", re.I)}):
            txt = elem.get_text(" ", strip=True)
            if txt:
                hidden_values.append(f"display_none {txt}")

        # HTML comments. BUGFIX: BeautifulSoup strips the "<!--"/"-->"
        # delimiters from Comment nodes, so a substring test on them never
        # matched; detect the Comment node type instead.
        from bs4 import Comment  # local import; module top only imports BeautifulSoup

        for comment in soup.find_all(string=lambda t: isinstance(t, Comment)):
            c = str(comment).strip()
            if c:
                hidden_values.append(f"comment {c}")

        # data-* attributes (multi-valued attrs come back as lists; skip those)
        for tag in soup.find_all():
            for k, v in tag.attrs.items():
                if k.startswith("data-") and isinstance(v, str) and v.strip():
                    hidden_values.append(f"{k}={v.strip()}")

        return {
            "title": title,
            "visible_text": visible_text,
            "hidden_values": hidden_values[:200],
        }
    except Exception as e:
        logger.error(f"Request scraping failed for {url}: {e}")
        return {}
|
88 |
+
|
89 |
+
|
90 |
+
def answer_question(question: str, content: Dict[str, Any]) -> str:
    """Simple rule-based extraction for Round 5 questions.

    Scans the harvested hidden values first (most reliable), then falls
    back to the page title and regex searches over the visible text.
    Returns a fixed sentinel string when nothing matches.
    """
    q = question.lower()
    page_title = content.get("title", "")
    page_text = content.get("visible_text", "")

    # Classify the question once, up front.
    wants_name = "challenge" in q and "name" in q
    wants_id = "challenge id" in q
    wants_code = "completion code" in q or "hidden code" in q or "code" in q

    # Hidden values carry the most reliable answers, so scan them first.
    for entry in content.get("hidden_values", []):
        lowered = entry.lower()
        if wants_name and "challenge" in lowered:
            return entry.split("=", 1)[-1].strip()
        if wants_id and "id" in lowered:
            return entry.split("=", 1)[-1].strip()
        if wants_code and "code" in lowered:
            return entry.split("=", 1)[-1].strip()

    # Fallback: the page title often doubles as the challenge name.
    if "challenge name" in q and page_title:
        return page_title.strip()

    # Fallback: pattern-match the visible text.
    if wants_id:
        match = re.search(r"challenge\s*id\s*[:\-]\s*([A-Za-z0-9\-_]+)", page_text, flags=re.I)
        if match:
            return match.group(1)

    if "completion code" in q or "hidden code" in q:
        match = re.search(r"(?:completion|hidden)\s*code\s*[:\-]\s*([A-Za-z0-9\-_]+)", page_text, flags=re.I)
        if match:
            return match.group(1)

    return "Challenge information not found"
|
122 |
+
|
123 |
+
|
124 |
+
# -------------------------
# Routes
# -------------------------
@app.get("/")
def root():
    """Service banner listing the available endpoints."""
    endpoints = {"challenge": "POST /challenge", "health": "GET /health"}
    return {"message": "HackRx Round 5 API - Ready", "endpoints": endpoints}
|
133 |
+
|
134 |
+
|
135 |
+
@app.get("/health")
def health():
    """Liveness probe for deployment health checks."""
    return {"status": "healthy"}
|
138 |
+
|
139 |
+
|
140 |
+
@app.post("/challenge", response_model=ChallengeResponse)
def challenge(req: ChallengeRequest):
    """Scrape the challenge page at req.url and answer each question.

    Returns one answer per question, in order. If scraping fails
    entirely, every answer is the not-found sentinel.
    """
    # Lazy %-style args: the message is only formatted if the level is enabled.
    logger.info("Round 5 request: url=%s, questions=%s", req.url, req.questions)
    content = scrape_with_requests(req.url)
    if not content:
        # Scrape failed; return the sentinel answer for every question.
        return ChallengeResponse(answers=["Challenge information not found"] * len(req.questions))

    answers = []
    for q in req.questions:
        ans = answer_question(q, content)
        answers.append(ans)
        logger.info("Q: %s → A: %s", q, ans)
    return ChallengeResponse(answers=answers)
|