quantumbit committed on
Commit
d24de5d
·
verified ·
1 Parent(s): 4ec4487

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +141 -74
app.py CHANGED
@@ -1,85 +1,152 @@
1
- from flask import Flask, request, jsonify
2
- from selenium import webdriver
3
- from selenium.webdriver.chrome.options import Options
4
- from selenium.webdriver.common.by import By
 
 
 
5
  from bs4 import BeautifulSoup
6
- import time
7
 
8
- app = Flask(__name__)
 
 
 
 
9
 
10
def get_hidden_code(url):
    """Load *url* in headless Chrome and collect hidden text from the page.

    After navigating, the function tries to click a button whose text
    contains "Start Challenge", then parses the rendered page source and
    gathers:

    * the text of every element whose inline style sets ``display: none``
      (any spacing / letter case),
    * the ``value`` of every ``<input type="hidden">`` that has one,
    * a 100-character slice of the raw HTML starting at the first
      case-insensitive occurrence of "hidden code".

    Returns:
        A list of unique, non-empty strings in discovery order;
        ``["No hidden code found"]`` when nothing was collected; or
        ``["Error: <message>"]`` when an exception is raised after the
        browser has started.
    """
    import re  # local import: the surrounding file does not import re

    chrome_options = Options()
    for flag in (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-gpu",
        "--disable-software-rasterizer",
        "--incognito",
        "--remote-debugging-port=9222",
    ):
        chrome_options.add_argument(flag)

    driver = webdriver.Chrome(options=chrome_options)
    token_found = []

    try:
        driver.get(url)
        time.sleep(3)  # fixed pause after navigation (presumably to let the page render)

        # The button is optional: failing to find or click it is reported, not fatal.
        try:
            start_button = driver.find_element(By.XPATH, "//button[contains(text(), 'Start Challenge')]")
            start_button.click()
            time.sleep(3)
        except Exception as e:
            print("⚠️ Could not click Start Challenge button:", e)

        html = driver.page_source
        soup = BeautifulSoup(html, "html.parser")

        # Regex instead of a plain substring so "display: none" / "DISPLAY:NONE" also match.
        hidden_style = re.compile(r"display\s*:\s*none", re.I)
        for tag in soup.find_all(style=hidden_style):
            token_found.append(tag.get_text(strip=True))

        for hidden in soup.find_all("input", {"type": "hidden"}):
            if hidden.get("value"):
                token_found.append(hidden["value"])

        start = html.lower().find("hidden code")
        if start != -1:
            token_found.append(html[start:start + 100])

        # dict.fromkeys de-duplicates while keeping discovery order (set() did not);
        # empty strings from text-less hidden elements are dropped.
        unique_tokens = [token for token in dict.fromkeys(token_found) if token]
        return unique_tokens or ["No hidden code found"]

    except Exception as e:
        return [f"Error: {str(e)}"]
    finally:
        # Single cleanup point: the browser is closed on every exit path.
        driver.quit()
58
-
59
@app.route("/mission", methods=["POST"])
def mission():
    """Handle POST /mission: scrape the given URL for hidden tokens.

    Expects a JSON object with a required "url" and an optional
    "questions" list. Responds with HTTP 400 and an error payload when
    "url" is missing or the body is not a JSON object; otherwise returns
    the received URL, the received questions and the tokens produced by
    ``get_hidden_code``.
    """
    # silent=True yields None instead of raising on a missing/invalid JSON
    # body; anything that is not a JSON object is treated as an empty
    # payload so the request falls through to the 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    url = data.get("url")
    questions = data.get("questions", [])

    if not url:
        return jsonify({"error": "URL is required"}), 400

    tokens = get_hidden_code(url)

    print("\n===== Mission Request =====")
    print("URL received:", url)
    print("Questions received:", questions)
    print("Token(s) found:", tokens)
    print("===========================\n")

    return jsonify({
        "url_received": url,
        "questions_received": questions,
        "token_found": tokens
    })
81
-
82
if __name__ == "__main__":
    # Script entry point: listen on all interfaces, on the port named by the
    # PORT environment variable (7860 when unset), with debug mode off.
    import os

    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", 7860)), debug=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import logging
4
+ from typing import List, Dict, Any
5
+ from fastapi import FastAPI
6
+ from pydantic import BaseModel
7
+ import requests
8
  from bs4 import BeautifulSoup
 
9
 
10
+ # -------------------------
11
+ # Logging
12
+ # -------------------------
13
# Level names are upper-cased because logging only recognises the upper-case
# spellings; LOG_LEVEL=info would otherwise raise ValueError at import time.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO").upper())
logger = logging.getLogger("hackrx-round5")
15
 
16
+ # -------------------------
17
+ # FastAPI app
18
+ # -------------------------
19
+ app = FastAPI(title="HackRx Round 5 API", version="1.0.0")
 
 
 
 
 
20
 
 
21
 
22
+ # -------------------------
23
+ # Models
24
+ # -------------------------
25
class ChallengeRequest(BaseModel):
    """Request body accepted by POST /challenge."""

    # Address of the page to scrape.
    url: str
    # Questions to answer from the scraped page, one answer per question.
    questions: List[str]
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
class ChallengeResponse(BaseModel):
    """Response body returned by POST /challenge."""

    # One answer string per question in the request, in the same order.
    answers: List[str]
 
32
 
 
 
 
33
 
34
+ # -------------------------
35
+ # Scraper
36
+ # -------------------------
37
def _collect_hidden_values(soup) -> List[str]:
    """Gather tagged strings for content that is present in the HTML but not displayed."""
    # Imported here so this block works without touching the file's import section.
    from bs4 import Comment

    hidden_values: List[str] = []

    # Hidden inputs
    for inp in soup.find_all("input", {"type": "hidden"}):
        name = inp.get("name", "")
        value = inp.get("value", "")
        if value:
            hidden_values.append(f"hidden_input {name}={value}")

    # Elements with display:none
    for elem in soup.find_all(attrs={"style": re.compile(r"display\s*:\s*none", re.I)}):
        txt = elem.get_text(" ", strip=True)
        if txt:
            hidden_values.append(f"display_none {txt}")

    # HTML comments. BeautifulSoup exposes them as Comment nodes whose text
    # excludes the "<!--" / "-->" delimiters, so they must be selected by
    # type; searching the text for the delimiters never matches a comment.
    for comment in soup.find_all(string=lambda t: isinstance(t, Comment)):
        c = str(comment).strip()
        if c:
            hidden_values.append(f"comment {c}")

    # data-* attributes
    for tag in soup.find_all():
        for k, v in tag.attrs.items():
            if k.startswith("data-") and isinstance(v, str) and v.strip():
                hidden_values.append(f"{k}={v.strip()}")

    return hidden_values


def scrape_with_requests(url: str) -> Dict[str, Any]:
    """Scrape a webpage and extract visible + hidden info.

    Args:
        url: Address of the page to download (30 second timeout).

    Returns:
        A dict with keys "title" (page title, or "No title"),
        "visible_text" (first 6000 characters of the page text) and
        "hidden_values" (at most 200 tagged strings from hidden inputs,
        ``display:none`` elements, HTML comments and ``data-*`` attributes).
        An empty dict is returned when the download or parsing fails; the
        failure is logged rather than raised.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/118.0 Safari/537.36"
    }
    try:
        r = requests.get(url, headers=headers, timeout=30)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, "html.parser")

        title = soup.title.get_text(strip=True) if soup.title else "No title"
        visible_text = soup.get_text(separator=" ", strip=True)[:6000]

        return {
            "title": title,
            "visible_text": visible_text,
            "hidden_values": _collect_hidden_values(soup)[:200],
        }
    except Exception as e:
        # Best-effort boundary: callers treat {} as "nothing could be scraped".
        logger.error(f"Request scraping failed for {url}: {e}")
        return {}
88
+
89
+
90
# Category tags that the scraper prepends to some "hidden_values" entries.
_HIDDEN_TAGS = ("hidden_input ", "display_none ", "comment ")

# Splits text into words so that "challenge_id", "challengeId" and
# "Challenge ID" all yield a standalone "id" token.
_WORD_RE = re.compile(r"[A-Z]+(?![a-z])|[A-Z]?[a-z]+|\d+")


def _strip_hidden_tag(entry: str) -> str:
    """Remove the scraper's category tag from the front of *entry*, if present."""
    for tag in _HIDDEN_TAGS:
        if entry.startswith(tag):
            return entry[len(tag):]
    return entry


def _mentions_id(text: str) -> bool:
    """Return True when *text* contains "id" as a word of its own."""
    return any(word.lower() == "id" for word in _WORD_RE.findall(text))


def answer_question(question: str, content: Dict[str, Any]) -> str:
    """Simple rule-based extraction for Round 5 questions.

    Hidden values are consulted first, in order; the first entry relevant
    to the question wins and its value (the text after the first "=", or
    the whole entry when there is none) is returned. If no hidden value
    matches, the page title and regex searches over the visible text are
    used as fallbacks.

    Args:
        question: The question text; matching is case-insensitive.
        content: Scrape result with optional keys "title", "visible_text"
            and "hidden_values".

    Returns:
        The extracted answer, or "Challenge information not found".
    """
    ql = question.lower()
    title = content.get("title", "")
    visible = content.get("visible_text", "")
    hidden = content.get("hidden_values", [])

    wants_name = "challenge" in ql and "name" in ql
    wants_id = "challenge id" in ql
    # "code" also covers the "completion code" / "hidden code" phrasings.
    wants_code = "code" in ql

    # Look into hidden values
    for raw in hidden:
        # Work on the entry without its category tag: the tag is internal
        # labelling, must not count as a keyword hit ("hidden_input"
        # contains the letters "id") and must not leak into the answer.
        entry = _strip_hidden_tag(raw)
        lowered = entry.lower()
        if (
            (wants_name and "challenge" in lowered)
            # Whole-word test: a bare substring check also fires on
            # "hidden", "width", "video", ...
            or (wants_id and _mentions_id(entry))
            or (wants_code and "code" in lowered)
        ):
            return entry.split("=", 1)[-1].strip()

    # Fallbacks
    if "challenge name" in ql and title:
        return title.strip()

    if wants_id:
        m = re.search(r"challenge\s*id\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
        if m:
            return m.group(1)

    if "completion code" in ql or "hidden code" in ql:
        m = re.search(r"(?:completion|hidden)\s*code\s*[:\-]\s*([A-Za-z0-9\-_]+)", visible, flags=re.I)
        if m:
            return m.group(1)

    return "Challenge information not found"
122
+
123
+
124
+ # -------------------------
125
+ # Routes
126
+ # -------------------------
127
@app.get("/")
def root():
    """Return a readiness message together with a map of the API's endpoints."""
    endpoint_map = {"challenge": "POST /challenge", "health": "GET /health"}
    return {"message": "HackRx Round 5 API - Ready", "endpoints": endpoint_map}
133
+
134
+
135
@app.get("/health")
def health():
    """Return a constant payload reporting the service as healthy."""
    payload = {"status": "healthy"}
    return payload
138
+
139
+
140
@app.post("/challenge", response_model=ChallengeResponse)
def challenge(req: ChallengeRequest):
    """Scrape the requested page and answer each question from its content.

    When scraping yields nothing, every question receives the
    "Challenge information not found" answer. Otherwise each question is
    answered in order and every question/answer pair is logged.
    """
    logger.info(f"Round 5 request: url={req.url}, questions={req.questions}")

    page = scrape_with_requests(req.url)
    if not page:
        # One placeholder per question keeps answers aligned with the request.
        placeholders = ["Challenge information not found"] * len(req.questions)
        return ChallengeResponse(answers=placeholders)

    collected = []
    for question in req.questions:
        reply = answer_question(question, page)
        collected.append(reply)
        logger.info(f"Q: {question} → A: {reply}")

    return ChallengeResponse(answers=collected)