sylvain471 committed on
Commit
94a6b38
·
verified ·
1 Parent(s): c08f32e

Upload 3 files

Browse files
Files changed (3) hide show
  1. Dockerfile +4 -2
  2. app.py +16 -7
  3. scrape_fake_app.py +50 -45
Dockerfile CHANGED
@@ -21,8 +21,10 @@ WORKDIR /code
21
 
22
  COPY ./requirements.txt /code/requirements.txt
23
 
24
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
25
-
 
 
26
  RUN pip install playwright
27
  RUN playwright install --with-deps
28
 
 
21
 
22
  COPY ./requirements.txt /code/requirements.txt
23
 
24
+ # RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
25
+ RUN pip install https://gradio-pypi-previews.s3.amazonaws.com/b176db9d6e1bffd36f5b2ecd54b425e4cec3dab8/gradio-5.33.0-py3-none-any.whl
26
+ RUN pip install gradio[mcp]
27
+ RUN pip install python-dotenv
28
  RUN pip install playwright
29
  RUN playwright install --with-deps
30
 
app.py CHANGED
@@ -8,28 +8,35 @@ if os.getenv('ENVIRONMENT')=="PROD":
8
  print("installing playwright firefox")
9
  subprocess.run(["playwright","install","firefox"])
10
 
11
- def fetch_homework(date:str='today') -> str:
 
12
  """
13
  description:
14
  fetch the homeworks.
15
  Args:
16
- date: any string, default "today"
17
  Returns:
18
  The string describing the homeworks
19
  """
20
- return get_homework()
 
 
 
21
 
22
 
23
- def fetch_timetable(date:str='today') -> str:
24
  """
25
  description:
26
  fetch the timetable
27
  Args:
28
- date: any string, default "today"
29
  Returns:
30
  The string describing the timetable
31
  """
32
- return get_timetable()
 
 
 
33
 
34
 
35
  title="<h2>Gradio MCP Hackathon: fake-app-scraper</h2>"
@@ -58,7 +65,6 @@ images="""<img src="gradio_api/file=login.png" alt="login" style="max-width: 35%
58
 
59
  with gr.Blocks() as demo:
60
 
61
- # Add title and markdown
62
  with gr.Row():
63
  gr.HTML(title)
64
 
@@ -78,6 +84,8 @@ with gr.Blocks() as demo:
78
  gr.HTML(description)
79
  gr.HTML(images)
80
 
 
 
81
  homeworks_btn.click(fn=fetch_homework,
82
  inputs=[date],
83
  outputs=homeworks_output)
@@ -87,4 +95,5 @@ with gr.Blocks() as demo:
87
  inputs=[date],
88
  outputs=timetable_output)
89
 
 
90
  demo.launch(mcp_server=True,allowed_paths=["/"])
 
8
  print("installing playwright firefox")
9
  subprocess.run(["playwright","install","firefox"])
10
 
11
+
12
+ def fetch_homework(date:str,request: gr.Request) -> str:
13
  """
14
  description:
15
  fetch the homeworks.
16
  Args:
17
+ date: (str) set it to "today" unless explicitly specified otherwise
18
  Returns:
19
  The string describing the homeworks
20
  """
21
+ # print(str(dict(request.headers)["token"]))
22
+ token = str(dict(request.headers)["token"])
23
+
24
+ return get_homework(token)
25
 
26
 
27
+ def fetch_timetable(date:str,request: gr.Request) -> str:
28
  """
29
  description:
30
  fetch the timetable
31
  Args:
32
+ date: (str) set it to "today" unless explicitly specified otherwise
33
  Returns:
34
  The string describing the timetable
35
  """
36
+ # print(str(dict(request.headers)["token"]))
37
+ token = str(dict(request.headers)["token"])
38
+ return get_timetable(token)
39
+
40
 
41
 
42
  title="<h2>Gradio MCP Hackathon: fake-app-scraper</h2>"
 
65
 
66
  with gr.Blocks() as demo:
67
 
 
68
  with gr.Row():
69
  gr.HTML(title)
70
 
 
84
  gr.HTML(description)
85
  gr.HTML(images)
86
 
87
+
88
+
89
  homeworks_btn.click(fn=fetch_homework,
90
  inputs=[date],
91
  outputs=homeworks_output)
 
95
  inputs=[date],
96
  outputs=timetable_output)
97
 
98
+
99
  demo.launch(mcp_server=True,allowed_paths=["/"])
scrape_fake_app.py CHANGED
@@ -1,14 +1,22 @@
1
  from playwright.sync_api import sync_playwright
2
  import os
3
  from dotenv import load_dotenv
4
- import json
 
5
 
6
- def load_credentials()-> dict:
7
- load_dotenv()
8
- URL = os.getenv('FAKE_APP_URL')
9
- USERNAME = os.getenv('FAKE_APP_USERNAME')
10
- PASSWORD = os.getenv('FAKE_APP_PASSWORD')
11
- return (URL,USERNAME,PASSWORD)
 
 
 
 
 
 
 
12
 
13
  def extract_homework_text(page) -> str:
14
  card = page.get_by_title("homework")
@@ -45,47 +53,44 @@ def extract_timetable_text(page):
45
 
46
 
47
  # print(URL,USERNAME,PASSWORD)
48
- def get_homework() -> str:
49
- try:
50
- URL,USERNAME,PASSWORD=load_credentials()
51
-
52
- with sync_playwright() as playwright:
53
- browser = playwright.firefox.launch(headless=True)
54
- page = browser.new_page()
55
- page.goto(URL,wait_until="domcontentloaded")
56
-
57
- page.get_by_role('textbox',name='username').fill(USERNAME)
58
- page.get_by_role('textbox',name='password').fill(PASSWORD)
59
- page.get_by_role('button',name='login').click()
60
- page.wait_for_url("**/dashboard")
61
- # page.wait_for_timeout(1000)
62
- homework = extract_homework_text(page)
63
- browser.close()
64
- return homework
65
 
66
- except Exception as e:
67
- return json.dumps(e)
68
-
69
- def get_timetable() -> str:
70
- try:
71
- URL,USERNAME,PASSWORD=load_credentials()
72
 
73
- with sync_playwright() as playwright:
74
- browser = playwright.firefox.launch(headless=True)
75
- page = browser.new_page()
76
- page.goto(URL,wait_until="domcontentloaded")
77
-
78
- page.get_by_role('textbox',name='username').fill(USERNAME)
79
- page.get_by_role('textbox',name='password').fill(PASSWORD)
80
- page.get_by_role('button',name='login').click()
81
- page.wait_for_url("**/dashboard")
82
- # page.wait_for_timeout(1000)
83
- timetable = extract_timetable_text(page)
84
- browser.close()
85
- return timetable
86
 
87
- except Exception as e:
88
- return json.dumps(e)
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  if __name__=="__main__":
91
  print(get_homework())
 
1
  from playwright.sync_api import sync_playwright
2
  import os
3
  from dotenv import load_dotenv
4
+ import time
5
+ import base64
6
 
7
+ # def load_credentials()-> dict:
8
+ # load_dotenv()
9
+ # URL = os.getenv('FAKE_APP_URL')
10
+ # USERNAME = os.getenv('FAKE_APP_USERNAME')
11
+ # PASSWORD = os.getenv('FAKE_APP_PASSWORD')
12
+ # return (URL,USERNAME,PASSWORD)
13
+
14
+ def decode_token(base64_token) -> str:
15
+ base64_bytes = base64_token.encode("ascii")
16
+ token_bytes = base64.b64decode(base64_bytes)
17
+ token = token_bytes.decode("ascii")
18
+ # print(token)
19
+ return token
20
 
21
  def extract_homework_text(page) -> str:
22
  card = page.get_by_title("homework")
 
53
 
54
 
55
  # print(URL,USERNAME,PASSWORD)
56
+ def get_homework(base64_token) -> str:
57
+ token=decode_token(base64_token)
58
+ # print(token)
59
+ URL,USERNAME,PASSWORD=token.split("<sep>")
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
+ with sync_playwright() as playwright:
62
+ browser = playwright.firefox.launch(headless=True)
63
+ page = browser.new_page()
64
+ page.goto(URL,wait_until="domcontentloaded")
 
 
65
 
66
+ page.get_by_role('textbox',name='username').fill(USERNAME)
67
+ page.get_by_role('textbox',name='password').fill(PASSWORD)
68
+ page.get_by_role('button',name='login').click()
69
+ page.wait_for_url("**/dashboard")
70
+ # page.wait_for_timeout(1000)
71
+ homework = extract_homework_text(page)
72
+ browser.close()
73
+ return homework
74
+
75
+ def get_timetable(base64_token) -> str:
76
+ token=decode_token(base64_token)
77
+ URL,USERNAME,PASSWORD=token.split("<sep>")
78
+ # URL,USERNAME,PASSWORD=load_credentials()
79
 
80
+ with sync_playwright() as playwright:
81
+ browser = playwright.firefox.launch(headless=True)
82
+ page = browser.new_page()
83
+ page.goto(URL,wait_until="domcontentloaded")
84
+
85
+ page.get_by_role('textbox',name='username').fill(USERNAME)
86
+ page.get_by_role('textbox',name='password').fill(PASSWORD)
87
+ page.get_by_role('button',name='login').click()
88
+ page.wait_for_url("**/dashboard")
89
+ # page.wait_for_timeout(1000)
90
+ timetable = extract_timetable_text(page)
91
+ browser.close()
92
+
93
+ return timetable
94
 
95
  if __name__=="__main__":
96
  print(get_homework())