euler314 committed on
Commit
6f39f32
·
verified ·
1 Parent(s): 5641dea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -8
app.py CHANGED
@@ -25,6 +25,10 @@ import shutil
25
  from PIL import Image
26
  from reportlab.lib.pagesizes import letter
27
  from reportlab.pdfgen import canvas
 
 
 
 
28
 
29
  # -------------------- Logging Setup --------------------
30
  logging.basicConfig(
@@ -33,6 +37,19 @@ logging.basicConfig(
33
  )
34
  logger = logging.getLogger(__name__)
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  # -------------------- Utility Functions --------------------
37
  def get_random_user_agent():
38
  USER_AGENTS = [
@@ -58,6 +75,56 @@ def create_zip_file(file_paths, output_dir):
58
  zipf.write(file_path, os.path.basename(file_path))
59
  return zip_path
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  # -------------------- Playwright Setup --------------------
62
  def install_playwright_dependencies():
63
  try:
@@ -65,9 +132,17 @@ def install_playwright_dependencies():
65
  os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")
66
 
67
  # Install system dependencies
 
 
 
 
 
 
 
 
 
68
  subprocess.run(['pip', 'install', 'playwright'], check=True)
69
- subprocess.run(['playwright', 'install', 'chromium'], check=True)
70
- subprocess.run(['playwright', 'install-deps', 'chromium'], check=True)
71
 
72
  st.success("Playwright dependencies installed successfully!")
73
  except Exception as e:
@@ -1664,6 +1739,7 @@ def main():
1664
  st.session_state.initialized = True
1665
  st.session_state.discovered_files = []
1666
  st.session_state.current_url = None
 
1667
  st.session_state.selected_files = []
1668
  st.session_state.do_deep_search = False
1669
  st.session_state.deep_search_url = None
@@ -1678,6 +1754,16 @@ def main():
1678
  use_proxy = st.checkbox("Use Proxy", key="proxy_checkbox")
1679
  proxy = st.text_input("Proxy URL", placeholder="http://proxy:port", key="proxy_input")
1680
 
 
 
 
 
 
 
 
 
 
 
1681
  if mode == "Manual URL":
1682
  st.header("Manual URL Mode")
1683
  url = st.text_input("Enter URL", placeholder="https://example.com", key="url_input")
@@ -1744,8 +1830,15 @@ def main():
1744
  st.session_state.selected_files = selected_indices
1745
 
1746
  if selected_indices:
1747
- download_dir = st.text_input("Download Directory", value="./downloads", key="download_dir_input")
1748
- create_zip = st.checkbox("Create ZIP file", value=True, key="create_zip_checkbox")
 
 
 
 
 
 
 
1749
 
1750
  if st.button("Download Selected", key="download_btn"):
1751
  if not os.path.exists(download_dir):
@@ -1777,8 +1870,7 @@ def main():
1777
  if downloaded:
1778
  st.success(f"Successfully downloaded {len(downloaded)} files")
1779
 
1780
- # Create file downloads
1781
- if create_zip and len(downloaded) > 1:
1782
  zip_path = create_zip_file(downloaded, download_dir)
1783
  st.success(f"Created ZIP file: {zip_path}")
1784
 
@@ -1786,14 +1878,32 @@ def main():
1786
  with open(zip_path, "rb") as f:
1787
  zip_data = f.read()
1788
 
1789
- zip_filename = os.path.basename(zip_path)
1790
  st.download_button(
1791
  label="Download ZIP",
1792
  data=zip_data,
1793
- file_name=zip_filename,
1794
  mime="application/zip",
1795
  key="download_zip_btn"
1796
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1797
  else:
1798
  # Provide individual file downloads
1799
  st.write("Download files individually:")
 
25
  from PIL import Image
26
  from reportlab.lib.pagesizes import letter
27
  from reportlab.pdfgen import canvas
28
+ import google_auth_oauthlib.flow
29
+ import googleapiclient.discovery
30
+ import google.auth.transport.requests
31
+ import googleapiclient.http
32
 
33
  # -------------------- Logging Setup --------------------
34
  logging.basicConfig(
 
37
  )
38
  logger = logging.getLogger(__name__)
39
 
40
+ # -------------------- Google OAuth Config --------------------
41
# SECURITY: OAuth client credentials must never be committed to source control.
# The client secret is read from the environment (e.g. a Space/CI secret) instead
# of being embedded here. NOTE(review): the secret that was previously committed
# should be treated as compromised and rotated in the Google Cloud console.
#
# Environment variables:
#   GOOGLE_OAUTH_CLIENT_ID      - OAuth client ID (falls back to the app's ID)
#   GOOGLE_OAUTH_PROJECT_ID     - Google Cloud project ID
#   GOOGLE_OAUTH_CLIENT_SECRET  - OAuth client secret (no default on purpose)
#   GOOGLE_OAUTH_REDIRECT_URI   - redirect URI registered for the client
GOOGLE_OAUTH_CONFIG = {
    "web": {
        "client_id": os.environ.get(
            "GOOGLE_OAUTH_CLIENT_ID",
            "90798824947-u25obg1q844qeikjoh4jdmi579kn9p1c.apps.googleusercontent.com",
        ),
        "project_id": os.environ.get("GOOGLE_OAUTH_PROJECT_ID", "huggingface-449214"),
        "auth_uri": "https://accounts.google.com/o/oauth2/auth",
        "token_uri": "https://oauth2.googleapis.com/token",
        "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
        # Empty when unset: token exchange will then fail with a clear error
        # rather than silently shipping a secret inside the repository.
        "client_secret": os.environ.get("GOOGLE_OAUTH_CLIENT_SECRET", ""),
        "redirect_uris": [
            os.environ.get(
                "GOOGLE_OAUTH_REDIRECT_URI",
                "https://euler314-craw-web.hf.space/",
            )
        ],
    }
}
52
+
53
  # -------------------- Utility Functions --------------------
54
  def get_random_user_agent():
55
  USER_AGENTS = [
 
75
  zipf.write(file_path, os.path.basename(file_path))
76
  return zip_path
77
 
78
+ # -------------------- Google Drive Functions --------------------
79
def get_google_auth_url():
    """Build the Google OAuth consent URL used to start Drive sign-in.

    Returns:
        The authorization URL string produced by the OAuth flow. The
        ``state`` value returned alongside it is discarded.
    """
    web_settings = GOOGLE_OAUTH_CONFIG["web"]
    drive_scopes = ["https://www.googleapis.com/auth/drive.file"]
    oauth_flow = google_auth_oauthlib.flow.Flow.from_client_config(
        {"web": web_settings},
        scopes=drive_scopes,
    )
    # The first configured redirect URI is the one sent to Google.
    oauth_flow.redirect_uri = web_settings["redirect_uris"][0]
    url_and_state = oauth_flow.authorization_url(
        access_type="offline",
        include_granted_scopes="true",
        prompt="consent",
    )
    return url_and_state[0]
92
+
93
def exchange_code_for_credentials(auth_code):
    """Exchange an OAuth authorization code for Google credentials.

    Args:
        auth_code: Authorization code supplied by the user. ``None``,
            non-string, empty and whitespace-only values are rejected
            without contacting Google.

    Returns:
        A ``(credentials, message)`` tuple. ``credentials`` is ``None`` on
        any failure; ``message`` is a human-readable status string.
        This function does not raise: errors from the token exchange are
        reported through ``message``.
    """
    # Normalise first so that None / non-string input follows the same
    # "no code" path instead of raising AttributeError outside the try.
    code = auth_code.strip() if isinstance(auth_code, str) else ""
    if not code:
        return None, "No code provided."
    try:
        client_config = GOOGLE_OAUTH_CONFIG["web"]
        flow = google_auth_oauthlib.flow.Flow.from_client_config(
            {"web": client_config},
            scopes=["https://www.googleapis.com/auth/drive.file"],
        )
        # Must match the redirect URI used when the auth URL was generated.
        flow.redirect_uri = client_config["redirect_uris"][0]
        flow.fetch_token(code=code)
        creds = flow.credentials
        if not creds or not creds.valid:
            return None, "Could not validate credentials. Check code and try again."
        return creds, "Google Sign-In successful!"
    except Exception as e:
        # Boundary handler: the UI shows the message instead of crashing.
        return None, f"Error during token exchange: {e}"
110
+
111
def google_drive_upload(file_path, credentials, folder_id=None):
    """Upload a local file to Google Drive.

    Args:
        file_path: Path of the local file to upload.
        credentials: Google credentials passed to the Drive v3 client.
        folder_id: Optional ID of the Drive folder to place the file in.

    Returns:
        The created file's Drive ID (``""`` if the response has no ``id``),
        or a string starting with ``"Error uploading to Drive"`` on failure.
        Callers distinguish the two by that prefix; this function does not
        raise.
    """
    try:
        # Fail fast before building the Drive client so a missing file does
        # not cost an API discovery round-trip and gets a clear message.
        if not os.path.isfile(file_path):
            return f"Error uploading to Drive: file not found: {file_path}"
        drive_service = googleapiclient.discovery.build("drive", "v3", credentials=credentials)
        file_metadata = {'name': os.path.basename(file_path)}
        if folder_id:
            file_metadata['parents'] = [folder_id]
        media = googleapiclient.http.MediaFileUpload(file_path, resumable=True)
        created = drive_service.files().create(
            body=file_metadata, media_body=media, fields='id'
        ).execute()
        return created.get("id", "")
    except Exception as e:
        # Boundary handler: errors are reported as a prefixed string.
        return f"Error uploading to Drive: {e}"
122
+
123
def create_drive_folder(drive_service, name):
    """Create a folder in Google Drive and return its ID.

    Args:
        drive_service: Drive API client exposing ``files().create(...)``.
        name: Name to give the new folder.

    Returns:
        The ``id`` field of the create response, or ``None`` if absent.
    """
    request = drive_service.files().create(
        body={'name': name, 'mimeType': 'application/vnd.google-apps.folder'},
        fields='id',
    )
    response = request.execute()
    return response.get('id')
127
+
128
  # -------------------- Playwright Setup --------------------
129
  def install_playwright_dependencies():
130
  try:
 
132
  os.environ['PLAYWRIGHT_BROWSERS_PATH'] = os.path.expanduser("~/.cache/ms-playwright")
133
 
134
  # Install system dependencies
135
+ subprocess.run(['apt-get', 'update', '-y'], check=True)
136
+ packages = [
137
+ 'libnss3', 'libnss3-tools', 'libnspr4', 'libatk1.0-0',
138
+ 'libatk-bridge2.0-0', 'libatspi2.0-0', 'libcups2', 'libxcomposite1',
139
+ 'libxdamage1', 'libdrm2', 'libgbm1', 'libpango-1.0-0'
140
+ ]
141
+ subprocess.run(['apt-get', 'install', '-y', '--no-install-recommends'] + packages, check=True)
142
+
143
+ # Install Playwright and dependencies
144
  subprocess.run(['pip', 'install', 'playwright'], check=True)
145
+ subprocess.run(['python3', '-m', 'playwright', 'install', 'chromium'], check=True)
 
146
 
147
  st.success("Playwright dependencies installed successfully!")
148
  except Exception as e:
 
1739
  st.session_state.initialized = True
1740
  st.session_state.discovered_files = []
1741
  st.session_state.current_url = None
1742
+ st.session_state.google_creds = None
1743
  st.session_state.selected_files = []
1744
  st.session_state.do_deep_search = False
1745
  st.session_state.deep_search_url = None
 
1754
  use_proxy = st.checkbox("Use Proxy", key="proxy_checkbox")
1755
  proxy = st.text_input("Proxy URL", placeholder="http://proxy:port", key="proxy_input")
1756
 
1757
+ with st.expander("Google Drive Integration", expanded=False):
1758
+ if st.button("Start Google Sign-In", key="google_signin_btn"):
1759
+ auth_url = get_google_auth_url()
1760
+ st.markdown(f"[Click here to authorize]({auth_url})")
1761
+ auth_code = st.text_input("Enter authorization code", key="auth_code_input")
1762
+ if st.button("Complete Sign-In", key="complete_signin_btn") and auth_code:
1763
+ creds, msg = exchange_code_for_credentials(auth_code)
1764
+ st.session_state.google_creds = creds
1765
+ st.write(msg)
1766
+
1767
  if mode == "Manual URL":
1768
  st.header("Manual URL Mode")
1769
  url = st.text_input("Enter URL", placeholder="https://example.com", key="url_input")
 
1830
  st.session_state.selected_files = selected_indices
1831
 
1832
  if selected_indices:
1833
+ col1, col2, col3, col4 = st.columns(4)
1834
+ with col1:
1835
+ download_dir = st.text_input("Download Directory", value="./downloads", key="download_dir_input")
1836
+ with col2:
1837
+ create_zip = st.checkbox("Create ZIP file", value=True, key="create_zip_checkbox")
1838
+ with col3:
1839
+ delete_after = st.checkbox("Delete after creating ZIP", key="delete_after_checkbox")
1840
+ with col4:
1841
+ upload_to_drive = st.checkbox("Upload to Google Drive", key="upload_drive_checkbox")
1842
 
1843
  if st.button("Download Selected", key="download_btn"):
1844
  if not os.path.exists(download_dir):
 
1870
  if downloaded:
1871
  st.success(f"Successfully downloaded {len(downloaded)} files")
1872
 
1873
+ if create_zip:
 
1874
  zip_path = create_zip_file(downloaded, download_dir)
1875
  st.success(f"Created ZIP file: {zip_path}")
1876
 
 
1878
  with open(zip_path, "rb") as f:
1879
  zip_data = f.read()
1880
 
 
1881
  st.download_button(
1882
  label="Download ZIP",
1883
  data=zip_data,
1884
+ file_name=os.path.basename(zip_path),
1885
  mime="application/zip",
1886
  key="download_zip_btn"
1887
  )
1888
+
1889
+ # Upload to Google Drive if requested
1890
+ if upload_to_drive and st.session_state.google_creds:
1891
+ drive_service = googleapiclient.discovery.build("drive", "v3", credentials=st.session_state.google_creds)
1892
+ folder_id = create_drive_folder(drive_service, f"Downloads_{urlparse(url).netloc}")
1893
+ drive_id = google_drive_upload(zip_path, st.session_state.google_creds, folder_id)
1894
+ if not isinstance(drive_id, str) or not drive_id.startswith("Error"):
1895
+ st.success(f"Uploaded to Google Drive. File ID: {drive_id}")
1896
+ else:
1897
+ st.error(drive_id)
1898
+
1899
+ # Delete original files if requested
1900
+ if delete_after:
1901
+ for path in downloaded:
1902
+ try:
1903
+ os.remove(path)
1904
+ except Exception as e:
1905
+ st.warning(f"Could not delete {path}: {e}")
1906
+ st.info("Deleted original files after ZIP creation")
1907
  else:
1908
  # Provide individual file downloads
1909
  st.write("Download files individually:")