# Spaces: Sleeping
import json
import os

import requests
from bs4 import BeautifulSoup
# Step 1: Fetch the authenticity_token and commitOid from the GitHub edit page
def fetch_authenticity_token_and_commit_oid(*, cookie=None, timeout=30):
    """Fetch the CSRF token and commit OID from the GitHub edit page of user.json.

    The edit page embeds a JSON document in a
    ``<script type="application/json" data-target="react-app.embeddedData">``
    tag; the token and commit OID are read from that document.

    Args:
        cookie: GitHub session cookie header value. When ``None``, the value
            of the ``GITHUB_COOKIE`` environment variable is used. Session
            cookies are credentials and must not be committed to source.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        ``(authenticity_token, commit_oid)`` on success, ``(None, None)`` on
        any failure (a diagnostic is printed in that case).
    """
    if cookie is None:
        cookie = os.environ.get("GITHUB_COOKIE")
    if not cookie:
        print("Error: No GitHub session cookie provided. Set the GITHUB_COOKIE environment variable.")
        return None, None

    url = "https://github.com/omarnuwrar/facebook_users/edit/main/user.json"
    # No "if-none-match" header: a matching ETag would yield 304 with an
    # empty body, and there is no local cache to fall back on.
    headers = {
        "cookie": cookie,
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "x-github-target": "dotcom",
        "x-react-router": "json",
        "x-requested-with": "XMLHttpRequest",
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.exceptions.RequestException as e:
        print(f"Error: Failed to fetch the page. Details: {e}")
        return None, None

    if response.status_code != 200:
        print(f"Error: Failed to fetch the page. Status code: {response.status_code}")
        return None, None

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find("script", {"type": "application/json", "data-target": "react-app.embeddedData"})
    if script_tag is None:
        print("Error: Could not find the required <script> tag.")
        return None, None

    try:
        # .string is None when the tag is empty or has several children;
        # fall back to "" so the failure surfaces as a JSONDecodeError.
        json_data = json.loads((script_tag.string or "").strip())
        payload = json_data["payload"]
        authenticity_token = payload["csrf_tokens"]["/omarnuwrar/facebook_users/tree-save/main/user.json"]["post"]
        commit_oid = payload["webCommitInfo"]["commitOid"]
    except (KeyError, TypeError, json.JSONDecodeError) as e:
        print(f"Error: Failed to extract data. Details: {str(e)}")
        return None, None
    return authenticity_token, commit_oid
# Step 2: Send the POST request to update the user.json file
def update_user_json_file(authenticity_token, commit_oid, new_content, *, cookie=None, timeout=30):
    """Commit new content for user.json through GitHub's web "tree-save" endpoint.

    Args:
        authenticity_token: CSRF token for the tree-save POST.
        commit_oid: Commit OID the edit is based on.
        new_content: Full new text of ``user.json``.
        cookie: GitHub session cookie header value. When ``None``, the value
            of the ``GITHUB_COOKIE`` environment variable is used. Session
            cookies are credentials and must not be committed to source.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A dict with ``"success"`` (bool) and ``"message"`` (str); failed HTTP
        responses additionally carry the response body under ``"details"``.
    """
    # Step 1 returns (None, None) on failure; refuse to POST with those.
    if not authenticity_token or not commit_oid:
        return {"success": False, "message": "Missing authenticity token or commit OID."}

    if cookie is None:
        cookie = os.environ.get("GITHUB_COOKIE")
    if not cookie:
        return {
            "success": False,
            "message": "No GitHub session cookie provided. Set the GITHUB_COOKIE environment variable.",
        }

    url = "https://github.com/omarnuwrar/facebook_users/tree-save/main/user.json"
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
        "github-verified-fetch": "true",
        "content-type": "application/x-www-form-urlencoded",
        "cookie": cookie,
    }
    payload = {
        "message": "Update user.json",
        "placeholder_message": "Update user.json",
        "description": "",
        "commit-choice": "direct",
        "target_branch": "main",
        "quick_pull": "",
        "guidance_task": "",
        "commit": commit_oid,
        "same_repo": "1",
        "pr": "",
        "content_changed": "true",
        "filename": "user.json",
        "new_filename": "user.json",
        "value": new_content,
        "authenticity_token": authenticity_token,
    }

    try:
        response = requests.post(url, headers=headers, data=payload, timeout=timeout)
    except requests.exceptions.RequestException as e:
        return {"success": False, "message": f"Request failed: {e}"}

    if response.status_code == 200:
        return {"success": True, "message": "user.json has been updated!"}
    return {
        "success": False,
        "message": f"Request failed with status code {response.status_code}",
        "details": response.text,
    }
# Function to fetch and extract the JSON data
def fetch_json_from_github(*, cookie=None, timeout=30):
    """Fetch user.json from its GitHub blob page and return the parsed JSON.

    The blob page embeds a JSON document in a
    ``<script type="application/json" data-target="react-app.embeddedData">``
    tag; the file's text is taken from ``payload.blob.rawLines`` there.

    Args:
        cookie: GitHub session cookie header value. When ``None``, the value
            of the ``GITHUB_COOKIE`` environment variable is used. Session
            cookies are credentials and must not be committed to source.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        ``{"success": True, "data": <parsed JSON>}`` on success, otherwise
        ``{"success": False, "message": <reason>}``.
    """
    if cookie is None:
        cookie = os.environ.get("GITHUB_COOKIE")
    if not cookie:
        return {
            "success": False,
            "message": "No GitHub session cookie provided. Set the GITHUB_COOKIE environment variable.",
        }

    url = "https://github.com/omarnuwrar/facebook_users/blob/main/user.json"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "Cookie": cookie,
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn HTTP error statuses into exceptions

        soup = BeautifulSoup(response.text, 'html.parser')
        script_tag = soup.find('script', {'type': 'application/json', 'data-target': 'react-app.embeddedData'})
        if script_tag is None:
            return {"success": False, "message": "Could not find the <script> tag with embedded JSON data."}

        # .string is None when the tag is empty or has several children;
        # "" makes that surface as a JSONDecodeError handled below.
        embedded_data = json.loads(script_tag.string or "")

        # Use `or {}` so a key that is present but null does not break the chain.
        payload = embedded_data.get("payload") if isinstance(embedded_data, dict) else None
        blob = (payload or {}).get("blob") or {}
        raw_lines = blob.get("rawLines") or []
        if not raw_lines:
            return {"success": False, "message": "JSON data not found in the 'rawLines' key."}

        # Join every line: parsing only the first one fails for any
        # user.json that spans more than a single line.
        data = json.loads("\n".join(raw_lines))
        return {"success": True, "data": data}
    except requests.exceptions.RequestException as e:
        return {"success": False, "message": f"Error fetching data: {e}"}
    except json.JSONDecodeError as je:
        return {"success": False, "message": f"Error parsing JSON: {je}"}