File size: 6,800 Bytes
79859e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 |
from __future__ import annotations
import asyncio
import time
import json
from aiohttp import ClientSession, BaseConnector
from urllib.parse import quote
from typing import List, Dict
try:
from bs4 import BeautifulSoup
has_requirements = True
except ImportError:
has_requirements = False
from ..helper import get_connector
from ...errors import MissingRequirementsError, RateLimitError
from ...webdriver import WebDriver, get_driver_cookies, get_browser
BING_URL = "https://www.bing.com"
TIMEOUT_LOGIN = 1200
TIMEOUT_IMAGE_CREATION = 300
ERRORS = [
"this prompt is being reviewed",
"this prompt has been blocked",
"we're working hard to offer image creator in more languages",
"we can't create your images right now"
]
BAD_IMAGES = [
"https://r.bing.com/rp/in-2zU3AJUdkgFe7ZKv19yPBHVs.png",
"https://r.bing.com/rp/TX9QuO3WzcCJz1uaaSwQAz39Kb0.jpg",
]
def wait_for_login(driver: WebDriver, timeout: int = TIMEOUT_LOGIN) -> None:
"""
Waits for the user to log in within a given timeout period.
Args:
driver (WebDriver): Webdriver for browser automation.
timeout (int): Maximum waiting time in seconds.
Raises:
RuntimeError: If the login process exceeds the timeout.
"""
driver.get(f"{BING_URL}/")
start_time = time.time()
while not driver.get_cookie("_U"):
if time.time() - start_time > timeout:
raise RuntimeError("Timeout error")
time.sleep(0.5)
def get_cookies_from_browser(proxy: str = None) -> dict[str, str]:
"""
Retrieves cookies from the browser using webdriver.
Args:
proxy (str, optional): Proxy configuration.
Returns:
dict[str, str]: Retrieved cookies.
"""
with get_browser(proxy=proxy) as driver:
wait_for_login(driver)
time.sleep(1)
return get_driver_cookies(driver)
def create_session(cookies: Dict[str, str], proxy: str = None, connector: BaseConnector = None) -> ClientSession:
"""
Creates a new client session with specified cookies and headers.
Args:
cookies (Dict[str, str]): Cookies to be used for the session.
Returns:
ClientSession: The created client session.
"""
headers = {
"accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
"accept-encoding": "gzip, deflate, br",
"accept-language": "en-US,en;q=0.9,zh-CN;q=0.8,zh-TW;q=0.7,zh;q=0.6",
"content-type": "application/x-www-form-urlencoded",
"referrer-policy": "origin-when-cross-origin",
"referrer": "https://www.bing.com/images/create/",
"origin": "https://www.bing.com",
"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.54",
"sec-ch-ua": "\"Microsoft Edge\";v=\"111\", \"Not(A:Brand\";v=\"8\", \"Chromium\";v=\"111\"",
"sec-ch-ua-mobile": "?0",
"sec-fetch-dest": "document",
"sec-fetch-mode": "navigate",
"sec-fetch-site": "same-origin",
"sec-fetch-user": "?1",
"upgrade-insecure-requests": "1",
}
if cookies:
headers["Cookie"] = "; ".join(f"{k}={v}" for k, v in cookies.items())
return ClientSession(headers=headers, connector=get_connector(connector, proxy))
async def create_images(session: ClientSession, prompt: str, timeout: int = TIMEOUT_IMAGE_CREATION) -> List[str]:
"""
Creates images based on a given prompt using Bing's service.
Args:
session (ClientSession): Active client session.
prompt (str): Prompt to generate images.
proxy (str, optional): Proxy configuration.
timeout (int): Timeout for the request.
Returns:
List[str]: A list of URLs to the created images.
Raises:
RuntimeError: If image creation fails or times out.
"""
if not has_requirements:
raise MissingRequirementsError('Install "beautifulsoup4" package')
url_encoded_prompt = quote(prompt)
payload = f"q={url_encoded_prompt}&rt=4&FORM=GENCRE"
url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=4&FORM=GENCRE"
async with session.post(url, allow_redirects=False, data=payload, timeout=timeout) as response:
response.raise_for_status()
text = (await response.text()).lower()
if "0 coins available" in text:
raise RateLimitError("No coins left. Log in with a different account or wait a while")
for error in ERRORS:
if error in text:
raise RuntimeError(f"Create images failed: {error}")
if response.status != 302:
url = f"{BING_URL}/images/create?q={url_encoded_prompt}&rt=3&FORM=GENCRE"
async with session.post(url, allow_redirects=False, timeout=timeout) as response:
if response.status != 302:
raise RuntimeError(f"Create images failed. Code: {response.status}")
redirect_url = response.headers["Location"].replace("&nfy=1", "")
redirect_url = f"{BING_URL}{redirect_url}"
request_id = redirect_url.split("id=")[-1]
async with session.get(redirect_url) as response:
response.raise_for_status()
polling_url = f"{BING_URL}/images/create/async/results/{request_id}?q={url_encoded_prompt}"
start_time = time.time()
while True:
if time.time() - start_time > timeout:
raise RuntimeError(f"Timeout error after {timeout} sec")
async with session.get(polling_url) as response:
if response.status != 200:
raise RuntimeError(f"Polling images faild. Code: {response.status}")
text = await response.text()
if not text or "GenerativeImagesStatusPage" in text:
await asyncio.sleep(1)
else:
break
error = None
try:
error = json.loads(text).get("errorMessage")
except:
pass
if error == "Pending":
raise RuntimeError("Prompt is been blocked")
elif error:
raise RuntimeError(error)
return read_images(text)
def read_images(html_content: str) -> List[str]:
"""
Extracts image URLs from the HTML content.
Args:
html_content (str): HTML content containing image URLs.
Returns:
List[str]: A list of image URLs.
"""
soup = BeautifulSoup(html_content, "html.parser")
tags = soup.find_all("img", class_="mimg")
if not tags:
tags = soup.find_all("img", class_="gir_mmimg")
images = [img["src"].split("?w=")[0] for img in tags]
if any(im in BAD_IMAGES for im in images):
raise RuntimeError("Bad images found")
if not images:
raise RuntimeError("No images found")
return images |