Spaces:
Running
Running
Upload 3 files
Browse files- driver.py +242 -0
- instagram.py +78 -0
- main.py +2 -0
driver.py
CHANGED
@@ -17,6 +17,11 @@ from selenium.webdriver.common.by import By
|
|
17 |
from selenium.webdriver.support.expected_conditions import presence_of_element_located
|
18 |
from selenium.webdriver.support.wait import WebDriverWait
|
19 |
|
|
|
|
|
|
|
|
|
|
|
20 |
class YoutubeDriver:
|
21 |
def __init__(self, search_terms: str, max_results: int = 5):
|
22 |
self.base_url = "https://youtube.com/results?search_query={0}"
|
@@ -149,3 +154,240 @@ class YoutubeDriver:
|
|
149 |
"quiet": True,
|
150 |
"logtostderr": False,
|
151 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
from selenium.webdriver.support.expected_conditions import presence_of_element_located
|
18 |
from selenium.webdriver.support.wait import WebDriverWait
|
19 |
|
20 |
+
# Path of the Chrome binary; ChromeDriver.get() assigns it to Options.binary_location.
CHROME_BIN = "/app/.chrome-for-testing/chrome-linux64/chrome"
|
21 |
+
# Path of the chromedriver executable; ChromeDriver.get() passes it to selenium's Service().
CHROME_DRIVER = "/app/.chrome-for-testing/chromedriver-linux64/chromedriver"
|
22 |
+
# Download directory: sent to Chrome as the Page.setDownloadBehavior downloadPath and
# used as the prefix of the file path returned by ChromeDriver.generate_carbon().
DWL_DIR = "./downloads/"
|
23 |
+
# NOTE(review): presumably a scratch directory for intermediate files; nothing in the
# visible part of this file reads it -- confirm before relying on it.
TEMP_DIR = "./temp/"
|
24 |
+
|
25 |
class YoutubeDriver:
|
26 |
def __init__(self, search_terms: str, max_results: int = 5):
|
27 |
self.base_url = "https://youtube.com/results?search_query={0}"
|
|
|
154 |
"quiet": True,
|
155 |
"logtostderr": False,
|
156 |
}
|
157 |
+
|
158 |
+
class ChromeDriver:
    """Create and drive headless Selenium Chrome sessions.

    Besides starting and closing browser sessions, the class can render a
    code snippet to a PNG through carbon.now.sh.
    """

    def __init__(self) -> None:
        # Theme names substituted into the ``t`` query parameter of the
        # carbon.now.sh URL built by ``get_random_carbon``.
        self.carbon_theme = [
            "3024-night",
            "a11y-dark",
            "blackboard",
            "base16-dark",
            "base16-light",
            "cobalt",
            "duotone-dark",
            "hopscotch",
            "lucario",
            "material",
            "monokai",
            "night-owl",
            "nord",
            "oceanic-next",
            "one-light",
            "one-dark",
            "panda-syntax",
            "paraiso-dark",
            "seti",
            "shades-of-purple",
            "solarized+dark",
            "solarized+light",
            "synthwave-84",
            "twilight",
            "verminal",
            "vscode",
            "yeti",
            "zenburn",
        ]

    def get(self):
        """Start a new headless Chrome session.

        Returns:
            A ``(driver, error)`` tuple. On success ``driver`` is the
            ``webdriver.Chrome`` instance and ``error`` is ``None``; on
            failure ``driver`` is ``None`` and ``error`` is a message string.
            The method reports failures through the tuple, it does not raise.
        """
        if not CHROME_BIN:
            return (
                None,
                "ChromeBinaryErr: No binary path found! Install Chromium or Google Chrome.",
            )

        try:
            options = Options()
            options.binary_location = CHROME_BIN
            for flag in (
                "--disable-dev-shm-usage",
                "--ignore-certificate-errors",
                "--disable-gpu",
                "--headless=new",
                "--test-type",
                "--no-sandbox",
                # Chrome parses this switch as "width,height"; the previous
                # "1920x1080" spelling is not a valid size.
                "--window-size=1920,1080",
            ):
                options.add_argument(flag)
            options.add_experimental_option(
                "prefs", {"download.default_directory": "./"}
            )
            service = Service(CHROME_DRIVER)
            # Keyword arguments keep the call independent of the positional
            # parameter order, which changed between Selenium 4 releases.
            driver = webdriver.Chrome(options=options, service=service)
            return driver, None
        except Exception as e:
            # Boundary handler: every startup failure is reported through the
            # error slot of the returned tuple.
            return None, f"ChromeDriverErr: {e}"

    def close(self, driver: "webdriver.Chrome"):
        """Close the current window and always end the browser session.

        ``quit()`` runs even when ``close()`` raises, so the browser process
        is not left behind; an error from ``close()`` still propagates.
        """
        try:
            driver.close()
        finally:
            driver.quit()

    @property
    def get_random_carbon(self) -> str:
        """Return a carbon.now.sh URL prefix with a random theme and background.

        The result ends with ``&code=`` so the URL-encoded snippet can be
        appended directly.
        """
        theme = random.choice(self.carbon_theme)
        red = random.randint(1, 255)
        green = random.randint(1, 255)
        blue = random.randint(1, 255)
        return (
            "https://carbon.now.sh/?l=auto"
            f"&t={theme}"
            f"&bg=rgba%28{red}%2C{green}%2C{blue}%2C1%29"
            "&code="
        )

    async def generate_carbon(
        self, driver: "webdriver.Chrome", code: str, is_random: bool = False
    ) -> str:
        """Render ``code`` on carbon.now.sh and trigger a PNG export.

        Args:
            driver: An already started Chrome session.
            code: Source text to render.
            is_random: When true, use a random theme and background colour.

        Returns:
            The path under ``DWL_DIR`` the PNG is expected at. The method
            returns right after clicking the export button; it does not wait
            for the download to finish.
        """
        filename = f"{round(time.time())}"
        base_url = (
            self.get_random_carbon
            if is_random
            else "https://carbon.now.sh/?l=auto&code="
        )

        driver.get(base_url + format_text(quote_plus(code)))

        # Register the raw Chromium "send_command" endpoint so that headless
        # Chrome can be told to allow downloads into DWL_DIR.
        driver.command_executor._commands["send_command"] = (
            "POST",
            "/session/$sessionId/chromium/send_command",
        )
        params = {
            "cmd": "Page.setDownloadBehavior",
            "params": {"behavior": "allow", "downloadPath": DWL_DIR},
        }
        driver.execute("send_command", params)

        driver.find_element(By.XPATH, "//button[@id='export-menu']").click()
        driver.find_element(By.XPATH, "//input[@title='filename']").send_keys(filename)
        driver.find_element(By.XPATH, "//button[@id='export-png']").click()

        # DWL_DIR already ends with a slash; strip it to avoid "dir//file".
        return f"{DWL_DIR.rstrip('/')}/{filename}.png"
|
255 |
+
|
256 |
+
class SCRAP_DATA:
|
257 |
+
"""Class to get and handel scrapped data"""
|
258 |
+
|
259 |
+
def __init__(self, urls: list[str] | str) -> None:
|
260 |
+
self.urls = urls
|
261 |
+
self.path = "./scrapped/"
|
262 |
+
if not os.path.isdir(self.path):
|
263 |
+
os.makedirs("./scrapped/")
|
264 |
+
|
265 |
+
def get_images(self) -> list:
|
266 |
+
images = []
|
267 |
+
if isinstance(self.urls, str):
|
268 |
+
requested = requests.get(self.urls)
|
269 |
+
try:
|
270 |
+
name = self.path + f"img_{time.time()}.jpg"
|
271 |
+
with open(name, "wb") as f:
|
272 |
+
f.write(requested.content)
|
273 |
+
images.append(name)
|
274 |
+
except Exception as e:
|
275 |
+
requested.close()
|
276 |
+
else:
|
277 |
+
for i in self.urls:
|
278 |
+
if i:
|
279 |
+
requested = requests.get(i)
|
280 |
+
else:
|
281 |
+
continue
|
282 |
+
try:
|
283 |
+
name = self.path + f"img_{time.time()}.jpg"
|
284 |
+
with open(name, "wb") as f:
|
285 |
+
f.write(requested.content)
|
286 |
+
images.append(name)
|
287 |
+
except Exception as e:
|
288 |
+
|
289 |
+
requested.close()
|
290 |
+
continue
|
291 |
+
return images
|
292 |
+
|
293 |
+
def get_videos(self) -> list:
|
294 |
+
videos = []
|
295 |
+
if isinstance(self.urls, str):
|
296 |
+
if i:
|
297 |
+
requested = requests.get(i)
|
298 |
+
else:
|
299 |
+
return []
|
300 |
+
try:
|
301 |
+
name = self.path + f"vid_{time.time()}.mp4"
|
302 |
+
with open(name, "wb") as f:
|
303 |
+
f.write(requested.content)
|
304 |
+
videos.append(name)
|
305 |
+
except Exception as e:
|
306 |
+
requested.close()
|
307 |
+
else:
|
308 |
+
for i in self.urls:
|
309 |
+
if i:
|
310 |
+
requested = requests.get(i)
|
311 |
+
else:
|
312 |
+
continue
|
313 |
+
try:
|
314 |
+
name = self.path + f"vid_{time.time()}.mp4"
|
315 |
+
with open(name, "wb") as f:
|
316 |
+
f.write(requested.content)
|
317 |
+
videos.append(name)
|
318 |
+
except Exception as e:
|
319 |
+
|
320 |
+
requested.close()
|
321 |
+
continue
|
322 |
+
return videos
|
323 |
+
|
324 |
+
|
325 |
+
class INSTAGRAM(ChromeDriver):
    """Collect image and video URLs from an Instagram post via headless Chrome."""

    def __init__(self, url: str) -> None:
        self.url = url
        # Selectors used to locate media on the page.
        # NOTE(review): these look like generated class names and may change
        # without notice -- confirm against the live markup.
        self.article = "article._aa6a"
        self.ul_class = "_acay"
        self.image_class = "x5yr21d"
        self.video_class = "x1lliihq"
        self.next_button = "button._afxw"
        # Accumulates results; get_all() extends these lists on every call.
        self.return_dict = {"image": [], "video": []}
        super().__init__()

    def _collect_carousel(self, driver, wait, image_links, video_links) -> None:
        """Append every media ``src`` found while clicking through the post.

        Waits for the ``ul_class`` container, then repeatedly reads the image
        and video elements inside it and clicks the next button until the
        button can no longer be found or clicked.
        """
        element = wait.until(
            presence_of_element_located((By.CLASS_NAME, self.ul_class))
        )
        while True:
            for node in element.find_elements(By.CLASS_NAME, self.image_class):
                image_links.append(node.get_attribute("src"))
            for node in element.find_elements(By.CLASS_NAME, self.video_class):
                video_links.append(node.get_attribute("src"))
            try:
                driver.find_element(By.CSS_SELECTOR, self.next_button).click()
            except Exception:
                # No (clickable) next button: the last item has been reached.
                break

    def _collect_single(self, wait, image_links, video_links) -> None:
        """Append the ``src`` of the first ``img`` (or else ``video``) in the article."""
        element = wait.until(
            presence_of_element_located((By.CSS_SELECTOR, self.article))
        )
        try:
            node = element.find_element(By.TAG_NAME, "img")
            image_links.append(node.get_attribute("src"))
        except Exception:
            node = element.find_element(By.TAG_NAME, "video")
            video_links.append(node.get_attribute("src"))

    def get_all(self):
        """Scrape ``self.url`` and return the collected media URLs.

        Returns:
            ``self.return_dict`` (``{"image": [...], "video": [...]}``) on
            success, or the error message string when the browser could not
            be started.
        """
        driver, error = self.get()
        if not driver:
            return error

        image_links = []
        video_links = []
        try:
            driver.get(self.url)
            wait = WebDriverWait(driver, 30)
            try:
                self._collect_carousel(driver, wait, image_links, video_links)
            except Exception:
                # The list container was not found (or scraping it failed):
                # fall back to reading a single media element.
                self._collect_single(wait, image_links, video_links)
        finally:
            # Always release the browser, including when scraping raised.
            self.close(driver)

        # De-duplicate while keeping first-seen order and drop missing srcs.
        video_links = [link for link in dict.fromkeys(video_links) if link]
        seen_videos = set(video_links)
        # A URL matched by both selectors is reported as a video only.
        # Filtering (rather than list.remove) cannot raise when a video URL
        # was never collected as an image.
        image_links = [
            link
            for link in dict.fromkeys(image_links)
            if link and link not in seen_videos
        ]

        self.return_dict["image"].extend(image_links)
        self.return_dict["video"].extend(video_links)
        return self.return_dict
|
391 |
+
|
392 |
+
|
393 |
+
# Module-level ChromeDriver instance; instagram.py imports it and calls Driver.get()
# to start a browser session per request.
Driver = ChromeDriver()
|
instagram.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import io
|
2 |
+
import requests
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import uuid
|
6 |
+
from PIL import Image, ImageEnhance
|
7 |
+
from fastapi import APIRouter, Depends
|
8 |
+
from fastapi.responses import StreamingResponse
|
9 |
+
from fastapi import UploadFile
|
10 |
+
from fastapi.responses import JSONResponse
|
11 |
+
from fastapi import HTTPException
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
from pydantic import BaseModel
|
14 |
+
from pymongo import MongoClient
|
15 |
+
from models import *
|
16 |
+
from driver import Driver
|
17 |
+
from huggingface_hub import InferenceClient
|
18 |
+
from selenium.webdriver.common.by import By
|
19 |
+
from selenium.webdriver.support.expected_conditions import (
|
20 |
+
presence_of_element_located, visibility_of_element_located)
|
21 |
+
from selenium.webdriver.support.wait import WebDriverWait
|
22 |
+
|
23 |
+
|
24 |
+
class FluxAI(BaseModel):
    """Request body model with a user id, an argument string and an enhancer flag."""

    # Integer identifier of the requesting user.
    user_id: int
    # Free-form argument string supplied by the caller.
    args: str
    # Optional flag; defaults to False when omitted from the payload.
    auto_enhancer: bool = False
|
28 |
+
|
29 |
+
class MistralAI(BaseModel):
    """Request body model carrying a single argument string."""

    # Free-form argument string supplied by the caller.
    args: str
|
31 |
+
|
32 |
+
# Router holding this module's endpoints; main.py mounts it under the /api/v1 prefix.
router = APIRouter()
|
33 |
+
|
34 |
+
def obtain_ids(user: str):
    """Fetch an Instagram profile page and extract its app id and server revision.

    Args:
        user: Profile name appended to ``https://www.instagram.com/``.

    Returns:
        A ``(appid, serverid)`` tuple of digit strings taken from the page's
        ``appId`` and ``server_revision`` fields.

    Raises:
        ValueError: If either field is missing from the response body.
    """
    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get("https://www.instagram.com/" + user, timeout=30)
    app_match = re.search(r'appId":"(\d*)', response.text)
    revision_match = re.search(r'server_revision":(\d*)', response.text)
    if app_match is None or revision_match is None:
        # Previously this surfaced as "'NoneType' object is not subscriptable".
        raise ValueError(
            f"Could not find appId/server_revision in the Instagram page for {user!r}"
        )
    return app_match[1], revision_match[1]
|
39 |
+
|
40 |
+
@router.post("/akeno/instagram/reels", response_model=SuccessResponse, responses={422: {"model": SuccessResponse}})
async def instagram_reels(payload: InstagramX):
    """Resolve an Instagram reel link and return the video base64-encoded.

    The reel page is opened in headless Chrome, the ``src`` of its first
    ``<video>`` element is downloaded, and the bytes are returned under
    ``randydev["video_data"]``. Every failure is reported as a
    ``SuccessResponse`` with ``status="False"`` and an ``error`` message.
    """
    # Imported here because this module's import header does not provide it.
    import base64

    if not re.match(r"^https?://(?:www\.)?instagram\.com/reel/", payload.link):
        return SuccessResponse(
            status="False",
            randydev={"error": "Give a valid Instagram reels link."}
        )
    try:
        driver, error_message = Driver.get()
        if not driver:
            return SuccessResponse(
                status="False",
                randydev={"error": error_message}
            )
        try:
            driver.get(payload.link)
            wait = WebDriverWait(driver, 10)
            element = wait.until(presence_of_element_located((By.TAG_NAME, "video")))
            reels_url = element.get_attribute("src")
        finally:
            # Release the browser even when the page load or the wait fails.
            driver.quit()

        if not reels_url:
            # Without this branch the handler would return None, which does
            # not satisfy the declared response model.
            return SuccessResponse(
                status="False",
                randydev={"error": "No video source found for this reel."}
            )

        binary_content = requests.get(reels_url, timeout=30).content
        # Encode straight from memory; writing a temp file and reading it
        # back added nothing.
        encoded_string = base64.b64encode(binary_content).decode('utf-8')
        return SuccessResponse(
            status="True",
            randydev={"video_data": encoded_string}
        )
    except Exception as e:
        return SuccessResponse(
            status="False",
            randydev={"error": f"An error occurred: {str(e)}"}
        )
|
main.py
CHANGED
@@ -86,6 +86,7 @@ import logging
|
|
86 |
import functions as code
|
87 |
from fluxai import router as fluxai_router
|
88 |
from whisper import router as whisper_router
|
|
|
89 |
from driver import YoutubeDriver
|
90 |
from yt_dlp import YoutubeDL
|
91 |
|
@@ -133,6 +134,7 @@ trans = SyncTranslator()
|
|
133 |
app = FastAPI(docs_url=None, redoc_url="/")
|
134 |
app.include_router(fluxai_router, prefix="/api/v1")
|
135 |
app.include_router(whisper_router, prefix="/api/v1")
|
|
|
136 |
|
137 |
timeout = 100
|
138 |
|
|
|
86 |
import functions as code
|
87 |
from fluxai import router as fluxai_router
|
88 |
from whisper import router as whisper_router
|
89 |
+
from instagram import router as instagram_router
|
90 |
from driver import YoutubeDriver
|
91 |
from yt_dlp import YoutubeDL
|
92 |
|
|
|
134 |
app = FastAPI(docs_url=None, redoc_url="/")
|
135 |
app.include_router(fluxai_router, prefix="/api/v1")
|
136 |
app.include_router(whisper_router, prefix="/api/v1")
|
137 |
+
app.include_router(instagram_router, prefix="/api/v1")
|
138 |
|
139 |
timeout = 100
|
140 |
|