randydev committed on
Commit
2adba1d
·
verified ·
1 Parent(s): aebef05

Upload 3 files

Browse files
Files changed (3) hide show
  1. driver.py +242 -0
  2. instagram.py +78 -0
  3. main.py +2 -0
driver.py CHANGED
@@ -17,6 +17,11 @@ from selenium.webdriver.common.by import By
17
  from selenium.webdriver.support.expected_conditions import presence_of_element_located
18
  from selenium.webdriver.support.wait import WebDriverWait
19
 
 
 
 
 
 
20
  class YoutubeDriver:
21
  def __init__(self, search_terms: str, max_results: int = 5):
22
  self.base_url = "https://youtube.com/results?search_query={0}"
@@ -149,3 +154,240 @@ class YoutubeDriver:
149
  "quiet": True,
150
  "logtostderr": False,
151
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  from selenium.webdriver.support.expected_conditions import presence_of_element_located
18
  from selenium.webdriver.support.wait import WebDriverWait
19
 
20
# Paths to the Chrome browser binary and its chromedriver executable.
CHROME_BIN = "/app/.chrome-for-testing/chrome-linux64/chrome"
CHROME_DRIVER = "/app/.chrome-for-testing/chromedriver-linux64/chromedriver"

# Relative working directories; DWL_DIR is used as the browser download target.
DWL_DIR = "./downloads/"
TEMP_DIR = "./temp/"
24
+
25
  class YoutubeDriver:
26
  def __init__(self, search_terms: str, max_results: int = 5):
27
  self.base_url = "https://youtube.com/results?search_query={0}"
 
154
  "quiet": True,
155
  "logtostderr": False,
156
  }
157
+
158
class ChromeDriver:
    """Create headless Chrome sessions and export carbon.now.sh code images.

    ``get`` builds a configured ``webdriver.Chrome`` instance and
    ``generate_carbon`` drives such a session through the carbon.now.sh
    export menu to download a PNG rendering of a code snippet.
    """

    def __init__(self) -> None:
        # Values substituted into the ``t`` (theme) query parameter of the
        # carbon.now.sh URL built by ``get_random_carbon``.
        self.carbon_theme = [
            "3024-night",
            "a11y-dark",
            "blackboard",
            "base16-dark",
            "base16-light",
            "cobalt",
            "duotone-dark",
            "hopscotch",
            "lucario",
            "material",
            "monokai",
            "night-owl",
            "nord",
            "oceanic-next",
            "one-light",
            "one-dark",
            "panda-syntax",
            "paraiso-dark",
            "seti",
            "shades-of-purple",
            "solarized+dark",
            "solarized+light",
            "synthwave-84",
            "twilight",
            "verminal",
            "vscode",
            "yeti",
            "zenburn",
        ]

    def get(self):
        """Start a headless Chrome session.

        Returns:
            ``(driver, None)`` on success, or ``(None, message)`` when the
            binary path is unset or the browser could not be started.
        """
        if not CHROME_BIN:
            return (
                None,
                "ChromeBinaryErr: No binary path found! Install Chromium or Google Chrome.",
            )

        try:
            options = Options()
            options.binary_location = CHROME_BIN
            options.add_argument("--disable-dev-shm-usage")
            options.add_argument("--ignore-certificate-errors")
            options.add_argument("--disable-gpu")
            options.add_argument("--headless=new")
            options.add_argument("--test-type")
            options.add_argument("--no-sandbox")
            # Chrome expects "width,height"; the "1920x1080" form is not a
            # valid value for this switch.
            options.add_argument("--window-size=1920,1080")
            options.add_experimental_option(
                "prefs", {"download.default_directory": "./"}
            )
            service = Service(CHROME_DRIVER)
            # Keyword arguments: the positional order of webdriver.Chrome's
            # parameters differs between Selenium 4 releases.
            driver = webdriver.Chrome(options=options, service=service)
            return driver, None
        except Exception as e:
            return None, f"ChromeDriverErr: {e}"

    def close(self, driver: "webdriver.Chrome"):
        """Close the current window and always end the WebDriver session.

        ``quit`` runs even when ``close`` raises, so the browser process is
        not left behind; the original exception still propagates.
        """
        try:
            driver.close()
        finally:
            driver.quit()

    @property
    def get_random_carbon(self) -> str:
        """Return a carbon.now.sh URL prefix with a random theme and background.

        The URL ends with ``&code=`` so the caller can append the encoded
        snippet directly.
        """
        theme = random.choice(self.carbon_theme)
        red, green, blue = (random.randint(1, 255) for _ in range(3))
        url = "https://carbon.now.sh/?l=auto"
        url += f"&t={theme}"
        url += f"&bg=rgba%28{red}%2C{green}%2C{blue}%2C1%29"
        url += "&code="
        return url

    async def generate_carbon(
        self, driver: "webdriver.Chrome", code: str, is_random: bool = False
    ) -> str:
        """Render ``code`` on carbon.now.sh and trigger a PNG export.

        Args:
            driver: A live Chrome session, e.g. the one returned by ``get``.
            code: Source text to render; it is URL-quoted and passed through
                ``format_text`` (a helper defined outside this class).
            is_random: When true, use a random theme and background colour.

        Returns:
            The expected path of the exported PNG inside ``DWL_DIR``.
        """
        filename = f"{round(time.time())}"
        base_url = (
            self.get_random_carbon
            if is_random
            else "https://carbon.now.sh/?l=auto&code="
        )

        driver.get(base_url + format_text(quote_plus(code)))
        # Register the Chromium "send_command" endpoint so a DevTools command
        # can be issued through the regular command executor.
        driver.command_executor._commands["send_command"] = (
            "POST",
            "/session/$sessionId/chromium/send_command",
        )
        params = {
            "cmd": "Page.setDownloadBehavior",
            "params": {"behavior": "allow", "downloadPath": DWL_DIR},
        }
        driver.execute("send_command", params)

        driver.find_element(By.XPATH, "//button[@id='export-menu']").click()
        driver.find_element(By.XPATH, "//input[@title='filename']").send_keys(filename)
        driver.find_element(By.XPATH, "//button[@id='export-png']").click()

        # NOTE(review): this returns right after the click; the download may
        # still be in progress when the caller receives the path - confirm
        # that callers wait for the file to appear.
        # DWL_DIR already ends with "/", so strip it to avoid "downloads//x.png".
        return f"{DWL_DIR.rstrip('/')}/{filename}.png"
255
+
256
+ class SCRAP_DATA:
257
+ """Class to get and handel scrapped data"""
258
+
259
+ def __init__(self, urls: list[str] | str) -> None:
260
+ self.urls = urls
261
+ self.path = "./scrapped/"
262
+ if not os.path.isdir(self.path):
263
+ os.makedirs("./scrapped/")
264
+
265
+ def get_images(self) -> list:
266
+ images = []
267
+ if isinstance(self.urls, str):
268
+ requested = requests.get(self.urls)
269
+ try:
270
+ name = self.path + f"img_{time.time()}.jpg"
271
+ with open(name, "wb") as f:
272
+ f.write(requested.content)
273
+ images.append(name)
274
+ except Exception as e:
275
+ requested.close()
276
+ else:
277
+ for i in self.urls:
278
+ if i:
279
+ requested = requests.get(i)
280
+ else:
281
+ continue
282
+ try:
283
+ name = self.path + f"img_{time.time()}.jpg"
284
+ with open(name, "wb") as f:
285
+ f.write(requested.content)
286
+ images.append(name)
287
+ except Exception as e:
288
+
289
+ requested.close()
290
+ continue
291
+ return images
292
+
293
+ def get_videos(self) -> list:
294
+ videos = []
295
+ if isinstance(self.urls, str):
296
+ if i:
297
+ requested = requests.get(i)
298
+ else:
299
+ return []
300
+ try:
301
+ name = self.path + f"vid_{time.time()}.mp4"
302
+ with open(name, "wb") as f:
303
+ f.write(requested.content)
304
+ videos.append(name)
305
+ except Exception as e:
306
+ requested.close()
307
+ else:
308
+ for i in self.urls:
309
+ if i:
310
+ requested = requests.get(i)
311
+ else:
312
+ continue
313
+ try:
314
+ name = self.path + f"vid_{time.time()}.mp4"
315
+ with open(name, "wb") as f:
316
+ f.write(requested.content)
317
+ videos.append(name)
318
+ except Exception as e:
319
+
320
+ requested.close()
321
+ continue
322
+ return videos
323
+
324
+
325
class INSTAGRAM(ChromeDriver):
    """Scrape image and video URLs from an Instagram post via headless Chrome."""

    def __init__(self, url: str) -> None:
        """Remember the post URL and the selectors used to locate its media."""
        self.url = url
        # Selectors for the post markup: the single-post container, the
        # multi-item list, the image/video elements and the "next" button.
        self.article = "article._aa6a"
        self.ul_class = "_acay"
        self.image_class = "x5yr21d"
        self.video_class = "x1lliihq"
        self.next_button = "button._afxw"
        # Accumulates results; get_all() extends these lists on every call.
        self.return_dict = {"image": [], "video": []}
        super().__init__()

    def get_all(self):
        """Collect the media URLs of the post.

        Returns:
            The error message string when the browser could not be started,
            otherwise ``self.return_dict`` with the de-duplicated ``"image"``
            and ``"video"`` URL lists appended.
        """
        driver, error = self.get()
        if not driver:
            return error

        image_links = []
        video_links = []
        try:
            driver.get(self.url)
            wait = WebDriverWait(driver, 30)
            try:
                self._collect_multi(driver, wait, image_links, video_links)
            except Exception:
                # The multi-item list was not found (or broke midway): fall
                # back to reading a single media element from the article.
                self._collect_single(wait, image_links, video_links)
        finally:
            # Always release the browser, even when the fallback fails.
            self.close(driver)

        # Drop missing src values and duplicates while keeping page order.
        video_links = list(dict.fromkeys(link for link in video_links if link))
        video_set = set(video_links)
        # An element can match both class selectors; keep such URLs only in
        # the video list. Filtering (rather than list.remove) cannot raise
        # when a video URL never appeared among the images.
        image_links = [
            link
            for link in dict.fromkeys(image_links)
            if link and link not in video_set
        ]

        self.return_dict["image"].extend(image_links)
        self.return_dict["video"].extend(video_links)
        return self.return_dict

    def _collect_multi(self, driver, wait, image_links, video_links) -> None:
        """Step through a multi-item post, appending each item's src URLs."""
        element = wait.until(
            presence_of_element_located((By.CLASS_NAME, self.ul_class))
        )
        while True:
            for node in element.find_elements(By.CLASS_NAME, self.image_class):
                image_links.append(node.get_attribute("src"))
            for node in element.find_elements(By.CLASS_NAME, self.video_class):
                video_links.append(node.get_attribute("src"))

            try:
                driver.find_element(By.CSS_SELECTOR, self.next_button).click()
            except Exception:
                # No clickable "next" button: treated as the last item.
                break

    def _collect_single(self, wait, image_links, video_links) -> None:
        """Read the single image, or failing that the video, of a post."""
        element = wait.until(
            presence_of_element_located((By.CSS_SELECTOR, self.article))
        )
        try:
            media = element.find_element(By.TAG_NAME, "img")
            image_links.append(media.get_attribute("src"))
        except Exception:
            media = element.find_element(By.TAG_NAME, "video")
            video_links.append(media.get_attribute("src"))
391
+
392
+
393
# Module-level ChromeDriver instance for importers (``from driver import Driver``).
Driver = ChromeDriver()
instagram.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import requests
3
+ import os
4
+ import re
5
+ import uuid
6
+ from PIL import Image, ImageEnhance
7
+ from fastapi import APIRouter, Depends
8
+ from fastapi.responses import StreamingResponse
9
+ from fastapi import UploadFile
10
+ from fastapi.responses import JSONResponse
11
+ from fastapi import HTTPException
12
+ from dotenv import load_dotenv
13
+ from pydantic import BaseModel
14
+ from pymongo import MongoClient
15
+ from models import *
16
+ from driver import Driver
17
+ from huggingface_hub import InferenceClient
18
+ from selenium.webdriver.common.by import By
19
+ from selenium.webdriver.support.expected_conditions import (
20
+ presence_of_element_located, visibility_of_element_located)
21
+ from selenium.webdriver.support.wait import WebDriverWait
22
+
23
+
24
# Pydantic schema: a user id, an argument string and an opt-in enhancer flag.
# A comment is used instead of a docstring so the generated schema is unchanged.
class FluxAI(BaseModel):
    user_id: int
    args: str
    auto_enhancer: bool = False  # off unless the client asks for it
28
+
29
# Pydantic schema carrying a single argument string.
# A comment is used instead of a docstring so the generated schema is unchanged.
class MistralAI(BaseModel):
    args: str
31
+
32
# Router that collects this module's endpoints; main.py mounts it under /api/v1.
router = APIRouter()
33
+
34
def obtain_ids(user: str):
    """Fetch an Instagram profile page and extract two ids from its source.

    Args:
        user: Profile name appended to ``https://www.instagram.com/``.

    Returns:
        ``(appid, serverid)``: the digit strings captured after ``appId":"``
        and ``server_revision":`` in the page text.

    Raises:
        ValueError: If either marker is absent from the response (previously
            this surfaced as an opaque ``TypeError`` from subscripting
            ``None``).
    """
    response = requests.get("https://www.instagram.com/" + user)
    page = response.text
    app_match = re.search(r'appId":"(\d*)', page)
    server_match = re.search(r'server_revision":(\d*)', page)
    if app_match is None or server_match is None:
        raise ValueError(
            f"Could not find appId/server_revision in the Instagram page for {user!r}"
        )
    return app_match[1], server_match[1]
39
+
40
@router.post("/akeno/instagram/reels", response_model=SuccessResponse, responses={422: {"model": SuccessResponse}})
async def instagram_reels(payload: InstagramX):
    """Resolve an Instagram reel link and return the video as base64.

    The reel page is opened in headless Chrome, the ``src`` of its first
    ``<video>`` element is downloaded, and the bytes are returned base64
    encoded under ``randydev["video_data"]``. Every failure is reported as a
    ``SuccessResponse`` with ``status="False"`` and an ``error`` message.

    NOTE(review): the Selenium and ``requests`` calls are blocking and run
    inside an ``async`` handler - confirm this is acceptable for the server.
    """
    # Imported locally because this module's visible top-level imports do not
    # include base64.
    import base64

    if not re.match(r"^https?://(?:www\.)?instagram\.com/reel/", payload.link):
        return SuccessResponse(
            status="False",
            randydev={"error": "Give a valid Instagram reels link."}
        )
    try:
        driver, error_message = Driver.get()
        if not driver:
            return SuccessResponse(
                status="False",
                randydev={"error": error_message}
            )
        try:
            driver.get(payload.link)
            wait = WebDriverWait(driver, 10)
            element = wait.until(presence_of_element_located((By.TAG_NAME, "video")))
            reels_url = element.get_attribute("src")
        finally:
            # Release the browser even when the page load or the wait fails.
            driver.quit()

        if not reels_url:
            # Previously this path fell through and returned None, which does
            # not satisfy the declared response model.
            return SuccessResponse(
                status="False",
                randydev={"error": "No video source found for this reel."}
            )

        # Encode the downloaded bytes directly: the earlier write-to-disk and
        # read-back produced the same data and used a filename that could
        # collide between concurrent requests.
        binary_content = requests.get(reels_url).content
        encoded_string = base64.b64encode(binary_content).decode('utf-8')
        return SuccessResponse(
            status="True",
            randydev={"video_data": encoded_string}
        )
    except Exception as e:
        return SuccessResponse(
            status="False",
            randydev={"error": f"An error occurred: {str(e)}"}
        )
main.py CHANGED
@@ -86,6 +86,7 @@ import logging
86
  import functions as code
87
  from fluxai import router as fluxai_router
88
  from whisper import router as whisper_router
 
89
  from driver import YoutubeDriver
90
  from yt_dlp import YoutubeDL
91
 
@@ -133,6 +134,7 @@ trans = SyncTranslator()
133
  app = FastAPI(docs_url=None, redoc_url="/")
134
  app.include_router(fluxai_router, prefix="/api/v1")
135
  app.include_router(whisper_router, prefix="/api/v1")
 
136
 
137
  timeout = 100
138
 
 
86
  import functions as code
87
  from fluxai import router as fluxai_router
88
  from whisper import router as whisper_router
89
+ from instagram import router as instagram_router
90
  from driver import YoutubeDriver
91
  from yt_dlp import YoutubeDL
92
 
 
134
  app = FastAPI(docs_url=None, redoc_url="/")
135
  app.include_router(fluxai_router, prefix="/api/v1")
136
  app.include_router(whisper_router, prefix="/api/v1")
137
+ app.include_router(instagram_router, prefix="/api/v1")
138
 
139
  timeout = 100
140