Chrunos committed on
Commit
36e6458
·
verified ·
1 Parent(s): 0efafe8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +167 -38
app.py CHANGED
@@ -1,4 +1,4 @@
1
- from fastapi import FastAPI, HTTPException, status
2
  from fastapi.responses import StreamingResponse
3
  import instaloader
4
  import requests
@@ -7,6 +7,8 @@ import time
7
  import logging
8
  import random
9
  from functools import wraps
 
 
10
 
11
  # Configure logging
12
  logging.basicConfig(
@@ -28,19 +30,32 @@ SESSION_FILE = os.path.join(SESSION_DIR, f"session-{INSTAGRAM_USERNAME}") if INS
28
  # Create session directory if not exists
29
  os.makedirs(SESSION_DIR, exist_ok=True)
30
 
 
 
 
 
31
  USER_AGENTS = [
32
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
33
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
34
- "Mozilla/5.0 (Linux; Android 10; SM-G981B) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36"
 
 
35
  ]
36
 
37
- def get_instaloader() -> instaloader.Instaloader:
38
  """Create and configure Instaloader instance with proper parameters"""
39
  L = instaloader.Instaloader(
40
  sleep=True,
41
- request_timeout=300,
42
- max_connection_attempts=2,
43
  user_agent=random.choice(USER_AGENTS),
 
 
 
 
 
 
 
44
  )
45
 
46
  if not INSTAGRAM_USERNAME or not INSTAGRAM_PASSWORD:
@@ -51,33 +66,50 @@ def get_instaloader() -> instaloader.Instaloader:
51
  )
52
 
53
  try:
54
- if SESSION_FILE and os.path.exists(SESSION_FILE):
55
  logger.info(f"Attempting to load session from {SESSION_FILE}")
56
  try:
57
  L.load_session_from_file(INSTAGRAM_USERNAME, SESSION_FILE)
58
  logger.info("Session loaded successfully")
59
- # Verify session is still valid
60
- test_profile = instaloader.Profile.from_username(L.context, INSTAGRAM_USERNAME)
61
- if not test_profile.is_verified:
62
- raise Exception("Session validation failed")
63
- return L
 
 
 
64
  except Exception as e:
65
  logger.warning(f"Session load failed: {str(e)}, performing fresh login")
66
-
 
 
 
67
  logger.info("Starting fresh login process")
68
  L.login(INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)
69
 
 
70
  L.context._session.headers.update({
71
  'Accept-Language': 'en-US,en;q=0.9',
72
  'Accept-Encoding': 'gzip, deflate, br',
73
  'Referer': 'https://www.instagram.com/',
 
74
  'DNT': '1',
 
 
 
 
 
 
75
  })
76
 
77
  if SESSION_FILE:
78
  L.save_session_to_file(SESSION_FILE)
79
  logger.info(f"Saved new session to {SESSION_FILE}")
80
 
 
 
 
81
  return L
82
 
83
  except instaloader.exceptions.BadCredentialsException as e:
@@ -99,23 +131,93 @@ def get_instaloader() -> instaloader.Instaloader:
99
  detail="Instagram login service unavailable"
100
  )
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  def handle_instagram_errors(func):
103
  @wraps(func)
104
  async def wrapper(*args, **kwargs):
 
 
 
 
 
105
  try:
 
 
 
 
 
 
 
106
  return await func(*args, **kwargs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  except instaloader.exceptions.QueryReturnedBadRequestException as e:
108
  logger.error("API error 400: %s", str(e))
 
 
109
  raise HTTPException(
110
  status_code=status.HTTP_429_TOO_MANY_REQUESTS,
111
  detail="Instagram rate limit exceeded"
112
  )
113
- except instaloader.exceptions.ConnectionException as e:
114
- logger.error("Connection error: %s", str(e))
115
- raise HTTPException(
116
- status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
117
- detail="Instagram service unavailable"
118
- )
119
  except Exception as e:
120
  logger.error("Unexpected error: %s", str(e))
121
  raise HTTPException(
@@ -126,45 +228,57 @@ def handle_instagram_errors(func):
126
 
127
  @app.get("/stories/{username}")
128
  @handle_instagram_errors
129
- async def get_stories(username: str):
130
  """Retrieve stories with enhanced anti-detection measures"""
131
  logger.info(f"Processing request for @{username}")
132
 
133
  try:
 
134
  L = get_instaloader()
135
  logger.info("Instaloader instance configured")
136
 
137
- # Randomized delay before profile request
138
- delay = random.uniform(2.5, 5.5)
139
  logger.debug(f"Applying initial delay of {delay:.2f}s")
140
  time.sleep(delay)
141
 
142
  # Profile lookup with retry
 
143
  for attempt in range(3):
144
  try:
145
  logger.info(f"Fetching profile (attempt {attempt+1}/3)")
146
  profile = instaloader.Profile.from_username(L.context, username)
147
  break
148
  except instaloader.exceptions.QueryReturnedBadRequestException:
149
- wait_time = (attempt + 1) * 10
150
- logger.warning(f"Rate limited, waiting {wait_time}s")
151
  time.sleep(wait_time)
152
- else:
 
 
 
 
 
 
 
 
 
 
153
  raise HTTPException(
154
  status.HTTP_429_TOO_MANY_REQUESTS,
155
  "Too many attempts to access Instagram"
156
  )
157
 
158
  logger.info(f"Access check for @{username}")
159
- if not profile.has_public_story and not profile.is_followed_by_viewer:
160
- logger.warning("Private profile access denied")
161
  raise HTTPException(
162
- status.HTTP_403_FORBIDDEN,
163
  "No accessible stories for this profile"
164
  )
165
 
166
- # Additional delay before story fetch
167
- time.sleep(random.uniform(1.5, 3.5))
168
 
169
  logger.info("Fetching stories")
170
  stories = []
@@ -179,14 +293,16 @@ async def get_stories(username: str):
179
  "timestamp": item.date_utc.isoformat(),
180
  }
181
 
182
- # Only try to add view_count if it's a video and the attribute exists
 
 
 
 
183
  if item.is_video:
184
  try:
185
- # Safely check if view_count attribute exists
186
  if hasattr(item, 'view_count'):
187
  story_data["views"] = item.view_count
188
  except AttributeError:
189
- # Skip adding views if the attribute doesn't exist
190
  pass
191
 
192
  stories.append(story_data)
@@ -205,11 +321,14 @@ async def get_stories(username: str):
205
  "No active stories available"
206
  )
207
 
208
- # Final randomized delay
209
- time.sleep(random.uniform(0.5, 1.5))
 
 
 
210
 
211
  logger.info(f"Returning {len(stories)} stories")
212
- return {"data": stories}
213
 
214
  except Exception as e:
215
  logger.error(f"Critical failure: {str(e)}")
@@ -230,21 +349,26 @@ async def download_media(url: str):
230
  )
231
 
232
  # Random delay to avoid detection patterns
233
- time.sleep(random.uniform(1.0, 3.0))
234
 
235
  # Configure headers to mimic a browser
236
  headers = {
237
  "User-Agent": random.choice(USER_AGENTS),
238
  "Accept": "*/*",
239
  "Accept-Language": "en-US,en;q=0.9",
 
240
  "Referer": "https://www.instagram.com/",
 
241
  "Sec-Fetch-Dest": "empty",
242
  "Sec-Fetch-Mode": "cors",
243
  "Sec-Fetch-Site": "cross-site",
 
 
244
  }
245
 
246
  # Request the media with a session
247
- response = requests.get(url, headers=headers, stream=True, timeout=30)
 
248
  response.raise_for_status()
249
 
250
  # Determine content type from response or URL
@@ -276,4 +400,9 @@ async def download_media(url: str):
276
  raise HTTPException(
277
  status_code=status.HTTP_502_BAD_GATEWAY,
278
  detail="Failed to download media"
279
- )
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, status, BackgroundTasks
2
  from fastapi.responses import StreamingResponse
3
  import instaloader
4
  import requests
 
7
  import logging
8
  import random
9
  from functools import wraps
10
+ import json
11
+ from datetime import datetime, timedelta
12
 
13
  # Configure logging
14
  logging.basicConfig(
 
30
  # Create session directory if not exists
31
  os.makedirs(SESSION_DIR, exist_ok=True)
32
 
33
+ # Rate limiting state
34
+ LAST_REQUEST_TIME = {}
35
+ COOLDOWN_PERIODS = {}
36
+
37
  USER_AGENTS = [
38
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
39
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Safari/605.1.15",
40
+ "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
41
+ "Mozilla/5.0 (iPhone; CPU iPhone OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1",
42
+ "Mozilla/5.0 (iPad; CPU OS 17_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Mobile/15E148 Safari/604.1"
43
  ]
44
 
45
+ def get_instaloader(force_login=False) -> instaloader.Instaloader:
46
  """Create and configure Instaloader instance with proper parameters"""
47
  L = instaloader.Instaloader(
48
  sleep=True,
49
+ request_timeout=30, # Reduced timeout
50
+ max_connection_attempts=3,
51
  user_agent=random.choice(USER_AGENTS),
52
+ download_pictures=False,
53
+ download_videos=False,
54
+ download_video_thumbnails=False,
55
+ download_geotags=False,
56
+ download_comments=False,
57
+ compress_json=False,
58
+ post_metadata_txt_pattern=''
59
  )
60
 
61
  if not INSTAGRAM_USERNAME or not INSTAGRAM_PASSWORD:
 
66
  )
67
 
68
  try:
69
+ if not force_login and SESSION_FILE and os.path.exists(SESSION_FILE):
70
  logger.info(f"Attempting to load session from {SESSION_FILE}")
71
  try:
72
  L.load_session_from_file(INSTAGRAM_USERNAME, SESSION_FILE)
73
  logger.info("Session loaded successfully")
74
+ # Test session without using is_verified
75
+ try:
76
+ test_profile = instaloader.Profile.from_username(L.context, INSTAGRAM_USERNAME)
77
+ logger.info(f"Session test: Profile ID {test_profile.userid} verified")
78
+ return L
79
+ except Exception as e:
80
+ logger.warning(f"Session test failed: {str(e)}")
81
+ raise Exception("Invalid session")
82
  except Exception as e:
83
  logger.warning(f"Session load failed: {str(e)}, performing fresh login")
84
+
85
+ # Add delay before login to mimic human behavior
86
+ time.sleep(random.uniform(1.0, 3.0))
87
+
88
  logger.info("Starting fresh login process")
89
  L.login(INSTAGRAM_USERNAME, INSTAGRAM_PASSWORD)
90
 
91
+ # Set realistic headers
92
  L.context._session.headers.update({
93
  'Accept-Language': 'en-US,en;q=0.9',
94
  'Accept-Encoding': 'gzip, deflate, br',
95
  'Referer': 'https://www.instagram.com/',
96
+ 'Origin': 'https://www.instagram.com',
97
  'DNT': '1',
98
+ 'Connection': 'keep-alive',
99
+ 'Sec-Fetch-Dest': 'empty',
100
+ 'Sec-Fetch-Mode': 'cors',
101
+ 'Sec-Fetch-Site': 'same-origin',
102
+ 'Pragma': 'no-cache',
103
+ 'Cache-Control': 'no-cache',
104
  })
105
 
106
  if SESSION_FILE:
107
  L.save_session_to_file(SESSION_FILE)
108
  logger.info(f"Saved new session to {SESSION_FILE}")
109
 
110
+ # Add delay after login
111
+ time.sleep(random.uniform(2.0, 4.0))
112
+
113
  return L
114
 
115
  except instaloader.exceptions.BadCredentialsException as e:
 
131
  detail="Instagram login service unavailable"
132
  )
133
 
134
def is_rate_limited(username: str) -> bool:
    """Check if a given username is currently rate limited.

    A username is limited when it has an unexpired entry in
    ``COOLDOWN_PERIODS`` or when fewer than 60 seconds have passed since the
    time recorded for it in ``LAST_REQUEST_TIME``.

    Side effects: an expired cooldown entry is removed, and when the request
    is allowed the current time is stored as the username's last request.
    A rejected request does not refresh the stored time.

    Args:
        username: Key used for the per-username throttling state.

    Returns:
        True if the request should be rejected, False if it may proceed.
    """
    # Take one snapshot so every comparison below uses the same instant.
    now = datetime.now()

    # If in cooldown period, check if it's expired
    cooldown_until = COOLDOWN_PERIODS.get(username)
    if cooldown_until is not None:
        if now < cooldown_until:
            # total_seconds() rather than .seconds: .seconds is only the
            # seconds *component* of the timedelta and ignores whole days.
            remaining = int((cooldown_until - now).total_seconds())
            logger.warning(f"User {username} in cooldown for {remaining} more seconds")
            return True
        # Cooldown expired
        del COOLDOWN_PERIODS[username]

    # Check time since last request
    min_interval = 60  # Minimum 60 seconds between requests
    last_request = LAST_REQUEST_TIME.get(username)
    if last_request is not None:
        elapsed = int((now - last_request).total_seconds())
        if elapsed < min_interval:
            logger.warning(f"Rate limiting {username}: {elapsed}s elapsed, need {min_interval}s")
            return True

    # Update last request time
    LAST_REQUEST_TIME[username] = now
    return False
157
+
158
# Per-username history of rate-limit hits: username -> (strike count, time of
# the most recent strike). Kept separately from COOLDOWN_PERIODS so the
# history is still available after an expired cooldown entry has been removed.
_RATE_LIMIT_STRIKES = {}
_BASE_COOLDOWN_MINUTES = 5
_MAX_COOLDOWN_MINUTES = 30


def handle_rate_limit_error(username: str):
    """Handle a rate limit by setting a cooldown period for ``username``.

    The cooldown starts at 5 minutes and doubles with each consecutive
    strike (5, 10, 20, then capped at 30). The strike history is forgotten
    once no strike has been recorded for twice the maximum cooldown.

    The result is written to ``COOLDOWN_PERIODS[username]`` as the datetime
    at which the cooldown ends.

    Args:
        username: Key whose cooldown should be (re)started.
    """
    now = datetime.now()

    strikes, last_strike = _RATE_LIMIT_STRIKES.get(username, (0, None))
    quiet_window = timedelta(minutes=2 * _MAX_COOLDOWN_MINUTES)
    if last_strike is None or now - last_strike > quiet_window:
        strikes = 0

    # A cooldown entry that is already present counts as a prior strike even
    # without recorded history, so such a call still yields at least the
    # doubled (10 minute) cooldown.
    if strikes == 0 and username in COOLDOWN_PERIODS:
        strikes = 1

    # Exponential backoff up to 30 minutes; the exponent is clamped so the
    # intermediate power stays small no matter how many strikes accumulate.
    cooldown_minutes = min(
        _BASE_COOLDOWN_MINUTES * 2 ** min(strikes, 10),
        _MAX_COOLDOWN_MINUTES,
    )

    _RATE_LIMIT_STRIKES[username] = (strikes + 1, now)
    COOLDOWN_PERIODS[username] = now + timedelta(minutes=cooldown_minutes)
    logger.warning(f"Setting cooldown for {username} for {cooldown_minutes} minutes")
167
+
168
  def handle_instagram_errors(func):
169
  @wraps(func)
170
  async def wrapper(*args, **kwargs):
171
+ # Extract username from request path
172
+ request_username = ""
173
+ if len(args) > 1 and hasattr(args[1], '__str__'):
174
+ request_username = str(args[1])
175
+
176
  try:
177
+ # Check for rate limiting
178
+ if request_username and is_rate_limited(request_username):
179
+ raise HTTPException(
180
+ status_code=status.HTTP_429_TOO_MANY_REQUESTS,
181
+ detail="Rate limit exceeded. Please try again later."
182
+ )
183
+
184
  return await func(*args, **kwargs)
185
+
186
+ except instaloader.exceptions.ConnectionException as e:
187
+ error_message = str(e)
188
+ logger.error("Connection error: %s", error_message)
189
+
190
+ if "401 Unauthorized" in error_message and "Please wait a few minutes" in error_message:
191
+ if request_username:
192
+ handle_rate_limit_error(request_username)
193
+ raise HTTPException(
194
+ status_code=status.HTTP_429_TOO_MANY_REQUESTS,
195
+ detail="Instagram rate limit exceeded. Please try again later."
196
+ )
197
+ elif "404 Not Found" in error_message:
198
+ raise HTTPException(
199
+ status_code=status.HTTP_404_NOT_FOUND,
200
+ detail="Profile not found or no stories available"
201
+ )
202
+ else:
203
+ raise HTTPException(
204
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
205
+ detail="Instagram service unavailable"
206
+ )
207
+
208
  except instaloader.exceptions.QueryReturnedBadRequestException as e:
209
  logger.error("API error 400: %s", str(e))
210
+ if request_username:
211
+ handle_rate_limit_error(request_username)
212
  raise HTTPException(
213
  status_code=status.HTTP_429_TOO_MANY_REQUESTS,
214
  detail="Instagram rate limit exceeded"
215
  )
216
+
217
+ except HTTPException:
218
+ # Re-raise HTTP exceptions without modification
219
+ raise
220
+
 
221
  except Exception as e:
222
  logger.error("Unexpected error: %s", str(e))
223
  raise HTTPException(
 
228
 
229
  @app.get("/stories/{username}")
230
  @handle_instagram_errors
231
+ async def get_stories(username: str, background_tasks: BackgroundTasks):
232
  """Retrieve stories with enhanced anti-detection measures"""
233
  logger.info(f"Processing request for @{username}")
234
 
235
  try:
236
+ # Get loader with session
237
  L = get_instaloader()
238
  logger.info("Instaloader instance configured")
239
 
240
+ # Randomized delay before profile request (more natural)
241
+ delay = random.uniform(1.5, 3.0)
242
  logger.debug(f"Applying initial delay of {delay:.2f}s")
243
  time.sleep(delay)
244
 
245
  # Profile lookup with retry
246
+ profile = None
247
  for attempt in range(3):
248
  try:
249
  logger.info(f"Fetching profile (attempt {attempt+1}/3)")
250
  profile = instaloader.Profile.from_username(L.context, username)
251
  break
252
  except instaloader.exceptions.QueryReturnedBadRequestException:
253
+ wait_time = (attempt + 1) * random.uniform(3.0, 5.0)
254
+ logger.warning(f"Rate limited, waiting {wait_time:.2f}s")
255
  time.sleep(wait_time)
256
+ except instaloader.exceptions.ConnectionException as e:
257
+ if "401 Unauthorized" in str(e) and "Please wait a few minutes" in str(e):
258
+ # Try with a fresh login if session might be expired
259
+ if attempt < 2: # Only try this once
260
+ logger.warning("Session may be expired, trying with fresh login")
261
+ L = get_instaloader(force_login=True)
262
+ time.sleep(random.uniform(4.0, 6.0))
263
+ continue
264
+ raise
265
+
266
+ if profile is None:
267
  raise HTTPException(
268
  status.HTTP_429_TOO_MANY_REQUESTS,
269
  "Too many attempts to access Instagram"
270
  )
271
 
272
  logger.info(f"Access check for @{username}")
273
+ if not profile.has_viewable_story:
274
+ logger.warning("No viewable story")
275
  raise HTTPException(
276
+ status.HTTP_404_NOT_FOUND,
277
  "No accessible stories for this profile"
278
  )
279
 
280
+ # Additional delay before story fetch (variable to look more natural)
281
+ time.sleep(random.uniform(1.0, 2.5))
282
 
283
  logger.info("Fetching stories")
284
  stories = []
 
293
  "timestamp": item.date_utc.isoformat(),
294
  }
295
 
296
+ # Add any available metadata safely
297
+ if hasattr(item, 'owner_username'):
298
+ story_data["username"] = item.owner_username
299
+
300
+ # Only try to add view_count if it's a video
301
  if item.is_video:
302
  try:
 
303
  if hasattr(item, 'view_count'):
304
  story_data["views"] = item.view_count
305
  except AttributeError:
 
306
  pass
307
 
308
  stories.append(story_data)
 
321
  "No active stories available"
322
  )
323
 
324
+ # Queue session save in background to not delay response
325
+ background_tasks.add_task(lambda: L.save_session_to_file(SESSION_FILE) if SESSION_FILE else None)
326
+
327
+ # Final randomized delay before response (looks more natural)
328
+ time.sleep(random.uniform(0.3, 0.7))
329
 
330
  logger.info(f"Returning {len(stories)} stories")
331
+ return {"data": stories, "count": len(stories), "username": username}
332
 
333
  except Exception as e:
334
  logger.error(f"Critical failure: {str(e)}")
 
349
  )
350
 
351
  # Random delay to avoid detection patterns
352
+ time.sleep(random.uniform(0.5, 1.5))
353
 
354
  # Configure headers to mimic a browser
355
  headers = {
356
  "User-Agent": random.choice(USER_AGENTS),
357
  "Accept": "*/*",
358
  "Accept-Language": "en-US,en;q=0.9",
359
+ "Accept-Encoding": "gzip, deflate, br",
360
  "Referer": "https://www.instagram.com/",
361
+ "Origin": "https://www.instagram.com",
362
  "Sec-Fetch-Dest": "empty",
363
  "Sec-Fetch-Mode": "cors",
364
  "Sec-Fetch-Site": "cross-site",
365
+ "Pragma": "no-cache",
366
+ "Cache-Control": "no-cache",
367
  }
368
 
369
  # Request the media with a session
370
+ session = requests.Session()
371
+ response = session.get(url, headers=headers, stream=True, timeout=10)
372
  response.raise_for_status()
373
 
374
  # Determine content type from response or URL
 
400
  raise HTTPException(
401
  status_code=status.HTTP_502_BAD_GATEWAY,
402
  detail="Failed to download media"
403
+ )
404
+
405
+ # Add a health check endpoint
406
@app.get("/health")
async def health_check():
    """Report that the service is up.

    Returns a JSON object with a constant ``"ok"`` status and the server's
    current local time in ISO 8601 form.
    """
    current_time = datetime.now()
    payload = {"status": "ok", "timestamp": current_time.isoformat()}
    return payload