Besimplestudio committed
Commit a97da81 · verified · 1 Parent(s): 3f54538

Update app.py

Files changed (1)
  1. app.py +104 -167
app.py CHANGED
@@ -1,217 +1,148 @@
- import gradio as gr
  import requests
- import json
  import pandas as pd
- import time
  from datetime import datetime
  from collections import Counter
- import numpy as np
- from typing import List, Dict, Any
- import logging
- import sys
 
- # Set up logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(levelname)s - %(message)s',
-     handlers=[
-         logging.StreamHandler(sys.stdout)
-     ]
- )
 
  class AmazonSuggestionExpander:
      def __init__(self):
          self.headers = {
-             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
          }
-         self.cache = {}
-         self.last_request_time = 0
-         self.request_delay = 1.0 # Minimum delay between requests
-
-     def _rate_limit(self):
-         """Implement rate limiting between requests"""
-         current_time = time.time()
-         time_since_last_request = current_time - self.last_request_time
-         if time_since_last_request < self.request_delay:
-             time.sleep(self.request_delay - time_since_last_request)
-         self.last_request_time = time.time()
-
-     def _fetch_suggestions(self, keyword: str, max_retries: int = 3) -> List[str]:
-         """Fetch suggestions from Amazon with retry logic and caching"""
-         if keyword in self.cache:
-             return self.cache[keyword]
-
-         for attempt in range(max_retries):
-             try:
-                 self._rate_limit()
-                 url = f"https://completion.amazon.com/api/2017/suggestions?mid=ATVPDKIKX0DER&alias=aps&prefix={keyword}"
-                 response = requests.get(url, headers=self.headers, timeout=10)
-
-                 if response.status_code == 429: # Too Many Requests
-                     logging.warning(f"Rate limited on attempt {attempt + 1}")
-                     time.sleep(30 * (attempt + 1)) # Exponential backoff
-                     continue
-
-                 data = json.loads(response.text)
-                 suggestions = [suggestion['value'] for suggestion in data.get('suggestions', [])]
-                 self.cache[keyword] = suggestions
-                 return suggestions
-
-             except requests.exceptions.RequestException as e:
-                 logging.error(f"Request error on attempt {attempt + 1}: {str(e)}")
-                 if attempt == max_retries - 1:
-                     raise e
-                 time.sleep(5 * (attempt + 1)) # Exponential backoff
-             except json.JSONDecodeError as e:
-                 logging.error(f"JSON decode error on attempt {attempt + 1}: {str(e)}")
-                 if attempt == max_retries - 1:
-                     raise e
-                 time.sleep(5 * (attempt + 1))
-
-         return []
 
      def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
-         """Get expanded suggestions with progress tracking"""
          try:
-             suggestions = []
-             if progress:
-                 progress(0, desc="Getting initial suggestions...")
-
-             # Get suggestions for the main keyword
-             main_suggestions = self._fetch_suggestions(keyword)
-             suggestions.extend(main_suggestions)
-
-             # Get suggestions for each letter of the alphabet
-             alpha = 'abcdefghijklmnopqrstuvwxyz'
-             for i, letter in enumerate(alpha):
-                 if progress:
-                     progress((i + 1) / len(alpha), desc=f"Processing '{keyword} {letter}'...")
-                 extended_suggestions = self._fetch_suggestions(f"{keyword} {letter}")
-                 suggestions.extend(extended_suggestions)
 
-             # Remove duplicates and sort
-             suggestions = list(set(suggestions))
-             suggestions.sort()
 
-             return suggestions
-
          except Exception as e:
-             logging.error(f"Error in get_amazon_suggestions: {str(e)}")
-             return [f"Error: {str(e)}"]
 
  class KeywordAnalyzer:
-     @staticmethod
-     def analyze_suggestions(suggestions: List[str]) -> Dict[str, Any]:
-         """Analyze suggestions and return detailed metrics"""
-         df = pd.DataFrame(suggestions, columns=['Suggestions'])
-
-         # Word count analysis
-         df['word_count'] = df['Suggestions'].str.split().str.len()
 
-         # Create word frequency analysis
          all_words = ' '.join(suggestions).lower().split()
-         word_freq = Counter(all_words)
-
-         # Price analysis (if present)
-         price_pattern = r'\$\d+\.?\d*'
-         df['has_price'] = df['Suggestions'].str.contains(price_pattern)
-
-         # Brand mention analysis
-         # This is a simple example - you might want to maintain a proper brand list
-         df['has_brand'] = df['Suggestions'].apply(lambda x: any(brand in x.lower() for brand in ['amazon', 'prime']))
 
          analysis = {
              'total_count': len(suggestions),
-             'avg_word_count': df['word_count'].mean(),
-             'max_word_count': df['word_count'].max(),
-             'min_word_count': df['word_count'].min(),
-             'word_count_distribution': df['word_count'].value_counts().to_dict(),
-             'common_words': {word: count for word, count in word_freq.most_common(10)},
-             'price_mentions': df['has_price'].sum(),
-             'brand_mentions': df['has_brand'].sum(),
              'length_stats': {
-                 'avg_length': df['Suggestions'].str.len().mean(),
-                 'max_length': df['Suggestions'].str.len().max(),
-                 'min_length': df['Suggestions'].str.len().min()
-             }
          }
-
          return analysis
 
  def format_analysis_output(analysis: Dict[str, Any]) -> str:
-     """Format analysis results into readable text"""
-     output = "\n=== Analysis Results ===\n\n"
-
      output += f"Total Suggestions: {analysis['total_count']}\n"
-     output += f"Word Count Stats:\n"
-     output += f"- Average: {analysis['avg_word_count']:.1f} words\n"
-     output += f"- Maximum: {analysis['max_word_count']} words\n"
-     output += f"- Minimum: {analysis['min_word_count']} words\n\n"
-
-     output += "Most Common Words:\n"
      for word, count in analysis['common_words'].items():
-         output += f"- {word}: {count} times\n"
-
-     output += f"\nPrice Mentions: {analysis['price_mentions']}\n"
-     output += f"Brand Mentions: {analysis['brand_mentions']}\n"
-
-     output += f"\nCharacter Length Stats:\n"
-     output += f"- Average: {analysis['length_stats']['avg_length']:.1f} characters\n"
-     output += f"- Maximum: {analysis['length_stats']['max_length']} characters\n"
-     output += f"- Minimum: {analysis['length_stats']['min_length']} characters\n"
-
      return output
 
  def create_visualization(analysis: Dict[str, Any]) -> str:
      """Create HTML visualization of the analysis"""
-     html = """
-     <style>
-         .analysis-container {
              font-family: Arial, sans-serif;
              max-width: 800px;
              margin: 0 auto;
              padding: 20px;
-         }
-         .metric-card {
              background: #f5f5f5;
              border-radius: 8px;
              padding: 15px;
              margin: 10px 0;
-         }
-         .word-cloud {
              display: flex;
              flex-wrap: wrap;
              gap: 10px;
              margin: 10px 0;
-         }
-         .word-item {
              background: #e0e0e0;
              padding: 5px 10px;
              border-radius: 15px;
              font-size: 14px;
-         }
-     </style>
-     <div class="analysis-container">
-         <div class="metric-card">
-             <h3>Overall Statistics</h3>
-             <p>Total Suggestions: {}</p>
-             <p>Average Words: {:.1f}</p>
-         </div>
-         <div class="metric-card">
-             <h3>Most Common Words</h3>
-             <div class="word-cloud">
-     """.format(
-         analysis['total_count'],
-         analysis['avg_word_count']
-     )
-
-     for word, count in analysis['common_words'].items():
-         html += f'<span class="word-item">{word} ({count})</span>'
-
-     html += """
          </div>
      </div>
-     </div>
      """
      return html
 
 
@@ -229,6 +160,9 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
          if min_length > 0:
              suggestions = [s for s in suggestions if len(s.split()) >= min_length]
 
          # Analyze suggestions
          analyzer = KeywordAnalyzer()
          analysis = analyzer.analyze_suggestions(suggestions)
@@ -248,14 +182,18 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
          # Add analysis results
          output_text += "\n" + format_analysis_output(analysis)
 
-         # Create visualization
-         visualization = create_visualization(analysis)
 
          return output_text, csv_filename, visualization
 
      except Exception as e:
          logging.error(f"Error in search_and_display: {str(e)}")
-         return f"Error occurred: {str(e)}", None, None
 
  # Create Gradio interface
  iface = gr.Interface(
@@ -267,7 +205,7 @@ iface = gr.Interface(
      outputs=[
          gr.Textbox(label="Results", lines=20),
          gr.File(label="Download CSV"),
-         gr.HTML(label="Visualization")
      ],
      title="Advanced Amazon Suggestion Expander",
      description="""
@@ -283,6 +221,5 @@ iface = gr.Interface(
      theme=gr.themes.Soft()
  )
 
- # Launch the app
  if __name__ == "__main__":
      iface.launch()
 
 
@@ -1,217 +1,148 @@
  import requests
  import pandas as pd
+ import logging
+ import gradio as gr
+ from typing import List, Dict, Any
  from datetime import datetime
  from collections import Counter
+ import re
 
+ logging.basicConfig(level=logging.INFO)
 
  class AmazonSuggestionExpander:
      def __init__(self):
+         self.base_url = "https://completion.amazon.com/api/2017/suggestions"
          self.headers = {
+             'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
          }
 
      def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
          try:
+             params = {
+                 'mid': 'ATVPDKIKX0DER',
+                 'alias': 'aps',
+                 'prefix': keyword
+             }
+             response = requests.get(self.base_url, headers=self.headers, params=params)
+             response.raise_for_status()
 
+             data = response.json()
+             suggestions = [item.get('suggestion', '') for item in data.get('suggestions', [])]
+             return [s for s in suggestions if s] # Filter out empty suggestions
 
+         except requests.RequestException as e:
+             logging.error(f"Request error: {str(e)}")
+             raise Exception(f"Failed to fetch suggestions: {str(e)}")
          except Exception as e:
+             logging.error(f"General error: {str(e)}")
+             raise Exception(f"An error occurred: {str(e)}")
 
  class KeywordAnalyzer:
+     def __init__(self):
+         self.common_words = set(['the', 'and', 'for', 'with', 'in', 'on', 'at', 'to'])
+
+     def analyze_suggestions(self, suggestions: List[str]) -> Dict[str, Any]:
+         word_counts = [len(s.split()) for s in suggestions]
+         lengths = [len(s) for s in suggestions]
 
+         # Word frequency analysis
          all_words = ' '.join(suggestions).lower().split()
+         word_freq = Counter([w for w in all_words if w not in self.common_words])
 
          analysis = {
              'total_count': len(suggestions),
+             'avg_word_count': sum(word_counts) / len(suggestions) if suggestions else 0,
              'length_stats': {
+                 'avg_length': sum(lengths) / len(lengths) if lengths else 0,
+                 'max_length': max(lengths) if lengths else 0,
+                 'min_length': min(lengths) if lengths else 0
+             },
+             'common_words': dict(word_freq.most_common(10)),
+             'price_mentions': sum(1 for s in suggestions if '$' in s or 'price' in s.lower()),
+             'brand_mentions': sum(1 for s in suggestions if any(char.isupper() for char in s.split()))
          }
          return analysis
 
  def format_analysis_output(analysis: Dict[str, Any]) -> str:
+     """Format analysis results as readable text"""
+     output = "\nAnalysis Results:\n"
      output += f"Total Suggestions: {analysis['total_count']}\n"
+     output += f"Average Words per Suggestion: {analysis['avg_word_count']:.1f}\n"
+     output += f"Average Length: {analysis['length_stats']['avg_length']:.1f} characters\n"
+     output += "\nMost Common Words:\n"
      for word, count in analysis['common_words'].items():
+         output += f"- {word}: {count}\n"
      return output
 
  def create_visualization(analysis: Dict[str, Any]) -> str:
      """Create HTML visualization of the analysis"""
+     html = f"""
+     <!DOCTYPE html>
+     <html>
+     <head>
+     <style>
+         .analysis-container {{
              font-family: Arial, sans-serif;
              max-width: 800px;
              margin: 0 auto;
              padding: 20px;
+         }}
+         .metric-card {{
              background: #f5f5f5;
              border-radius: 8px;
              padding: 15px;
              margin: 10px 0;
+             box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+         }}
+         .word-cloud {{
              display: flex;
             flex-wrap: wrap;
              gap: 10px;
              margin: 10px 0;
+         }}
+         .word-item {{
              background: #e0e0e0;
              padding: 5px 10px;
              border-radius: 15px;
              font-size: 14px;
+         }}
+         h3 {{
+             margin: 0 0 10px 0;
+             color: #333;
+         }}
+         p {{
+             margin: 5px 0;
+             color: #666;
+         }}
+     </style>
+     </head>
+     <body>
+     <div class="analysis-container">
+         <div class="metric-card">
+             <h3>Overall Statistics</h3>
+             <p>Total Suggestions: {analysis['total_count']}</p>
+             <p>Average Words: {analysis['avg_word_count']:.1f}</p>
+             <p>Price Mentions: {analysis['price_mentions']}</p>
+             <p>Brand Mentions: {analysis['brand_mentions']}</p>
+         </div>
+
+         <div class="metric-card">
+             <h3>Length Statistics</h3>
+             <p>Average Length: {analysis['length_stats']['avg_length']:.1f} characters</p>
+             <p>Maximum Length: {analysis['length_stats']['max_length']} characters</p>
+             <p>Minimum Length: {analysis['length_stats']['min_length']} characters</p>
+         </div>
+
+         <div class="metric-card">
+             <h3>Most Common Words</h3>
+             <div class="word-cloud">
+                 {' '.join([f'<span class="word-item">{word} ({count})</span>'
+                            for word, count in analysis['common_words'].items()])}
+             </div>
          </div>
      </div>
+     </body>
+     </html>
      """
      return html
 
 
@@ -229,6 +160,9 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
          if min_length > 0:
              suggestions = [s for s in suggestions if len(s.split()) >= min_length]
 
+         if not suggestions:
+             return "No suggestions found", None, None
+
          # Analyze suggestions
          analyzer = KeywordAnalyzer()
          analysis = analyzer.analyze_suggestions(suggestions)
 
@@ -248,14 +182,18 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
          # Add analysis results
          output_text += "\n" + format_analysis_output(analysis)
 
+         try:
+             # Create visualization
+             visualization = create_visualization(analysis)
+         except Exception as viz_error:
+             logging.error(f"Visualization error: {str(viz_error)}")
+             visualization = "<p>Error creating visualization</p>"
 
          return output_text, csv_filename, visualization
 
      except Exception as e:
          logging.error(f"Error in search_and_display: {str(e)}")
+         return f"Error occurred: {str(e)}", None, "<p>Error occurred during analysis</p>"
 
  # Create Gradio interface
  iface = gr.Interface(
 
@@ -267,7 +205,7 @@ iface = gr.Interface(
      outputs=[
          gr.Textbox(label="Results", lines=20),
          gr.File(label="Download CSV"),
+         gr.HTML(label="Analysis Visualization")
      ],
      title="Advanced Amazon Suggestion Expander",
      description="""
 
@@ -283,6 +221,5 @@ iface = gr.Interface(
      theme=gr.themes.Soft()
  )
 
  if __name__ == "__main__":
      iface.launch()
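
For anyone who wants to sanity-check the refactored request outside of Gradio, the sketch below reproduces the call that the new get_amazon_suggestions makes. It is a minimal, unofficial sketch, not part of the commit: the endpoint, headers, and query parameters are taken from the diff above, the helper name fetch_suggestions is made up for illustration, and because the old code read suggestion['value'] while the new code reads item.get('suggestion'), the sketch accepts either key rather than assuming which one the service actually returns.

import requests

def fetch_suggestions(keyword: str) -> list:
    """Hypothetical stand-alone version of the request made in app.py."""
    # Endpoint and parameters as they appear in the committed code.
    base_url = "https://completion.amazon.com/api/2017/suggestions"
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
    params = {'mid': 'ATVPDKIKX0DER', 'alias': 'aps', 'prefix': keyword}

    response = requests.get(base_url, headers=headers, params=params, timeout=10)
    response.raise_for_status()
    data = response.json()

    # The old version read item['value'], the new one reads item.get('suggestion');
    # accept whichever key the response actually carries.
    results = []
    for item in data.get('suggestions', []):
        value = item.get('suggestion') or item.get('value')
        if value:
            results.append(value)
    return results

if __name__ == "__main__":
    print(fetch_suggestions("coffee grinder")[:5])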