Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,42 +1,288 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
3 |
-
import
|
4 |
-
import
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
iface = gr.Interface(
|
35 |
-
fn=
|
36 |
-
inputs=
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
)
|
41 |
|
42 |
-
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
+
import requests
|
3 |
+
import json
|
4 |
+
import pandas as pd
|
5 |
+
import time
|
6 |
+
from datetime import datetime
|
7 |
+
from collections import Counter
|
8 |
+
import numpy as np
|
9 |
+
from typing import List, Dict, Any
|
10 |
+
import logging
|
11 |
+
import sys
|
12 |
+
|
13 |
+
# Configure the root logger once at import time: INFO and above are written
# to stdout, each record prefixed with a timestamp and its level name.
logging.basicConfig(
    handlers=[logging.StreamHandler(sys.stdout)],
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
21 |
+
|
22 |
+
class AmazonSuggestionExpander:
    """Expand a seed keyword into Amazon search-autocomplete suggestions.

    Fetched results are memoised per instance in ``self.cache`` (keyed by
    the exact prefix string) and consecutive HTTP requests are spaced at
    least ``self.request_delay`` seconds apart.
    """

    def __init__(self):
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        self.cache = {}  # prefix -> list of suggestion strings already fetched
        self.last_request_time = 0  # time.time() of the most recent request
        self.request_delay = 1.0  # Minimum delay between requests

    def _rate_limit(self):
        """Sleep as needed so requests are at least ``request_delay`` apart."""
        elapsed = time.time() - self.last_request_time
        if elapsed < self.request_delay:
            time.sleep(self.request_delay - elapsed)
        self.last_request_time = time.time()

    @staticmethod
    def _build_url(keyword: str) -> str:
        """Return the completion-endpoint URL for *keyword*.

        The keyword is percent-encoded so characters such as ``&``, ``#`` or
        ``+`` stay part of the ``prefix`` value instead of being interpreted
        as query-string syntax.
        """
        from urllib.parse import quote

        return (
            "https://completion.amazon.com/api/2017/suggestions"
            "?mid=ATVPDKIKX0DER&alias=aps&prefix=" + quote(keyword, safe='')
        )

    def _fetch_suggestions(self, keyword: str, max_retries: int = 3) -> List[str]:
        """Fetch suggestions for one prefix, with caching and retries.

        Args:
            keyword: Prefix to send to the completion endpoint.
            max_retries: Maximum number of HTTP attempts.

        Returns:
            The suggestion strings, or ``[]`` when every attempt was answered
            with HTTP 429 (that empty result is not cached).

        Raises:
            requests.exceptions.RequestException: The last attempt failed at
                the HTTP level (including non-2xx responses other than 429).
            json.JSONDecodeError: The last attempt returned a non-JSON body.
        """
        if keyword in self.cache:
            return self.cache[keyword]

        url = self._build_url(keyword)
        for attempt in range(max_retries):
            is_last_attempt = attempt == max_retries - 1
            try:
                self._rate_limit()
                response = requests.get(url, headers=self.headers, timeout=10)

                if response.status_code == 429:  # Too Many Requests
                    logging.warning(f"Rate limited on attempt {attempt + 1}")
                    # Linearly growing backoff; pointless after the final try.
                    if not is_last_attempt:
                        time.sleep(30 * (attempt + 1))
                    continue

                # Treat other HTTP errors as request failures instead of
                # trying to parse an error page as a suggestions payload.
                response.raise_for_status()

                data = json.loads(response.text)
                suggestions = [
                    item['value']
                    for item in data.get('suggestions', [])
                    if 'value' in item
                ]
                self.cache[keyword] = suggestions
                return suggestions

            except requests.exceptions.RequestException as e:
                logging.error(f"Request error on attempt {attempt + 1}: {str(e)}")
                if is_last_attempt:
                    raise
                time.sleep(5 * (attempt + 1))  # Linearly growing backoff
            except json.JSONDecodeError as e:
                logging.error(f"JSON decode error on attempt {attempt + 1}: {str(e)}")
                if is_last_attempt:
                    raise
                time.sleep(5 * (attempt + 1))

        return []

    def get_amazon_suggestions(self, keyword: str, progress: "gr.Progress | None" = None) -> List[str]:
        """Collect suggestions for *keyword* and for ``"<keyword> <a-z>"``.

        Args:
            keyword: Seed keyword to expand.
            progress: Optional callable invoked as ``progress(fraction, desc=...)``
                before each fetch. It is compared against ``None`` explicitly
                rather than evaluated for truthiness.
                NOTE(review): gradio's Progress object may not have reliable
                truthiness -- confirm against the installed gradio version.

        Returns:
            Sorted, de-duplicated suggestion strings. On any failure a
            single-element list ``["Error: <message>"]`` is returned instead
            of raising.
        """
        try:
            if progress is not None:
                progress(0, desc="Getting initial suggestions...")

            # Suggestions for the bare keyword
            suggestions = set(self._fetch_suggestions(keyword))

            # Suggestions for the keyword followed by each letter
            alpha = 'abcdefghijklmnopqrstuvwxyz'
            for i, letter in enumerate(alpha):
                if progress is not None:
                    progress((i + 1) / len(alpha), desc=f"Processing '{keyword} {letter}'...")
                suggestions.update(self._fetch_suggestions(f"{keyword} {letter}"))

            return sorted(suggestions)

        except Exception as e:
            logging.error(f"Error in get_amazon_suggestions: {str(e)}")
            return [f"Error: {str(e)}"]
|
101 |
+
|
102 |
+
class KeywordAnalyzer:
    """Compute summary statistics over a list of suggestion strings."""

    @staticmethod
    def analyze_suggestions(suggestions: List[str]) -> Dict[str, Any]:
        """Analyze suggestions and return detailed metrics.

        Args:
            suggestions: Suggestion strings to analyze.

        Returns:
            A dict with the keys ``total_count``, ``avg_word_count``,
            ``max_word_count``, ``min_word_count``,
            ``word_count_distribution`` (word count -> number of suggestions),
            ``common_words`` (up to 10 most frequent lower-cased words),
            ``price_mentions``, ``brand_mentions`` and ``length_stats``
            (``avg_length`` / ``max_length`` / ``min_length`` in characters).
            All numbers are native ``int``/``float`` values; for an empty
            input every statistic is zero rather than NaN.
        """
        if not suggestions:
            # pandas reductions over an empty column yield NaN; report zeros.
            return {
                'total_count': 0,
                'avg_word_count': 0.0,
                'max_word_count': 0,
                'min_word_count': 0,
                'word_count_distribution': {},
                'common_words': {},
                'price_mentions': 0,
                'brand_mentions': 0,
                'length_stats': {
                    'avg_length': 0.0,
                    'max_length': 0,
                    'min_length': 0,
                },
            }

        df = pd.DataFrame(suggestions, columns=['Suggestions'])

        # Word count analysis
        df['word_count'] = df['Suggestions'].str.split().str.len()
        char_lengths = df['Suggestions'].str.len()

        # Word frequency over all suggestions, case-insensitive
        word_freq = Counter(' '.join(suggestions).lower().split())

        # Price analysis: a dollar sign followed by digits, optional decimals
        price_pattern = r'\$\d+\.?\d*'
        df['has_price'] = df['Suggestions'].str.contains(price_pattern)

        # Brand mention analysis (substring match against a small fixed list)
        # This is a simple example - you might want to maintain a proper brand list
        brands = ('amazon', 'prime')
        df['has_brand'] = df['Suggestions'].apply(
            lambda text: any(brand in text.lower() for brand in brands)
        )

        # Cast every pandas/numpy scalar to a builtin type so the result can
        # be serialised and compared without numpy-specific handling.
        return {
            'total_count': len(suggestions),
            'avg_word_count': float(df['word_count'].mean()),
            'max_word_count': int(df['word_count'].max()),
            'min_word_count': int(df['word_count'].min()),
            'word_count_distribution': {
                int(words): int(freq)
                for words, freq in df['word_count'].value_counts().items()
            },
            'common_words': dict(word_freq.most_common(10)),
            'price_mentions': int(df['has_price'].sum()),
            'brand_mentions': int(df['has_brand'].sum()),
            'length_stats': {
                'avg_length': float(char_lengths.mean()),
                'max_length': int(char_lengths.max()),
                'min_length': int(char_lengths.min()),
            },
        }
|
140 |
+
|
141 |
+
def format_analysis_output(analysis: Dict[str, Any]) -> str:
    """Format analysis results into readable text.

    Args:
        analysis: Mapping providing ``total_count``, ``avg_word_count``,
            ``max_word_count``, ``min_word_count``, ``common_words``
            (word -> count), ``price_mentions``, ``brand_mentions`` and
            ``length_stats`` (with ``avg_length``, ``max_length``,
            ``min_length``).

    Returns:
        A multi-line report that starts with a blank line and ends with a
        newline.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    length_stats = analysis['length_stats']

    # Collect the report line by line and join once, instead of growing a
    # string with repeated ``+=``. Empty entries become blank lines.
    lines = [
        "",
        "=== Analysis Results ===",
        "",
        f"Total Suggestions: {analysis['total_count']}",
        "Word Count Stats:",
        f"- Average: {analysis['avg_word_count']:.1f} words",
        f"- Maximum: {analysis['max_word_count']} words",
        f"- Minimum: {analysis['min_word_count']} words",
        "",
        "Most Common Words:",
    ]
    lines.extend(
        f"- {word}: {count} times"
        for word, count in analysis['common_words'].items()
    )
    lines.extend([
        "",
        f"Price Mentions: {analysis['price_mentions']}",
        f"Brand Mentions: {analysis['brand_mentions']}",
        "",
        "Character Length Stats:",
        f"- Average: {length_stats['avg_length']:.1f} characters",
        f"- Maximum: {length_stats['max_length']} characters",
        f"- Minimum: {length_stats['min_length']} characters",
    ])

    return "\n".join(lines) + "\n"
|
164 |
+
|
165 |
+
def create_visualization(analysis: Dict[str, Any]) -> str:
    """Create HTML visualization of the analysis.

    Args:
        analysis: Mapping providing ``total_count``, ``avg_word_count`` and
            ``common_words`` (word -> count).

    Returns:
        An HTML fragment: a ``<style>`` block followed by a container with
        the overall statistics and one pill per common word.

    Raises:
        KeyError: If one of the keys listed above is missing.
    """
    from html import escape

    # The stylesheet is kept out of str.format(): its literal CSS braces
    # would be parsed as replacement fields and raise KeyError.
    stylesheet = """
    <style>
        .analysis-container {
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }
        .metric-card {
            background: #f5f5f5;
            border-radius: 8px;
            padding: 15px;
            margin: 10px 0;
        }
        .word-cloud {
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin: 10px 0;
        }
        .word-item {
            background: #e0e0e0;
            padding: 5px 10px;
            border-radius: 15px;
            font-size: 14px;
        }
    </style>
    """

    # Only this brace-free part of the markup goes through str.format().
    opening = """
    <div class="analysis-container">
        <div class="metric-card">
            <h3>Overall Statistics</h3>
            <p>Total Suggestions: {}</p>
            <p>Average Words: {:.1f}</p>
        </div>
        <div class="metric-card">
            <h3>Most Common Words</h3>
            <div class="word-cloud">
    """.format(
        analysis['total_count'],
        analysis['avg_word_count'],
    )

    # Words come from remote suggestion data, so escape them before they
    # are embedded in markup.
    word_items = ''.join(
        f'<span class="word-item">{escape(str(word))} ({escape(str(count))})</span>'
        for word, count in analysis['common_words'].items()
    )

    closing = """
            </div>
        </div>
    </div>
    """
    return stylesheet + opening + word_items + closing
|
217 |
+
|
218 |
+
def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress = gr.Progress()) -> tuple:
    """Main function to handle the search and display process.

    Args:
        keyword: Seed keyword typed by the user; surrounding whitespace is
            ignored.
        min_length: Minimum number of words a suggestion must contain to be
            kept; ``0`` disables the filter.
        progress: Progress callback forwarded to the suggestion expander.

    Returns:
        A ``(text, csv_path, html)`` tuple. ``csv_path`` and ``html`` are
        ``None`` when there is nothing to show (missing keyword, lookup
        failure, or no suggestions left after filtering).
    """
    import os
    import tempfile

    keyword = keyword.strip() if keyword else ""
    if not keyword:
        return "Please enter a keyword", None, None

    try:
        # Initialize expander and get suggestions
        expander = AmazonSuggestionExpander()
        suggestions = expander.get_amazon_suggestions(keyword, progress)

        # get_amazon_suggestions reports failure as a one-element list
        # ["Error: <message>"]; surface it as an error, not as a result.
        if len(suggestions) == 1 and suggestions[0].startswith("Error: "):
            return f"Error occurred: {suggestions[0][len('Error: '):]}", None, None

        # Filter suggestions by minimum word count if specified
        if min_length > 0:
            suggestions = [s for s in suggestions if len(s.split()) >= min_length]

        if not suggestions:
            return f"No suggestions found for '{keyword}'", None, None

        # Analyze suggestions
        analysis = KeywordAnalyzer.analyze_suggestions(suggestions)

        # Create DataFrame and save to CSV. A unique temp file avoids
        # collisions between requests made within the same second and keeps
        # the working directory clean.
        df = pd.DataFrame(suggestions, columns=['Suggestions'])
        df['Word Count'] = df['Suggestions'].str.split().str.len()
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        fd, csv_filename = tempfile.mkstemp(
            prefix=f"amazon_suggestions_{timestamp}_", suffix=".csv"
        )
        os.close(fd)  # pandas reopens the path itself
        df.to_csv(csv_filename, index=False)

        # Create output text: header, numbered suggestions, analysis report
        lines = [f"Found {len(suggestions)} suggestions for '{keyword}':", ""]
        lines.extend(
            f"{i}. {suggestion}" for i, suggestion in enumerate(suggestions, 1)
        )
        output_text = "\n".join(lines) + "\n\n" + format_analysis_output(analysis)

        # Create visualization
        visualization = create_visualization(analysis)

        return output_text, csv_filename, visualization

    except Exception as e:
        # Top-level UI boundary: log with traceback and show the message.
        logging.exception(f"Error in search_and_display: {str(e)}")
        return f"Error occurred: {str(e)}", None, None
|
259 |
+
|
260 |
+
# Build the Gradio UI: one text box and one slider feed search_and_display,
# whose three return values fill the text, file and HTML outputs in order.
# NOTE(review): cache_examples=True asks Gradio to pre-compute the example
# outputs -- confirm that running the live lookups for them is intended.
iface = gr.Interface(
    title="Advanced Amazon Suggestion Expander",
    description="""
    Get expanded keyword suggestions from Amazon's search autocomplete with detailed analysis.
    Enter a keyword to see all related suggestions, analytics, and visualizations.
    """,
    fn=search_and_display,
    inputs=[
        gr.Textbox(
            label="Enter keyword",
            placeholder="Type your keyword here...",
        ),
        gr.Slider(
            minimum=0,
            maximum=10,
            step=1,
            label="Minimum word count filter",
            value=0,
        ),
    ],
    outputs=[
        gr.Textbox(label="Results", lines=20),
        gr.File(label="Download CSV"),
        gr.HTML(label="Visualization"),
    ],
    # Each example row is [keyword, minimum word count]
    examples=[
        ["coffee maker", 2],
        ["gaming laptop", 3],
        ["yoga mat", 1],
    ],
    cache_examples=True,
    theme=gr.themes.Soft(),
)

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    iface.launch()
|