Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,217 +1,148 @@
|
|
1 |
-
import gradio as gr
|
2 |
import requests
|
3 |
-
import json
|
4 |
import pandas as pd
|
5 |
-
import
|
|
|
|
|
6 |
from datetime import datetime
|
7 |
from collections import Counter
|
8 |
-
import
|
9 |
-
from typing import List, Dict, Any
|
10 |
-
import logging
|
11 |
-
import sys
|
12 |
|
13 |
-
|
14 |
-
logging.basicConfig(
|
15 |
-
level=logging.INFO,
|
16 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
17 |
-
handlers=[
|
18 |
-
logging.StreamHandler(sys.stdout)
|
19 |
-
]
|
20 |
-
)
|
21 |
|
22 |
class AmazonSuggestionExpander:
|
23 |
def __init__(self):
|
|
|
24 |
self.headers = {
|
25 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
|
26 |
}
|
27 |
-
self.cache = {}
|
28 |
-
self.last_request_time = 0
|
29 |
-
self.request_delay = 1.0 # Minimum delay between requests
|
30 |
-
|
31 |
-
def _rate_limit(self):
|
32 |
-
"""Implement rate limiting between requests"""
|
33 |
-
current_time = time.time()
|
34 |
-
time_since_last_request = current_time - self.last_request_time
|
35 |
-
if time_since_last_request < self.request_delay:
|
36 |
-
time.sleep(self.request_delay - time_since_last_request)
|
37 |
-
self.last_request_time = time.time()
|
38 |
-
|
39 |
-
def _fetch_suggestions(self, keyword: str, max_retries: int = 3) -> List[str]:
|
40 |
-
"""Fetch suggestions from Amazon with retry logic and caching"""
|
41 |
-
if keyword in self.cache:
|
42 |
-
return self.cache[keyword]
|
43 |
-
|
44 |
-
for attempt in range(max_retries):
|
45 |
-
try:
|
46 |
-
self._rate_limit()
|
47 |
-
url = f"https://completion.amazon.com/api/2017/suggestions?mid=ATVPDKIKX0DER&alias=aps&prefix={keyword}"
|
48 |
-
response = requests.get(url, headers=self.headers, timeout=10)
|
49 |
-
|
50 |
-
if response.status_code == 429: # Too Many Requests
|
51 |
-
logging.warning(f"Rate limited on attempt {attempt + 1}")
|
52 |
-
time.sleep(30 * (attempt + 1)) # Exponential backoff
|
53 |
-
continue
|
54 |
-
|
55 |
-
data = json.loads(response.text)
|
56 |
-
suggestions = [suggestion['value'] for suggestion in data.get('suggestions', [])]
|
57 |
-
self.cache[keyword] = suggestions
|
58 |
-
return suggestions
|
59 |
-
|
60 |
-
except requests.exceptions.RequestException as e:
|
61 |
-
logging.error(f"Request error on attempt {attempt + 1}: {str(e)}")
|
62 |
-
if attempt == max_retries - 1:
|
63 |
-
raise e
|
64 |
-
time.sleep(5 * (attempt + 1)) # Exponential backoff
|
65 |
-
except json.JSONDecodeError as e:
|
66 |
-
logging.error(f"JSON decode error on attempt {attempt + 1}: {str(e)}")
|
67 |
-
if attempt == max_retries - 1:
|
68 |
-
raise e
|
69 |
-
time.sleep(5 * (attempt + 1))
|
70 |
-
|
71 |
-
return []
|
72 |
|
73 |
def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
|
74 |
-
"""Get expanded suggestions with progress tracking"""
|
75 |
try:
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
# Get suggestions for each letter of the alphabet
|
85 |
-
alpha = 'abcdefghijklmnopqrstuvwxyz'
|
86 |
-
for i, letter in enumerate(alpha):
|
87 |
-
if progress:
|
88 |
-
progress((i + 1) / len(alpha), desc=f"Processing '{keyword} {letter}'...")
|
89 |
-
extended_suggestions = self._fetch_suggestions(f"{keyword} {letter}")
|
90 |
-
suggestions.extend(extended_suggestions)
|
91 |
|
92 |
-
|
93 |
-
suggestions =
|
94 |
-
suggestions
|
95 |
|
96 |
-
|
97 |
-
|
|
|
98 |
except Exception as e:
|
99 |
-
logging.error(f"
|
100 |
-
|
101 |
|
102 |
class KeywordAnalyzer:
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
df['word_count'] = df['Suggestions'].str.split().str.len()
|
110 |
|
111 |
-
#
|
112 |
all_words = ' '.join(suggestions).lower().split()
|
113 |
-
word_freq = Counter(all_words)
|
114 |
-
|
115 |
-
# Price analysis (if present)
|
116 |
-
price_pattern = r'\$\d+\.?\d*'
|
117 |
-
df['has_price'] = df['Suggestions'].str.contains(price_pattern)
|
118 |
-
|
119 |
-
# Brand mention analysis
|
120 |
-
# This is a simple example - you might want to maintain a proper brand list
|
121 |
-
df['has_brand'] = df['Suggestions'].apply(lambda x: any(brand in x.lower() for brand in ['amazon', 'prime']))
|
122 |
|
123 |
analysis = {
|
124 |
'total_count': len(suggestions),
|
125 |
-
'avg_word_count':
|
126 |
-
'max_word_count': df['word_count'].max(),
|
127 |
-
'min_word_count': df['word_count'].min(),
|
128 |
-
'word_count_distribution': df['word_count'].value_counts().to_dict(),
|
129 |
-
'common_words': {word: count for word, count in word_freq.most_common(10)},
|
130 |
-
'price_mentions': df['has_price'].sum(),
|
131 |
-
'brand_mentions': df['has_brand'].sum(),
|
132 |
'length_stats': {
|
133 |
-
'avg_length':
|
134 |
-
'max_length':
|
135 |
-
'min_length':
|
136 |
-
}
|
|
|
|
|
|
|
137 |
}
|
138 |
-
|
139 |
return analysis
|
140 |
|
141 |
def format_analysis_output(analysis: Dict[str, Any]) -> str:
|
142 |
-
"""Format analysis results
|
143 |
-
output = "\
|
144 |
-
|
145 |
output += f"Total Suggestions: {analysis['total_count']}\n"
|
146 |
-
output += f"
|
147 |
-
output += f"
|
148 |
-
output +=
|
149 |
-
output += f"- Minimum: {analysis['min_word_count']} words\n\n"
|
150 |
-
|
151 |
-
output += "Most Common Words:\n"
|
152 |
for word, count in analysis['common_words'].items():
|
153 |
-
output += f"- {word}: {count}
|
154 |
-
|
155 |
-
output += f"\nPrice Mentions: {analysis['price_mentions']}\n"
|
156 |
-
output += f"Brand Mentions: {analysis['brand_mentions']}\n"
|
157 |
-
|
158 |
-
output += f"\nCharacter Length Stats:\n"
|
159 |
-
output += f"- Average: {analysis['length_stats']['avg_length']:.1f} characters\n"
|
160 |
-
output += f"- Maximum: {analysis['length_stats']['max_length']} characters\n"
|
161 |
-
output += f"- Minimum: {analysis['length_stats']['min_length']} characters\n"
|
162 |
-
|
163 |
return output
|
164 |
|
165 |
def create_visualization(analysis: Dict[str, Any]) -> str:
|
166 |
"""Create HTML visualization of the analysis"""
|
167 |
-
html = """
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
170 |
font-family: Arial, sans-serif;
|
171 |
max-width: 800px;
|
172 |
margin: 0 auto;
|
173 |
padding: 20px;
|
174 |
-
}
|
175 |
-
.metric-card {
|
176 |
background: #f5f5f5;
|
177 |
border-radius: 8px;
|
178 |
padding: 15px;
|
179 |
margin: 10px 0;
|
180 |
-
|
181 |
-
|
|
|
182 |
display: flex;
|
183 |
flex-wrap: wrap;
|
184 |
gap: 10px;
|
185 |
margin: 10px 0;
|
186 |
-
}
|
187 |
-
.word-item {
|
188 |
background: #e0e0e0;
|
189 |
padding: 5px 10px;
|
190 |
border-radius: 15px;
|
191 |
font-size: 14px;
|
192 |
-
}
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
analysis
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
</div>
|
213 |
</div>
|
214 |
-
</
|
|
|
215 |
"""
|
216 |
return html
|
217 |
|
@@ -229,6 +160,9 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
|
|
229 |
if min_length > 0:
|
230 |
suggestions = [s for s in suggestions if len(s.split()) >= min_length]
|
231 |
|
|
|
|
|
|
|
232 |
# Analyze suggestions
|
233 |
analyzer = KeywordAnalyzer()
|
234 |
analysis = analyzer.analyze_suggestions(suggestions)
|
@@ -248,14 +182,18 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
|
|
248 |
# Add analysis results
|
249 |
output_text += "\n" + format_analysis_output(analysis)
|
250 |
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
253 |
|
254 |
return output_text, csv_filename, visualization
|
255 |
|
256 |
except Exception as e:
|
257 |
logging.error(f"Error in search_and_display: {str(e)}")
|
258 |
-
return f"Error occurred: {str(e)}", None,
|
259 |
|
260 |
# Create Gradio interface
|
261 |
iface = gr.Interface(
|
@@ -267,7 +205,7 @@ iface = gr.Interface(
|
|
267 |
outputs=[
|
268 |
gr.Textbox(label="Results", lines=20),
|
269 |
gr.File(label="Download CSV"),
|
270 |
-
gr.HTML(label="Visualization")
|
271 |
],
|
272 |
title="Advanced Amazon Suggestion Expander",
|
273 |
description="""
|
@@ -283,6 +221,5 @@ iface = gr.Interface(
|
|
283 |
theme=gr.themes.Soft()
|
284 |
)
|
285 |
|
286 |
-
# Launch the app
|
287 |
if __name__ == "__main__":
|
288 |
iface.launch()
|
|
|
|
|
1 |
import requests
|
|
|
2 |
import pandas as pd
|
3 |
+
import logging
|
4 |
+
import gradio as gr
|
5 |
+
from typing import List, Dict, Any
|
6 |
from datetime import datetime
|
7 |
from collections import Counter
|
8 |
+
import re
|
|
|
|
|
|
|
9 |
|
10 |
+
# Configure the root logger so INFO-level messages are emitted (default stderr handler).
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
class AmazonSuggestionExpander:
    """Fetches search-suggestion completions from Amazon's public completion API."""

    def __init__(self):
        # Endpoint for Amazon's 2017 suggestions API.
        self.base_url = "https://completion.amazon.com/api/2017/suggestions"
        # Browser-like User-Agent; the endpoint tends to reject default client agents.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
        """Return the non-empty suggestion strings Amazon offers for *keyword*.

        Args:
            keyword: Search prefix to complete.
            progress: Optional Gradio progress tracker; unused in this method but
                kept in the signature for caller compatibility.

        Raises:
            Exception: wrapping any network or parsing failure (chained to the cause).
        """
        try:
            params = {
                'mid': 'ATVPDKIKX0DER',  # US marketplace id
                'alias': 'aps',          # "all product search" alias
                'prefix': keyword
            }
            # FIX: added timeout — without one a stalled connection would hang
            # the Gradio worker indefinitely.
            response = requests.get(self.base_url, headers=self.headers,
                                    params=params, timeout=10)
            response.raise_for_status()

            data = response.json()
            suggestions = [item.get('suggestion', '') for item in data.get('suggestions', [])]
            return [s for s in suggestions if s]  # Filter out empty suggestions

        except requests.RequestException as e:
            logging.error(f"Request error: {str(e)}")
            raise Exception(f"Failed to fetch suggestions: {str(e)}") from e
        except Exception as e:
            logging.error(f"General error: {str(e)}")
            raise Exception(f"An error occurred: {str(e)}") from e
|
39 |
|
40 |
class KeywordAnalyzer:
    """Computes summary statistics over a list of suggestion strings."""

    def __init__(self):
        # Stop words excluded from the word-frequency ranking.
        self.common_words = set(['the', 'and', 'for', 'with', 'in', 'on', 'at', 'to'])

    def analyze_suggestions(self, suggestions: List[str]) -> Dict[str, Any]:
        """Return a dict of aggregate metrics for *suggestions*.

        Keys: total_count, avg_word_count, length_stats (avg/max/min),
        common_words (top 10, stop words removed), price_mentions,
        brand_mentions. Handles an empty input list without dividing by zero.
        """
        word_counts = [len(s.split()) for s in suggestions]
        lengths = [len(s) for s in suggestions]

        # Word frequency analysis (lower-cased, stop words removed).
        all_words = ' '.join(suggestions).lower().split()
        word_freq = Counter([w for w in all_words if w not in self.common_words])

        analysis = {
            'total_count': len(suggestions),
            'avg_word_count': sum(word_counts) / len(suggestions) if suggestions else 0,
            'length_stats': {
                'avg_length': sum(lengths) / len(lengths) if lengths else 0,
                'max_length': max(lengths) if lengths else 0,
                'min_length': min(lengths) if lengths else 0
            },
            'common_words': dict(word_freq.most_common(10)),
            'price_mentions': sum(1 for s in suggestions if '$' in s or 'price' in s.lower()),
            # BUG FIX: the original iterated s.split() (whole words) while naming
            # the loop variable `char`, so str.isupper() only matched ALL-CAPS
            # words. Iterate characters, as the variable name intended, so any
            # capitalized word (e.g. "Apple") counts as a brand mention.
            'brand_mentions': sum(1 for s in suggestions if any(char.isupper() for char in s))
        }
        return analysis
|
65 |
|
66 |
def format_analysis_output(analysis: Dict[str, Any]) -> str:
    """Format analysis results as readable text."""
    # Assemble the report as a list of pre-terminated fragments and join once,
    # instead of repeated string concatenation.
    fragments = [
        "\nAnalysis Results:\n",
        f"Total Suggestions: {analysis['total_count']}\n",
        f"Average Words per Suggestion: {analysis['avg_word_count']:.1f}\n",
        f"Average Length: {analysis['length_stats']['avg_length']:.1f} characters\n",
        "\nMost Common Words:\n",
    ]
    fragments.extend(
        f"- {word}: {count}\n"
        for word, count in analysis['common_words'].items()
    )
    return "".join(fragments)
|
76 |
|
77 |
def create_visualization(analysis: Dict[str, Any]) -> str:
    """Create HTML visualization of the analysis.

    Suggestion words come from an external API (untrusted input), so the
    word-cloud items are HTML-escaped before being interpolated into the page
    to prevent markup/script injection in the rendered gr.HTML output.
    """
    from html import escape  # local import: the variable `html` below shadows the module name

    # Pre-render the escaped word-cloud spans outside the big template.
    word_items = ' '.join(
        f'<span class="word-item">{escape(str(word))} ({escape(str(count))})</span>'
        for word, count in analysis['common_words'].items()
    )

    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <style>
        .analysis-container {{
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }}
        .metric-card {{
            background: #f5f5f5;
            border-radius: 8px;
            padding: 15px;
            margin: 10px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .word-cloud {{
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin: 10px 0;
        }}
        .word-item {{
            background: #e0e0e0;
            padding: 5px 10px;
            border-radius: 15px;
            font-size: 14px;
        }}
        h3 {{
            margin: 0 0 10px 0;
            color: #333;
        }}
        p {{
            margin: 5px 0;
            color: #666;
        }}
    </style>
    </head>
    <body>
    <div class="analysis-container">
        <div class="metric-card">
            <h3>Overall Statistics</h3>
            <p>Total Suggestions: {analysis['total_count']}</p>
            <p>Average Words: {analysis['avg_word_count']:.1f}</p>
            <p>Price Mentions: {analysis['price_mentions']}</p>
            <p>Brand Mentions: {analysis['brand_mentions']}</p>
        </div>

        <div class="metric-card">
            <h3>Length Statistics</h3>
            <p>Average Length: {analysis['length_stats']['avg_length']:.1f} characters</p>
            <p>Maximum Length: {analysis['length_stats']['max_length']} characters</p>
            <p>Minimum Length: {analysis['length_stats']['min_length']} characters</p>
        </div>

        <div class="metric-card">
            <h3>Most Common Words</h3>
            <div class="word-cloud">
                {word_items}
            </div>
        </div>
    </div>
    </body>
    </html>
    """
    return html
|
148 |
|
|
|
160 |
if min_length > 0:
|
161 |
suggestions = [s for s in suggestions if len(s.split()) >= min_length]
|
162 |
|
163 |
+
if not suggestions:
|
164 |
+
return "No suggestions found", None, None
|
165 |
+
|
166 |
# Analyze suggestions
|
167 |
analyzer = KeywordAnalyzer()
|
168 |
analysis = analyzer.analyze_suggestions(suggestions)
|
|
|
182 |
# Add analysis results
|
183 |
output_text += "\n" + format_analysis_output(analysis)
|
184 |
|
185 |
+
try:
|
186 |
+
# Create visualization
|
187 |
+
visualization = create_visualization(analysis)
|
188 |
+
except Exception as viz_error:
|
189 |
+
logging.error(f"Visualization error: {str(viz_error)}")
|
190 |
+
visualization = "<p>Error creating visualization</p>"
|
191 |
|
192 |
return output_text, csv_filename, visualization
|
193 |
|
194 |
except Exception as e:
|
195 |
logging.error(f"Error in search_and_display: {str(e)}")
|
196 |
+
return f"Error occurred: {str(e)}", None, "<p>Error occurred during analysis</p>"
|
197 |
|
198 |
# Create Gradio interface
|
199 |
iface = gr.Interface(
|
|
|
205 |
outputs=[
|
206 |
gr.Textbox(label="Results", lines=20),
|
207 |
gr.File(label="Download CSV"),
|
208 |
+
gr.HTML(label="Analysis Visualization")
|
209 |
],
|
210 |
title="Advanced Amazon Suggestion Expander",
|
211 |
description="""
|
|
|
221 |
theme=gr.themes.Soft()
|
222 |
)
|
223 |
|
|
|
224 |
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|