Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,217 +1,148 @@
|
|
1 |
-
import gradio as gr
|
2 |
import requests
|
3 |
-
import json
|
4 |
import pandas as pd
|
5 |
-
import
|
|
|
|
|
6 |
from datetime import datetime
|
7 |
from collections import Counter
|
8 |
-
import
|
9 |
-
from typing import List, Dict, Any
|
10 |
-
import logging
|
11 |
-
import sys
|
12 |
|
13 |
-
|
14 |
-
logging.basicConfig(
|
15 |
-
level=logging.INFO,
|
16 |
-
format='%(asctime)s - %(levelname)s - %(message)s',
|
17 |
-
handlers=[
|
18 |
-
logging.StreamHandler(sys.stdout)
|
19 |
-
]
|
20 |
-
)
|
21 |
|
22 |
class AmazonSuggestionExpander:
|
23 |
def __init__(self):
|
|
|
24 |
self.headers = {
|
25 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36
|
26 |
}
|
27 |
-
self.cache = {}
|
28 |
-
self.last_request_time = 0
|
29 |
-
self.request_delay = 1.0 # Minimum delay between requests
|
30 |
-
|
31 |
-
def _rate_limit(self):
|
32 |
-
"""Implement rate limiting between requests"""
|
33 |
-
current_time = time.time()
|
34 |
-
time_since_last_request = current_time - self.last_request_time
|
35 |
-
if time_since_last_request < self.request_delay:
|
36 |
-
time.sleep(self.request_delay - time_since_last_request)
|
37 |
-
self.last_request_time = time.time()
|
38 |
-
|
39 |
-
def _fetch_suggestions(self, keyword: str, max_retries: int = 3) -> List[str]:
|
40 |
-
"""Fetch suggestions from Amazon with retry logic and caching"""
|
41 |
-
if keyword in self.cache:
|
42 |
-
return self.cache[keyword]
|
43 |
-
|
44 |
-
for attempt in range(max_retries):
|
45 |
-
try:
|
46 |
-
self._rate_limit()
|
47 |
-
url = f"https://completion.amazon.com/api/2017/suggestions?mid=ATVPDKIKX0DER&alias=aps&prefix={keyword}"
|
48 |
-
response = requests.get(url, headers=self.headers, timeout=10)
|
49 |
-
|
50 |
-
if response.status_code == 429: # Too Many Requests
|
51 |
-
logging.warning(f"Rate limited on attempt {attempt + 1}")
|
52 |
-
time.sleep(30 * (attempt + 1)) # Exponential backoff
|
53 |
-
continue
|
54 |
-
|
55 |
-
data = json.loads(response.text)
|
56 |
-
suggestions = [suggestion['value'] for suggestion in data.get('suggestions', [])]
|
57 |
-
self.cache[keyword] = suggestions
|
58 |
-
return suggestions
|
59 |
-
|
60 |
-
except requests.exceptions.RequestException as e:
|
61 |
-
logging.error(f"Request error on attempt {attempt + 1}: {str(e)}")
|
62 |
-
if attempt == max_retries - 1:
|
63 |
-
raise e
|
64 |
-
time.sleep(5 * (attempt + 1)) # Exponential backoff
|
65 |
-
except json.JSONDecodeError as e:
|
66 |
-
logging.error(f"JSON decode error on attempt {attempt + 1}: {str(e)}")
|
67 |
-
if attempt == max_retries - 1:
|
68 |
-
raise e
|
69 |
-
time.sleep(5 * (attempt + 1))
|
70 |
-
|
71 |
-
return []
|
72 |
|
73 |
def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
|
74 |
-
"""Get expanded suggestions with progress tracking"""
|
75 |
try:
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
# Get suggestions for each letter of the alphabet
|
85 |
-
alpha = 'abcdefghijklmnopqrstuvwxyz'
|
86 |
-
for i, letter in enumerate(alpha):
|
87 |
-
if progress:
|
88 |
-
progress((i + 1) / len(alpha), desc=f"Processing '{keyword} {letter}'...")
|
89 |
-
extended_suggestions = self._fetch_suggestions(f"{keyword} {letter}")
|
90 |
-
suggestions.extend(extended_suggestions)
|
91 |
|
92 |
-
|
93 |
-
suggestions =
|
94 |
-
suggestions
|
95 |
|
96 |
-
|
97 |
-
|
|
|
98 |
except Exception as e:
|
99 |
-
logging.error(f"
|
100 |
-
|
101 |
|
102 |
class KeywordAnalyzer:
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
df['word_count'] = df['Suggestions'].str.split().str.len()
|
110 |
|
111 |
-
#
|
112 |
all_words = ' '.join(suggestions).lower().split()
|
113 |
-
word_freq = Counter(all_words)
|
114 |
-
|
115 |
-
# Price analysis (if present)
|
116 |
-
price_pattern = r'\$\d+\.?\d*'
|
117 |
-
df['has_price'] = df['Suggestions'].str.contains(price_pattern)
|
118 |
-
|
119 |
-
# Brand mention analysis
|
120 |
-
# This is a simple example - you might want to maintain a proper brand list
|
121 |
-
df['has_brand'] = df['Suggestions'].apply(lambda x: any(brand in x.lower() for brand in ['amazon', 'prime']))
|
122 |
|
123 |
analysis = {
|
124 |
'total_count': len(suggestions),
|
125 |
-
'avg_word_count':
|
126 |
-
'max_word_count': df['word_count'].max(),
|
127 |
-
'min_word_count': df['word_count'].min(),
|
128 |
-
'word_count_distribution': df['word_count'].value_counts().to_dict(),
|
129 |
-
'common_words': {word: count for word, count in word_freq.most_common(10)},
|
130 |
-
'price_mentions': df['has_price'].sum(),
|
131 |
-
'brand_mentions': df['has_brand'].sum(),
|
132 |
'length_stats': {
|
133 |
-
'avg_length':
|
134 |
-
'max_length':
|
135 |
-
'min_length':
|
136 |
-
}
|
|
|
|
|
|
|
137 |
}
|
138 |
-
|
139 |
return analysis
|
140 |
|
141 |
def format_analysis_output(analysis: Dict[str, Any]) -> str:
|
142 |
-
"""Format analysis results
|
143 |
-
output = "\
|
144 |
-
|
145 |
output += f"Total Suggestions: {analysis['total_count']}\n"
|
146 |
-
output += f"
|
147 |
-
output += f"
|
148 |
-
output +=
|
149 |
-
output += f"- Minimum: {analysis['min_word_count']} words\n\n"
|
150 |
-
|
151 |
-
output += "Most Common Words:\n"
|
152 |
for word, count in analysis['common_words'].items():
|
153 |
-
output += f"- {word}: {count}
|
154 |
-
|
155 |
-
output += f"\nPrice Mentions: {analysis['price_mentions']}\n"
|
156 |
-
output += f"Brand Mentions: {analysis['brand_mentions']}\n"
|
157 |
-
|
158 |
-
output += f"\nCharacter Length Stats:\n"
|
159 |
-
output += f"- Average: {analysis['length_stats']['avg_length']:.1f} characters\n"
|
160 |
-
output += f"- Maximum: {analysis['length_stats']['max_length']} characters\n"
|
161 |
-
output += f"- Minimum: {analysis['length_stats']['min_length']} characters\n"
|
162 |
-
|
163 |
return output
|
164 |
|
165 |
def create_visualization(analysis: Dict[str, Any]) -> str:
|
166 |
"""Create HTML visualization of the analysis"""
|
167 |
-
html = """
|
168 |
-
|
169 |
-
|
|
|
|
|
|
|
170 |
font-family: Arial, sans-serif;
|
171 |
max-width: 800px;
|
172 |
margin: 0 auto;
|
173 |
padding: 20px;
|
174 |
-
}
|
175 |
-
.metric-card {
|
176 |
background: #f5f5f5;
|
177 |
border-radius: 8px;
|
178 |
padding: 15px;
|
179 |
margin: 10px 0;
|
180 |
-
|
181 |
-
|
|
|
182 |
display: flex;
|
183 |
flex-wrap: wrap;
|
184 |
gap: 10px;
|
185 |
margin: 10px 0;
|
186 |
-
}
|
187 |
-
.word-item {
|
188 |
background: #e0e0e0;
|
189 |
padding: 5px 10px;
|
190 |
border-radius: 15px;
|
191 |
font-size: 14px;
|
192 |
-
}
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
analysis
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
212 |
</div>
|
213 |
</div>
|
214 |
-
</
|
|
|
215 |
"""
|
216 |
return html
|
217 |
|
@@ -229,6 +160,9 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
|
|
229 |
if min_length > 0:
|
230 |
suggestions = [s for s in suggestions if len(s.split()) >= min_length]
|
231 |
|
|
|
|
|
|
|
232 |
# Analyze suggestions
|
233 |
analyzer = KeywordAnalyzer()
|
234 |
analysis = analyzer.analyze_suggestions(suggestions)
|
@@ -248,14 +182,18 @@ def search_and_display(keyword: str, min_length: int = 0, progress: gr.Progress
|
|
248 |
# Add analysis results
|
249 |
output_text += "\n" + format_analysis_output(analysis)
|
250 |
|
251 |
-
|
252 |
-
|
|
|
|
|
|
|
|
|
253 |
|
254 |
return output_text, csv_filename, visualization
|
255 |
|
256 |
except Exception as e:
|
257 |
logging.error(f"Error in search_and_display: {str(e)}")
|
258 |
-
return f"Error occurred: {str(e)}", None,
|
259 |
|
260 |
# Create Gradio interface
|
261 |
iface = gr.Interface(
|
@@ -267,7 +205,7 @@ iface = gr.Interface(
|
|
267 |
outputs=[
|
268 |
gr.Textbox(label="Results", lines=20),
|
269 |
gr.File(label="Download CSV"),
|
270 |
-
gr.HTML(label="Visualization")
|
271 |
],
|
272 |
title="Advanced Amazon Suggestion Expander",
|
273 |
description="""
|
@@ -283,6 +221,5 @@ iface = gr.Interface(
|
|
283 |
theme=gr.themes.Soft()
|
284 |
)
|
285 |
|
286 |
-
# Launch the app
|
287 |
if __name__ == "__main__":
|
288 |
iface.launch()
|
|
|
|
|
1 |
import requests
|
|
|
2 |
import pandas as pd
|
3 |
+
import logging
|
4 |
+
import gradio as gr
|
5 |
+
from typing import List, Dict, Any
|
6 |
from datetime import datetime
|
7 |
from collections import Counter
|
8 |
+
import re
|
|
|
|
|
|
|
9 |
|
10 |
+
# Configure the root logger so INFO-level messages are emitted (default stderr handler).
logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
class AmazonSuggestionExpander:
    """Fetches search-suggestion completions from Amazon's public completion API."""

    def __init__(self):
        # Endpoint for Amazon's 2017 suggestions API.
        self.base_url = "https://completion.amazon.com/api/2017/suggestions"
        # Browser-like User-Agent; the endpoint tends to reject default client agents.
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
        }

    def get_amazon_suggestions(self, keyword: str, progress: gr.Progress = None) -> List[str]:
        """Return the non-empty suggestion strings Amazon offers for *keyword*.

        Args:
            keyword: Search prefix to complete.
            progress: Optional Gradio progress tracker; unused in this method but
                kept in the signature for caller compatibility.

        Raises:
            Exception: wrapping any network or parsing failure (chained to the cause).
        """
        try:
            params = {
                'mid': 'ATVPDKIKX0DER',  # US marketplace id
                'alias': 'aps',          # "all product search" alias
                'prefix': keyword
            }
            # FIX: added timeout — without one a stalled connection would hang
            # the Gradio worker indefinitely.
            response = requests.get(self.base_url, headers=self.headers,
                                    params=params, timeout=10)
            response.raise_for_status()

            data = response.json()
            suggestions = [item.get('suggestion', '') for item in data.get('suggestions', [])]
            return [s for s in suggestions if s]  # Filter out empty suggestions

        except requests.RequestException as e:
            logging.error(f"Request error: {str(e)}")
            raise Exception(f"Failed to fetch suggestions: {str(e)}") from e
        except Exception as e:
            logging.error(f"General error: {str(e)}")
            raise Exception(f"An error occurred: {str(e)}") from e
|
39 |
|
40 |
class KeywordAnalyzer:
    """Computes summary statistics over a list of suggestion strings."""

    def __init__(self):
        # Stop words excluded from the word-frequency ranking.
        self.common_words = set(['the', 'and', 'for', 'with', 'in', 'on', 'at', 'to'])

    def analyze_suggestions(self, suggestions: List[str]) -> Dict[str, Any]:
        """Return a dict of aggregate metrics for *suggestions*.

        Keys: total_count, avg_word_count, length_stats (avg/max/min),
        common_words (top 10, stop words removed), price_mentions,
        brand_mentions. Handles an empty input list without dividing by zero.
        """
        word_counts = [len(s.split()) for s in suggestions]
        lengths = [len(s) for s in suggestions]

        # Word frequency analysis (lower-cased, stop words removed).
        all_words = ' '.join(suggestions).lower().split()
        word_freq = Counter([w for w in all_words if w not in self.common_words])

        analysis = {
            'total_count': len(suggestions),
            'avg_word_count': sum(word_counts) / len(suggestions) if suggestions else 0,
            'length_stats': {
                'avg_length': sum(lengths) / len(lengths) if lengths else 0,
                'max_length': max(lengths) if lengths else 0,
                'min_length': min(lengths) if lengths else 0
            },
            'common_words': dict(word_freq.most_common(10)),
            'price_mentions': sum(1 for s in suggestions if '$' in s or 'price' in s.lower()),
            # BUG FIX: the original iterated s.split() (whole words) while naming
            # the loop variable `char`, so str.isupper() only matched ALL-CAPS
            # words. Iterate characters, as the variable name intended, so any
            # capitalized word (e.g. "Apple") counts as a brand mention.
            'brand_mentions': sum(1 for s in suggestions if any(char.isupper() for char in s))
        }
        return analysis
|
65 |
|
66 |
def format_analysis_output(analysis: Dict[str, Any]) -> str:
    """Format analysis results as readable text."""
    # Assemble the report as a list of pre-terminated fragments and join once,
    # instead of repeated string concatenation.
    fragments = [
        "\nAnalysis Results:\n",
        f"Total Suggestions: {analysis['total_count']}\n",
        f"Average Words per Suggestion: {analysis['avg_word_count']:.1f}\n",
        f"Average Length: {analysis['length_stats']['avg_length']:.1f} characters\n",
        "\nMost Common Words:\n",
    ]
    fragments.extend(
        f"- {word}: {count}\n"
        for word, count in analysis['common_words'].items()
    )
    return "".join(fragments)
|
76 |
|
77 |
def create_visualization(analysis: Dict[str, Any]) -> str:
    """Create HTML visualization of the analysis.

    Suggestion words come from an external API (untrusted input), so the
    word-cloud items are HTML-escaped before being interpolated into the page
    to prevent markup/script injection in the rendered gr.HTML output.
    """
    from html import escape  # local import: the variable `html` below shadows the module name

    # Pre-render the escaped word-cloud spans outside the big template.
    word_items = ' '.join(
        f'<span class="word-item">{escape(str(word))} ({escape(str(count))})</span>'
        for word, count in analysis['common_words'].items()
    )

    html = f"""
    <!DOCTYPE html>
    <html>
    <head>
    <style>
        .analysis-container {{
            font-family: Arial, sans-serif;
            max-width: 800px;
            margin: 0 auto;
            padding: 20px;
        }}
        .metric-card {{
            background: #f5f5f5;
            border-radius: 8px;
            padding: 15px;
            margin: 10px 0;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }}
        .word-cloud {{
            display: flex;
            flex-wrap: wrap;
            gap: 10px;
            margin: 10px 0;
        }}
        .word-item {{
            background: #e0e0e0;
            padding: 5px 10px;
            border-radius: 15px;
            font-size: 14px;
        }}
        h3 {{
            margin: 0 0 10px 0;
            color: #333;
        }}
        p {{
            margin: 5px 0;
            color: #666;
        }}
    </style>
    </head>
    <body>
    <div class="analysis-container">
        <div class="metric-card">
            <h3>Overall Statistics</h3>
            <p>Total Suggestions: {analysis['total_count']}</p>
            <p>Average Words: {analysis['avg_word_count']:.1f}</p>
            <p>Price Mentions: {analysis['price_mentions']}</p>
            <p>Brand Mentions: {analysis['brand_mentions']}</p>
        </div>

        <div class="metric-card">
            <h3>Length Statistics</h3>
            <p>Average Length: {analysis['length_stats']['avg_length']:.1f} characters</p>
            <p>Maximum Length: {analysis['length_stats']['max_length']} characters</p>
            <p>Minimum Length: {analysis['length_stats']['min_length']} characters</p>
        </div>

        <div class="metric-card">
            <h3>Most Common Words</h3>
            <div class="word-cloud">
                {word_items}
            </div>
        </div>
    </div>
    </body>
    </html>
    """
    return html
|
148 |
|
|
|
160 |
if min_length > 0:
|
161 |
suggestions = [s for s in suggestions if len(s.split()) >= min_length]
|
162 |
|
163 |
+
if not suggestions:
|
164 |
+
return "No suggestions found", None, None
|
165 |
+
|
166 |
# Analyze suggestions
|
167 |
analyzer = KeywordAnalyzer()
|
168 |
analysis = analyzer.analyze_suggestions(suggestions)
|
|
|
182 |
# Add analysis results
|
183 |
output_text += "\n" + format_analysis_output(analysis)
|
184 |
|
185 |
+
try:
|
186 |
+
# Create visualization
|
187 |
+
visualization = create_visualization(analysis)
|
188 |
+
except Exception as viz_error:
|
189 |
+
logging.error(f"Visualization error: {str(viz_error)}")
|
190 |
+
visualization = "<p>Error creating visualization</p>"
|
191 |
|
192 |
return output_text, csv_filename, visualization
|
193 |
|
194 |
except Exception as e:
|
195 |
logging.error(f"Error in search_and_display: {str(e)}")
|
196 |
+
return f"Error occurred: {str(e)}", None, "<p>Error occurred during analysis</p>"
|
197 |
|
198 |
# Create Gradio interface
|
199 |
iface = gr.Interface(
|
|
|
205 |
outputs=[
|
206 |
gr.Textbox(label="Results", lines=20),
|
207 |
gr.File(label="Download CSV"),
|
208 |
+
gr.HTML(label="Analysis Visualization")
|
209 |
],
|
210 |
title="Advanced Amazon Suggestion Expander",
|
211 |
description="""
|
|
|
221 |
theme=gr.themes.Soft()
|
222 |
)
|
223 |
|
|
|
224 |
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()
|