Spaces:

AiDeveloper1
/

RCS

Sleeping

App Files Files Community

AiDeveloper1 committed on Jun 5

Commit

5c7501f

verified ·

1 Parent(s): 0bcc68d

Update summarizer.py

Browse files

Files changed (1) hide show

summarizer.py +204 -204

summarizer.py CHANGED Viewed

@@ -1,205 +1,205 @@
-import os
-import re
-from typing import Dict, Optional
-import google.generativeai as genai
-import logging
-from dotenv import load_dotenv
-from urllib.parse import urlparse
-from cachetools import TTLCache
-# Load environment variables
-load_dotenv()
-# Set up logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# In-memory cache: 1000 items, 1-hour TTL
-cache = TTLCache(maxsize=1000, ttl=3600)
-async def summarize_text(text: str, url: str = "") -> Dict[str, str]:
-    """Summarize text into a title and description using Gemini-1.5 Flash.
-
-    Args:
-        text: Source text. Surrounding whitespace is stripped and a falsy
-            value is treated as an empty string.
-        url: Page URL used in the prompt and for the fallback domain.
-            Replaced by "https://example.com" when empty.
-
-    Returns:
-        A dict with "title" and "description" keys. A generic fallback built
-        from the URL's domain is returned when no chunk yields a parseable
-        response or when any exception is raised. Results are stored in the
-        module-level ``cache``.
-    """
-    try:
-        # Validate inputs
-        text = text.strip() if text else ""
-        if not url:
-            url = "https://example.com"
-        try:
-            parsed_url = urlparse(url)
-            domain = parsed_url.netloc or "example.com"
-        except Exception:
-            logging.warning(f"Invalid URL: {url}. Using default domain.")
-            domain = "example.com"
-        # Check cache
-        # NOTE: str hashes are randomized per interpreter process by default,
-        # which is fine here because the cache is in-memory only.
-        cache_key = f"summarize_{hash(text + url)}"
-        if cache_key in cache:
-            logging.info(f"Cache hit for {cache_key}")
-            return cache[cache_key]
-        # Get Gemini API key
-        api_key = os.getenv("GEMINI_API_KEY")
-        if not api_key:
-            logging.error("Gemini API key not found. Please set GEMINI_API_KEY in .env file.")
-            raise ValueError("Gemini API key is required for summarization.")
-        # Configure Gemini client
-        genai.configure(api_key=api_key)
-        model = genai.GenerativeModel('gemini-1.5-flash')
-        # Handle short or empty text
-        if len(text) < 20:
-            logging.warning(f"Text too short ({len(text)} chars): '{text}'. Using URL context.")
-            text = f"Content from {url} about news, products, or services."
-        # Split text into chunks to avoid quota issues (e.g., 1000 chars per chunk)
-        chunk_size = 1000
-        text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
-        summaries = []
-        # Every chunk in the slice triggers an API call, although only the
-        # first parsed summary is used further down.
-        for chunk in text_chunks[:2]:  # Limit to first 2000 chars for efficiency
-            prompt = (
-                f"Summarize the following text into a title (30-50 characters) and a description (80-100 characters) "
-                f"for RCS messaging. Ensure titles are catchy and descriptions are engaging, relevant to the content, "
-                f"and suitable for a news, product, or service context inferred from the URL ({url}). "
-                f"Output as JSON:\n{{\"title\": \"[title]\", \"description\": \"[description]\"}}\n\nText: {chunk}"
-            )
-            response = await model.generate_content_async(prompt)
-            raw_content = response.text.strip()
-            logging.info(f"Raw Gemini response: {raw_content}")
-            # Parse response with regex
-            # The pattern needs "title" before "description", and [^"]+ stops
-            # at the first double quote, so a value containing an escaped
-            # quote is cut short.
-            try:
-                match = re.search(r'\{[\s\S]*"title":\s*"([^"]+)"[\s\S]*"description":\s*"([^"]+)"[\s\S]*\}', raw_content)
-                if match:
-                    title = match.group(1)
-                    description = match.group(2)
-                    summaries.append({"title": title, "description": description})
-                else:
-                    raise ValueError("Invalid JSON format in Gemini response")
-            except Exception as e:
-                logging.warning(f"Failed to parse Gemini response: {e}. Skipping chunk.")
-                continue
-        # Combine summaries (prioritize first valid summary)
-        if summaries:
-            result = summaries[0]
-        else:
-            logging.warning("No valid summaries generated. Using fallback.")
-            result = {
-                "title": "News Summary",
-                "description": f"Discover news and insights from {domain}."[:100]
-            }
-        # Ensure non-empty outputs
-        if not result["title"].strip():
-            result["title"] = "News Summary"
-        if not result["description"].strip():
-            result["description"] = f"Discover news and insights from {domain}."[:100]
-        cache[cache_key] = result
-        logging.info(f"Summary - Title: {result['title']}, Description: {result['description']}")
-        return result
-    except Exception as e:
-        logging.error(f"Error summarizing text: {e}")
-        domain = urlparse(url).netloc or "example.com"
-        result = {
-            "title": "News Summary",
-            "description": f"Discover news and insights from {domain}."[:100]
-        }
-        # NOTE(review): cache_key is assigned inside the try block; if the
-        # failure happened before that assignment this line raises
-        # UnboundLocalError instead of returning the fallback.
-        cache[cache_key] = result
-        return result
-async def quick_summarize(text: str, url: str = "") -> Dict[str, str]:
-    """Quickly summarize text with a lightweight prompt using Gemini-1.5 Flash.
-
-    Args:
-        text: Source text. Surrounding whitespace is stripped and a falsy
-            value is treated as an empty string. Only the first 1000
-            characters are sent to the model.
-        url: Page URL used in the prompt and for the fallback domain.
-            Replaced by "https://example.com" when empty.
-
-    Returns:
-        A dict with "title" (cut to 50 characters) and "description" (cut to
-        100 characters) taken from the model response, or a generic fallback
-        built from the URL's domain when parsing fails or any exception is
-        raised. Results are stored in the module-level ``cache``.
-    """
-    try:
-        # Validate inputs
-        text = text.strip() if text else ""
-        if not url:
-            url = "https://example.com"
-        try:
-            parsed_url = urlparse(url)
-            domain = parsed_url.netloc or "example.com"
-        except Exception:
-            logging.warning(f"Invalid URL: {url}. Using default domain.")
-            domain = "example.com"
-        # Check cache
-        # The "quick_summarize_" prefix keeps these entries separate from the
-        # ones written under the "summarize_" prefix in the same cache.
-        cache_key = f"quick_summarize_{hash(text + url)}"
-        if cache_key in cache:
-            logging.info(f"Cache hit for {cache_key}")
-            return cache[cache_key]
-        # Get Gemini API key
-        api_key = os.getenv("GEMINI_API_KEY")
-        if not api_key:
-            logging.error("Gemini API key not found. Please set GEMINI_API_KEY in .env file.")
-            raise ValueError("Gemini API key is required for summarization.")
-        # Configure Gemini client
-        genai.configure(api_key=api_key)
-        model = genai.GenerativeModel('gemini-1.5-flash')
-        # Handle short or empty text
-        if len(text) < 20:
-            logging.warning(f"Text too short ({len(text)} chars): '{text}'. Using URL context.")
-            text = f"Content from {url} about news, products, or services."
-        # Lightweight prompt with chunking
-        chunk_size = 1000
-        text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
-        summaries = []
-        for chunk in text_chunks[:1]:  # Limit to first 1000 chars for quick summary
-            prompt = (
-                f"Create a title (30-50 chars) and description (80-100 chars) for RCS messaging from this text. "
-                f"Keep it engaging and relevant to {url}. Output as JSON:\n{{\"title\": \"[title]\", \"description\": \"[description]\"}}\n\nText: {chunk}"
-            )
-            response = await model.generate_content_async(prompt)
-            raw_content = response.text.strip()
-            logging.info(f"Raw Gemini response (quick): {raw_content}")
-            # Parse response with regex
-            # The pattern needs "title" before "description", and [^"]+ stops
-            # at the first double quote inside a value.
-            try:
-                match = re.search(r'\{[\s\S]*"title":\s*"([^"]+)"[\s\S]*"description":\s*"([^"]+)"[\s\S]*\}', raw_content)
-                if match:
-                    # Hard caps matching the upper bounds requested in the prompt.
-                    title = match.group(1)[:50]
-                    description = match.group(2)[:100]
-                    summaries.append({"title": title, "description": description})
-                else:
-                    raise ValueError("Invalid JSON format in Gemini response")
-            except Exception as e:
-                logging.warning(f"Failed to parse Gemini response: {e}. Skipping chunk.")
-                continue
-        # Use first valid summary or fallback
-        if summaries:
-            result = summaries[0]
-        else:
-            logging.warning("No valid summaries generated. Using fallback.")
-            result = {
-                "title": "Quick Summary",
-                "description": f"Check out content from {domain}."[:100]
-            }
-        # Ensure non-empty outputs
-        if not result["title"].strip():
-            result["title"] = "Quick Summary"
-        if not result["description"].strip():
-            result["description"] = f"Check out content from {domain}."[:100]
-        cache[cache_key] = result
-        logging.info(f"Quick summary - Title: {result['title']}, Description: {result['description']}")
-        return result
-    except Exception as e:
-        logging.error(f"Error in quick summarize: {e}")
-        domain = urlparse(url).netloc or "example.com"
-        result = {
-            "title": "Quick Summary",
-            "description": f"Check out content from {domain}."[:100]
-        }
-        # NOTE(review): cache_key is assigned inside the try block; if the
-        # failure happened before that assignment this line raises
-        # UnboundLocalError instead of returning the fallback.
-        cache[cache_key] = result
         return result

+import os
+import re
+from typing import Dict, Optional
+import google.generativeai as genai
+import logging
+from dotenv import load_dotenv
+from urllib.parse import urlparse
+from cachetools import TTLCache
+# Load environment variables
+load_dotenv()
+# Set up logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+# In-memory cache: 1000 items, 1-hour TTL
+cache = TTLCache(maxsize=1000, ttl=3600)
+async def summarize_text(text: str, url: str = "") -> Dict[str, str]:
+    """Summarize text into a title and description using Gemini-1.5 Flash.
+
+    At most the first 2000 characters of ``text`` are considered, in chunks
+    of 1000. The first chunk whose response can be parsed supplies the
+    result, and no further API calls are made after that.
+
+    Args:
+        text: Source text. Surrounding whitespace is stripped and a falsy
+            value is treated as an empty string.
+        url: Page URL used in the prompt and for the fallback domain.
+            Replaced by "https://example.com" when empty.
+
+    Returns:
+        A dict with "title" and "description" keys. A generic fallback built
+        from the URL's domain is returned when no chunk yields a parseable
+        response or when any exception is raised; this function does not
+        propagate exceptions. Results are stored in the module-level
+        ``cache``.
+    """
+    # Stays None until the key has been built, so the error path below never
+    # touches an unbound name when input validation itself fails.
+    cache_key = None
+    try:
+        # Validate inputs
+        text = text.strip() if text else ""
+        if not url:
+            url = "https://example.com"
+        try:
+            parsed_url = urlparse(url)
+            domain = parsed_url.netloc or "example.com"
+        except Exception:
+            logging.warning(f"Invalid URL: {url}. Using default domain.")
+            domain = "example.com"
+        # One capped fallback string, so every fallback path respects the
+        # same 100-character limit.
+        fallback_description = f"Discover news and insights from {domain}."[:100]
+        # Check cache
+        cache_key = f"summarize_{hash(text + url)}"
+        # Single lookup: an entry cannot expire between a membership test and
+        # the read.
+        cached = cache.get(cache_key)
+        if cached is not None:
+            logging.info(f"Cache hit for {cache_key}")
+            return cached
+        # Get Gemini API key
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            logging.error("Gemini API key not found. Please set GEMINI_API_KEY in .env file.")
+            raise ValueError("Gemini API key is required for summarization.")
+        # Configure Gemini client
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel('gemini-1.5-flash')
+        # Handle short or empty text
+        if len(text) < 20:
+            logging.warning(f"Text too short ({len(text)} chars): '{text}'. Using URL context.")
+            text = f"Content from {url} about news, products, or services."
+        # Split text into chunks to avoid quota issues (e.g., 1000 chars per chunk)
+        chunk_size = 1000
+        text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+        result = None
+        for chunk in text_chunks[:2]:  # Limit to first 2000 chars for efficiency
+            prompt = (
+                f"Summarize the following text into a title (30-50 characters) and a description (80-100 characters) "
+                f"for RCS messaging. Ensure titles are catchy and descriptions are engaging, relevant to the content, "
+                f"and suitable for a news, product, or service context inferred from the URL ({url}). "
+                f"Output as JSON:\n{{\"title\": \"[title]\", \"description\": \"[description]\"}}\n\nText: {chunk}"
+            )
+            response = await model.generate_content_async(prompt)
+            raw_content = response.text.strip()
+            logging.info(f"Raw Gemini response: {raw_content}")
+            # Parse response with regex
+            match = re.search(r'\{[\s\S]*"title":\s*"([^"]+)"[\s\S]*"description":\s*"([^"]+)"[\s\S]*\}', raw_content)
+            if not match:
+                logging.warning("Failed to parse Gemini response: Invalid JSON format in Gemini response. Skipping chunk.")
+                continue
+            result = {"title": match.group(1), "description": match.group(2)}
+            # Only the first valid summary is ever used, so stop here: a
+            # second request would cost quota, and a failure in it would
+            # throw away this good result.
+            break
+        if result is None:
+            logging.warning("No valid summaries generated. Using fallback.")
+            result = {
+                "title": "News Summary",
+                "description": fallback_description
+            }
+        # Ensure non-empty outputs
+        if not result["title"].strip():
+            result["title"] = "News Summary"
+        if not result["description"].strip():
+            result["description"] = fallback_description
+        cache[cache_key] = result
+        logging.info(f"Summary - Title: {result['title']}, Description: {result['description']}")
+        return result
+    except Exception as e:
+        logging.error(f"Error summarizing text: {e}")
+        # The URL may be the very thing that failed, so parse it defensively.
+        try:
+            domain = urlparse(url).netloc or "example.com"
+        except (AttributeError, TypeError, ValueError):
+            domain = "example.com"
+        result = {
+            "title": "News Summary",
+            "description": f"Discover news and insights from {domain}."[:100]
+        }
+        # Without a key (failure before it was built) there is nothing to
+        # cache under; just return the fallback.
+        if cache_key is not None:
+            cache[cache_key] = result
+        return result
+async def quick_summarize(text: str, url: str = "") -> Dict[str, str]:
+    """Quickly summarize text with a lightweight prompt using Gemini-1.5 Pro.
+
+    Only the first 1000 characters of ``text`` are sent to the model.
+
+    Args:
+        text: Source text. Surrounding whitespace is stripped and a falsy
+            value is treated as an empty string.
+        url: Page URL used in the prompt and for the fallback domain.
+            Replaced by "https://example.com" when empty.
+
+    Returns:
+        A dict with "title" and "description" keys taken from the model
+        response, or a generic fallback built from the URL's domain when the
+        response cannot be parsed or any exception is raised; this function
+        does not propagate exceptions. Results are stored in the module-level
+        ``cache``.
+    """
+    # Stays None until the key has been built, so the error path below never
+    # touches an unbound name when input validation itself fails.
+    cache_key = None
+    try:
+        # Validate inputs
+        text = text.strip() if text else ""
+        if not url:
+            url = "https://example.com"
+        try:
+            parsed_url = urlparse(url)
+            domain = parsed_url.netloc or "example.com"
+        except Exception:
+            logging.warning(f"Invalid URL: {url}. Using default domain.")
+            domain = "example.com"
+        # Check cache
+        cache_key = f"quick_summarize_{hash(text + url)}"
+        # Single lookup: an entry cannot expire between a membership test and
+        # the read.
+        cached = cache.get(cache_key)
+        if cached is not None:
+            logging.info(f"Cache hit for {cache_key}")
+            return cached
+        # Get Gemini API key
+        api_key = os.getenv("GEMINI_API_KEY")
+        if not api_key:
+            logging.error("Gemini API key not found. Please set GEMINI_API_KEY in .env file.")
+            raise ValueError("Gemini API key is required for summarization.")
+        # Configure Gemini client
+        genai.configure(api_key=api_key)
+        model = genai.GenerativeModel('gemini-1.5-pro')
+        # Handle short or empty text
+        if len(text) < 20:
+            logging.warning(f"Text too short ({len(text)} chars): '{text}'. Using URL context.")
+            text = f"Content from {url} about news, products, or services."
+        # Lightweight prompt with chunking
+        chunk_size = 1000
+        text_chunks = [text[i:i + chunk_size] for i in range(0, len(text), chunk_size)]
+        result = None
+        for chunk in text_chunks[:1]:  # Limit to first 1000 chars for quick summary
+            prompt = (
+                f"Create a title (30-50 chars) and description (80-100 chars) for RCS messaging from this text. "
+                f"Keep it engaging and relevant to {url}. Output as JSON:\n{{\"title\": \"[title]\", \"description\": \"[description]\"}}\n\nText: {chunk}"
+            )
+            response = await model.generate_content_async(prompt)
+            raw_content = response.text.strip()
+            logging.info(f"Raw Gemini response (quick): {raw_content}")
+            # Parse response with regex
+            match = re.search(r'\{[\s\S]*"title":\s*"([^"]+)"[\s\S]*"description":\s*"([^"]+)"[\s\S]*\}', raw_content)
+            if not match:
+                logging.warning("Failed to parse Gemini response: Invalid JSON format in Gemini response. Skipping chunk.")
+                continue
+            result = {"title": match.group(1), "description": match.group(2)}
+        # Use first valid summary or fallback
+        if result is None:
+            logging.warning("No valid summaries generated. Using fallback.")
+            result = {
+                "title": "Quick Summary",
+                "description": f"Check out content from {domain}."
+            }
+        # Ensure non-empty outputs
+        if not result["title"].strip():
+            result["title"] = "Quick Summary"
+        if not result["description"].strip():
+            result["description"] = f"Check out content from {domain}."
+        cache[cache_key] = result
+        logging.info(f"Quick summary - Title: {result['title']}, Description: {result['description']}")
+        return result
+    except Exception as e:
+        logging.error(f"Error in quick summarize: {e}")
+        # The URL may be the very thing that failed, so parse it defensively.
+        try:
+            domain = urlparse(url).netloc or "example.com"
+        except (AttributeError, TypeError, ValueError):
+            domain = "example.com"
+        result = {
+            "title": "Quick Summary",
+            "description": f"Check out content from {domain}."
+        }
+        # Without a key (failure before it was built) there is nothing to
+        # cache under; just return the fallback.
+        if cache_key is not None:
+            cache[cache_key] = result
+        return result