# Spaces: Running
# app.py
import gradio as gr | |
import requests | |
from bs4 import BeautifulSoup | |
from transformers import pipeline | |
import PyPDF2 | |
import docx | |
import os | |
from typing import List, Tuple, Optional | |
from smolagents import CodeAgent, HfApiModel, Tool | |
class ContentAnalyzer:
    """Summarize, sentiment-score and topic-tag text taken from raw input, a URL or a file.

    The public loaders (``read_file`` / ``fetch_web_content``) report problems
    as human-readable strings.  ``analyze_content`` uses the private
    ``_load_*`` helpers instead, which return an explicit success flag, so
    genuine content that merely *looks* like an error message (for example an
    article starting with the word "Error") is never mistaken for a failure.
    """

    # Candidate labels handed to the zero-shot classifier for topic detection.
    _TOPIC_LABELS = [
        "technology", "science", "business",
        "politics", "entertainment", "education",
        "health", "sports",
    ]

    def __init__(self):
        """Load the summarization, sentiment and zero-shot pipelines."""
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.sentiment_analyzer = pipeline("sentiment-analysis")
        self.zero_shot = pipeline("zero-shot-classification")

    @staticmethod
    def _resolve_upload(file_obj) -> Tuple[str, object]:
        """Return ``(name, source)`` for an upload.

        ``name`` is used only to pick a parser from its extension.  ``source``
        is a path string when one is available on disk, otherwise the original
        file-like object.
        """
        if isinstance(file_obj, (str, os.PathLike)):
            path = os.fsdecode(file_obj)
            return path, path
        name = str(getattr(file_obj, "name", ""))
        # Upload wrappers may expose the real file's location as ``.name``
        # while the wrapper's own stream is empty, so prefer the path on disk.
        if os.path.isfile(name):
            return name, name
        return name, file_obj

    def _load_file(self, file_obj) -> Tuple[bool, str]:
        """Extract text from a .txt, .pdf or .docx upload.

        Returns:
            ``(True, text)`` on success, or ``(False, message)`` when the
            extension is unsupported or reading/parsing raised.
        """
        try:
            name, source = self._resolve_upload(file_obj)
            file_ext = os.path.splitext(name)[1].lower()
            if file_ext == '.txt':
                if isinstance(source, str):
                    with open(source, "rb") as handle:
                        data = handle.read()
                else:
                    data = source.read()
                # Text-mode streams already yield str; only bytes need decoding.
                text = data.decode('utf-8') if isinstance(data, bytes) else data
                return True, text
            if file_ext == '.pdf':
                pdf_reader = PyPDF2.PdfReader(source)
                # extract_text() may yield nothing for image-only pages.
                pages = ((page.extract_text() or "") + "\n" for page in pdf_reader.pages)
                return True, "".join(pages)
            if file_ext == '.docx':
                doc = docx.Document(source)
                return True, "\n".join(paragraph.text for paragraph in doc.paragraphs)
            return False, f"Unsupported file type: {file_ext}"
        except Exception as e:
            # Boundary with third-party parsers: report any failure as a message.
            return False, f"Error reading file: {str(e)}"

    def _load_url(self, url: str) -> Tuple[bool, str]:
        """Download ``url`` and reduce the HTML to its visible text lines.

        Returns:
            ``(True, text)`` on success, or ``(False, message)`` when the
            request or the parsing raised.
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            soup = BeautifulSoup(response.text, 'html.parser')
            # Script and style bodies are not readable content.
            for tag in soup(["script", "style"]):
                tag.decompose()
            text = soup.get_text(separator='\n')
            lines = (line.strip() for line in text.splitlines())
            return True, "\n".join(line for line in lines if line)
        except Exception as e:
            return False, f"Error fetching URL: {str(e)}"

    def read_file(self, file_obj) -> str:
        """Read content from different file types.

        Accepts a path (``str`` / ``os.PathLike``) or a file-like object with a
        ``name`` attribute.  Returns the extracted text, ``""`` for ``None``,
        or a message starting with ``"Unsupported file type:"`` /
        ``"Error reading file:"`` when extraction is not possible.
        """
        if file_obj is None:
            return ""
        return self._load_file(file_obj)[1]

    def fetch_web_content(self, url: str) -> str:
        """Fetch content from URL.

        Returns the page's visible text, or a message starting with
        ``"Error fetching URL:"`` when the download fails.
        """
        return self._load_url(url)[1]

    def analyze_content(self,
                        text: Optional[str] = None,
                        url: Optional[str] = None,
                        file: Optional[object] = None,
                        analysis_types: Optional[List[str]] = None) -> dict:
        """Analyze content from text, URL, or file.

        The source is chosen in priority order: ``url``, then ``file``, then
        ``text``.

        Args:
            text: Raw text to analyze.
            url: Web page to download and analyze.
            file: Uploaded file (path or file-like object) to analyze.
            analysis_types: Any of ``"summarize"``, ``"sentiment"``,
                ``"topics"``.  Defaults to ``["summarize"]`` when omitted.

        Returns:
            A dict with ``"original_text"`` (preview capped at 1000 characters)
            plus one key per requested analysis, or ``{"error": message}``
            when no content could be obtained or an analysis step raised.
        """
        if analysis_types is None:
            analysis_types = ["summarize"]
        try:
            # Get content from the appropriate source.
            if url:
                ok, content = self._load_url(url)
            elif file:
                ok, content = self._load_file(file)
            else:
                ok, content = True, text or ""
            if not ok:
                return {"error": content}
            if not content.strip():
                return {"error": "No content provided"}

            results = {
                "original_text": content[:1000] + "..." if len(content) > 1000 else content
            }

            # Perform requested analyses.  The character slices only bound the
            # work; truncation=True makes the tokenizer enforce the model's
            # real token limit for token-dense text.
            if "summarize" in analysis_types:
                summary = self.summarizer(
                    content[:1024], max_length=130, min_length=30, truncation=True
                )
                results["summary"] = summary[0]['summary_text']
            if "sentiment" in analysis_types:
                sentiment = self.sentiment_analyzer(content[:512], truncation=True)
                results["sentiment"] = {
                    "label": sentiment[0]['label'],
                    "score": round(sentiment[0]['score'], 3)
                }
            if "topics" in analysis_types:
                topics = self.zero_shot(
                    content[:512],
                    candidate_labels=list(self._TOPIC_LABELS)
                )
                # Keep only topics the classifier gives more than 10% weight.
                results["topics"] = [
                    {"label": label, "score": round(score, 3)}
                    for label, score in zip(topics['labels'], topics['scores'])
                    if score > 0.1
                ]
            return results
        except Exception as e:
            return {"error": f"Analysis error: {str(e)}"}
def create_interface():
    """Assemble the Gradio Blocks UI and wire the Analyze button to the analyzer.

    Three input tabs (text, URL, file) feed one ContentAnalyzer instance; the
    four output tabs show the text preview, summary, sentiment and topics.

    Returns:
        The configured ``gr.Blocks`` app, not yet launched.
    """
    analyzer = ContentAnalyzer()

    def process_analysis(text, url, file, types):
        """Run the analysis and render the four Markdown outputs.

        On failure the error message goes to the first output and the other
        three are blanked.
        """
        outcome = analyzer.analyze_content(text, url, file, types)
        if "error" in outcome:
            return outcome["error"], "", "", ""

        if "sentiment" in outcome:
            verdict = outcome["sentiment"]
            sentiment_md = f"**Sentiment:** {verdict['label']} (Confidence: {verdict['score']})"
        else:
            sentiment_md = ""

        if "topics" in outcome:
            bullets = [f"- {t['label']}: {t['score']}" for t in outcome["topics"]]
            topics_md = "**Detected Topics:**\n" + "\n".join(bullets)
        else:
            topics_md = ""

        return (
            outcome.get("original_text", ""),
            outcome.get("summary", ""),
            sentiment_md,
            topics_md,
        )

    with gr.Blocks(title="Content Analyzer") as demo:
        gr.Markdown("# π Content Analyzer")
        gr.Markdown("Analyze text content from various sources using AI.")

        # One tab per input source.
        with gr.Tabs():
            with gr.Tab("Text Input"):
                text_box = gr.Textbox(
                    label="Enter Text",
                    placeholder="Paste your text here...",
                    lines=5
                )
            with gr.Tab("Web URL"):
                url_box = gr.Textbox(
                    label="Enter URL",
                    placeholder="https://example.com"
                )
            with gr.Tab("File Upload"):
                upload = gr.File(
                    label="Upload File",
                    file_types=[".txt", ".pdf", ".docx"]
                )

        # Which analyses to run; summarization is pre-selected.
        type_picker = gr.CheckboxGroup(
            choices=["summarize", "sentiment", "topics"],
            value=["summarize"],
            label="Analysis Types"
        )
        run_button = gr.Button("Analyze", variant="primary")

        # One tab per result section.
        with gr.Tabs():
            with gr.Tab("Original Text"):
                original_md = gr.Markdown()
            with gr.Tab("Summary"):
                summary_md = gr.Markdown()
            with gr.Tab("Sentiment"):
                sentiment_panel = gr.Markdown()
            with gr.Tab("Topics"):
                topics_panel = gr.Markdown()

        # Event wiring has to happen inside the Blocks context.
        run_button.click(
            fn=process_analysis,
            inputs=[text_box, url_box, upload, type_picker],
            outputs=[original_md, summary_md, sentiment_panel, topics_panel]
        )

    return demo
# Script entry point: build the UI and start the Gradio server.
# Nothing is launched when this module is merely imported.
if __name__ == "__main__":
    demo = create_interface()
    demo.launch()