Spaces:

MHamdan
/

ContentAnalyzer

Running

App Files Files Community

ContentAnalyzer / app.py

MHamdan

Initial content analyzer setup

5215be1 about 2 months ago

raw

history blame

7.28 kB

	# app.py
	import gradio as gr
	import requests
	from bs4 import BeautifulSoup
	from transformers import pipeline
	import PyPDF2
	import docx
	import os
	from typing import List, Tuple, Optional
	from smolagents import CodeAgent, HfApiModel, Tool

	class ContentAnalyzer:
	    """Load text from raw input, a URL, or an uploaded file and analyze it.

	    Three ``transformers`` pipelines are created when the object is built:
	    summarization (``facebook/bart-large-cnn``), sentiment analysis and
	    zero-shot classification (the latter two with the pipeline defaults).
	    """

	    def __init__(self):
	        """Create the summarization, sentiment and zero-shot pipelines."""
	        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
	        self.sentiment_analyzer = pipeline("sentiment-analysis")
	        self.zero_shot = pipeline("zero-shot-classification")

	    def _load_file(self, file_obj) -> Tuple[str, Optional[str]]:
	        """Extract text from *file_obj* and return ``(text, error)``.

	        On success ``error`` is ``None``. On failure ``text`` is ``""`` and
	        ``error`` is the user-facing message. Keeping the two apart lets
	        callers detect failures without inspecting the text itself.

	        *file_obj* may be ``None``, a filesystem path, or an object exposing
	        ``name`` and ``read()``.
	        """
	        if file_obj is None:
	            return "", None

	        try:
	            # NOTE(review): the upload may arrive as a path string or as an
	            # open file object depending on the Gradio version/config --
	            # both shapes are accepted here; confirm against the deployment.
	            is_path = isinstance(file_obj, (str, os.PathLike))
	            file_name = os.fspath(file_obj) if is_path else file_obj.name
	            file_ext = os.path.splitext(file_name)[1].lower()
	            # The PDF and DOCX readers are given the path when we have one,
	            # otherwise the stream itself.
	            source = file_name if is_path else file_obj

	            if file_ext == '.txt':
	                if is_path:
	                    with open(file_name, "rb") as handle:
	                        raw = handle.read()
	                else:
	                    raw = file_obj.read()
	                # A text-mode stream already yields str; only bytes need decoding.
	                text = raw.decode('utf-8') if isinstance(raw, bytes) else raw
	                return text, None

	            if file_ext == '.pdf':
	                pdf_reader = PyPDF2.PdfReader(source)
	                # `or ""` guards against a page yielding no text object.
	                pages = [(page.extract_text() or "") + "\n" for page in pdf_reader.pages]
	                return "".join(pages), None

	            if file_ext == '.docx':
	                doc = docx.Document(source)
	                return "\n".join(paragraph.text for paragraph in doc.paragraphs), None

	            return "", f"Unsupported file type: {file_ext}"

	        except Exception as e:
	            # Best-effort boundary: any reader failure becomes a message.
	            return "", f"Error reading file: {str(e)}"

	    def read_file(self, file_obj) -> str:
	        """Read content from different file types.

	        Returns the extracted text, ``""`` when *file_obj* is ``None``, or a
	        message starting with ``"Unsupported file type:"`` /
	        ``"Error reading file:"`` when the file cannot be processed.
	        """
	        text, error = self._load_file(file_obj)
	        return text if error is None else error

	    def _load_url(self, url: str) -> Tuple[str, Optional[str]]:
	        """Download *url* and return ``(visible_text, error)``.

	        ``<script>`` and ``<style>`` elements are removed, then every line is
	        stripped and blank lines are dropped. On failure ``error`` carries
	        the ``"Error fetching URL: ..."`` message.
	        """
	        try:
	            # NOTE(review): the URL is fetched server-side exactly as given;
	            # if it comes from untrusted users consider restricting schemes
	            # and internal hosts.
	            response = requests.get(url, timeout=10)
	            response.raise_for_status()
	            soup = BeautifulSoup(response.text, 'html.parser')

	            # Remove scripts and styles
	            for tag in soup(["script", "style"]):
	                tag.decompose()

	            text = soup.get_text(separator='\n')
	            stripped = (line.strip() for line in text.splitlines())
	            return "\n".join(line for line in stripped if line), None

	        except Exception as e:
	            return "", f"Error fetching URL: {str(e)}"

	    def fetch_web_content(self, url: str) -> str:
	        """Fetch content from URL.

	        Returns the page's visible text, or a message starting with
	        ``"Error fetching URL:"`` when the request or parsing fails.
	        """
	        text, error = self._load_url(url)
	        return text if error is None else error

	    def analyze_content(self,
	                        text: Optional[str] = None,
	                        url: Optional[str] = None,
	                        file: Optional[object] = None,
	                        analysis_types: Optional[List[str]] = None) -> dict:
	        """Analyze content from text, URL, or file.

	        The source is chosen in priority order: *url*, then *file*, then
	        *text*.

	        Args:
	            text: Raw text to analyze.
	            url: Web page to download and analyze.
	            file: Uploaded file (``.txt``, ``.pdf`` or ``.docx``).
	            analysis_types: Any of ``"summarize"``, ``"sentiment"``,
	                ``"topics"``. ``None`` means ``["summarize"]``.

	        Returns:
	            ``{"error": message}`` on failure; otherwise a dict with
	            ``"original_text"`` (at most 1000 characters plus ``"..."``) and,
	            per requested analysis, ``"summary"``, ``"sentiment"``
	            (``label``/``score``) and ``"topics"`` (labels scoring above 0.1).
	        """
	        # A list literal as default would be shared between calls.
	        if analysis_types is None:
	            analysis_types = ["summarize"]

	        try:
	            # Get content from appropriate source
	            if url:
	                content, error = self._load_url(url)
	            elif file:
	                content, error = self._load_file(file)
	            else:
	                content, error = text or "", None

	            # Failures are reported out-of-band, so genuine content that
	            # merely begins with the word "Error" is still analyzed.
	            if error is not None:
	                return {"error": error}
	            if not content.strip():
	                return {"error": "No content provided"}

	            results = {
	                "original_text": content[:1000] + "..." if len(content) > 1000 else content
	            }

	            # Perform requested analyses. Inputs are cut by character count
	            # to keep them short; this is not a token count.
	            if "summarize" in analysis_types:
	                summary = self.summarizer(content[:1024], max_length=130, min_length=30)
	                results["summary"] = summary[0]['summary_text']

	            if "sentiment" in analysis_types:
	                sentiment = self.sentiment_analyzer(content[:512])
	                results["sentiment"] = {
	                    "label": sentiment[0]['label'],
	                    "score": round(sentiment[0]['score'], 3)
	                }

	            if "topics" in analysis_types:
	                topics = self.zero_shot(
	                    content[:512],
	                    candidate_labels=["technology", "science", "business",
	                                      "politics", "entertainment", "education",
	                                      "health", "sports"]
	                )
	                results["topics"] = [
	                    {"label": label, "score": round(score, 3)}
	                    for label, score in zip(topics['labels'], topics['scores'])
	                    if score > 0.1
	                ]

	            return results

	        except Exception as e:
	            # Top-level boundary for the UI: report instead of crashing.
	            return {"error": f"Analysis error: {str(e)}"}

	def create_interface():
	    """Assemble the Gradio Blocks UI and return it without launching.

	    The page has three input tabs (text, URL, file upload), a checkbox group
	    selecting the analyses to run, an "Analyze" button, and four output tabs
	    that receive the values returned by ``ContentAnalyzer.analyze_content``.
	    """
	    analyzer = ContentAnalyzer()

	    with gr.Blocks(title="Content Analyzer") as app:
	        gr.Markdown("# 📑 Content Analyzer")
	        gr.Markdown("Analyze text content from various sources using AI.")

	        # --- Inputs: one tab per content source ---
	        with gr.Tabs():
	            with gr.Tab("Text Input"):
	                text_box = gr.Textbox(
	                    label="Enter Text",
	                    placeholder="Paste your text here...",
	                    lines=5
	                )

	            with gr.Tab("Web URL"):
	                url_box = gr.Textbox(
	                    label="Enter URL",
	                    placeholder="https://example.com"
	                )

	            with gr.Tab("File Upload"):
	                upload = gr.File(
	                    label="Upload File",
	                    file_types=[".txt", ".pdf", ".docx"]
	                )

	        # --- Which analyses to run ---
	        selected_types = gr.CheckboxGroup(
	            choices=["summarize", "sentiment", "topics"],
	            value=["summarize"],
	            label="Analysis Types"
	        )

	        run_button = gr.Button("Analyze", variant="primary")

	        # --- Outputs: one Markdown pane per result ---
	        with gr.Tabs():
	            with gr.Tab("Original Text"):
	                original_pane = gr.Markdown()
	            with gr.Tab("Summary"):
	                summary_pane = gr.Markdown()
	            with gr.Tab("Sentiment"):
	                sentiment_pane = gr.Markdown()
	            with gr.Tab("Topics"):
	                topics_pane = gr.Markdown()

	        def process_analysis(text, url, file, types):
	            """Run the analyzer and format its result for the four panes."""
	            outcome = analyzer.analyze_content(text, url, file, types)

	            # An error is shown in the first pane; the others are cleared.
	            if "error" in outcome:
	                return outcome["error"], "", "", ""

	            sentiment_md = ""
	            if "sentiment" in outcome:
	                info = outcome["sentiment"]
	                sentiment_md = f"Sentiment: {info['label']} (Confidence: {info['score']})"

	            topics_md = ""
	            if "topics" in outcome:
	                bullet_lines = [
	                    f"- {entry['label']}: {entry['score']}"
	                    for entry in outcome["topics"]
	                ]
	                topics_md = "Detected Topics:\n" + "\n".join(bullet_lines)

	            return (
	                outcome.get("original_text", ""),
	                outcome.get("summary", ""),
	                sentiment_md,
	                topics_md,
	            )

	        # Wire the button to the handler.
	        run_button.click(
	            fn=process_analysis,
	            inputs=[text_box, url_box, upload, selected_types],
	            outputs=[original_pane, summary_pane, sentiment_pane, topics_pane]
	        )

	    return app

	# Script entry point: build the interface and start the Gradio server.
	if __name__ == "__main__":
	    demo = create_interface()
	    demo.launch()