MHamdan committed on
Commit
37ff7dd
·
1 Parent(s): 5b0db0d

Initial commit with full functionality

Files changed (4)
  1. app.py +61 -0
  2. requirements.txt +6 -0
  3. smart_web_analyzer.py +54 -0
  4. space.yml +5 -0
app.py ADDED
@@ -0,0 +1,61 @@
# app.py
import gradio as gr
from smart_web_analyzer import WebAnalyzer

analyzer = WebAnalyzer()

def format_results(results: dict) -> tuple:
    """Format analysis results for the four Gradio output tabs."""
    if 'error' in results:
        error_msg = f"❌ {results['error']}"
        # Show the error in every tab so it is visible whichever tab is open
        return error_msg, error_msg, error_msg, error_msg

    clean_text_md = results.get('clean_text', 'No text extracted')
    summary_md = f"**AI Summary:**\n{results['summary']}" if 'summary' in results else ""
    sentiment_md = f"**Sentiment Score:**\n{results['sentiment']}" if 'sentiment' in results else ""

    if 'topics' in results:
        topic_lines = "\n".join(f"- **{k}**: {v:.2f}" for k, v in results['topics'].items())
        topics_md = f"**Detected Topics:**\n{topic_lines}"
    else:
        topics_md = ""

    return clean_text_md, summary_md, sentiment_md, topics_md

with gr.Blocks(title="Smart Web Analyzer Plus") as demo:
    gr.Markdown("# 🌐 Smart Web Analyzer Plus")

    with gr.Row():
        url_input = gr.Textbox(label="Enter URL", placeholder="https://example.com")
        modes = gr.CheckboxGroup(["summarize", "sentiment", "topics"],
                                 label="Analysis Types")
        submit_btn = gr.Button("Analyze", variant="primary")

    with gr.Tabs():
        with gr.Tab("📜 Clean Text"):
            clean_text = gr.Markdown()
        with gr.Tab("📝 Summary"):
            summary = gr.Markdown()
        with gr.Tab("🎭 Sentiment"):
            sentiment = gr.Markdown()
        with gr.Tab("📊 Topics"):
            topics = gr.Markdown()

    examples = gr.Examples(
        examples=[
            ["https://www.bbc.com/news/technology-67881954", ["summarize", "sentiment"]],
            ["https://arxiv.org/html/2312.17296v1", ["topics", "summarize"]]
        ],
        inputs=[url_input, modes]
    )

    submit_btn.click(
        fn=lambda url, m: format_results(analyzer.analyze(url, m)),
        inputs=[url_input, modes],
        outputs=[clean_text, summary, sentiment, topics]
    )

if __name__ == "__main__":
    demo.launch()
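Since format_results returns one markdown string per tab, the click handler maps them positionally onto [clean_text, summary, sentiment, topics]. A minimal sketch of that mapping with an illustrative results dict (the sample values below are made up, and importing app also instantiates WebAnalyzer, which loads the transformer models):

# sketch: how a results dict becomes the four tab values
from app import format_results

sample = {
    "clean_text": "Example article text ...",
    "summary": "A short AI-generated summary.",
    "sentiment": "4 stars (0.87)",
    "topics": {"Technology": 0.91, "AI": 0.88},
}

clean_md, summary_md, sentiment_md, topics_md = format_results(sample)
print(summary_md)  # -> **AI Summary:** followed by the sample summary text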
requirements.txt ADDED
@@ -0,0 +1,6 @@
# requirements.txt
gradio>=4.0.0
beautifulsoup4>=4.12.0
requests>=2.31.0
transformers>=4.40.0
torch>=2.2.0
smart_web_analyzer.py ADDED
@@ -0,0 +1,54 @@
# smart_web_analyzer.py
import requests
from bs4 import BeautifulSoup
from transformers import pipeline
import torch

class WebAnalyzer:
    def __init__(self):
        # Use the first GPU when available, otherwise fall back to CPU
        self.device = 0 if torch.cuda.is_available() else -1
        self.models = {
            'summarize': pipeline("summarization", model="facebook/bart-large-cnn",
                                  device=self.device),
            'sentiment': pipeline("text-classification",
                                  model="nlptown/bert-base-multilingual-uncased-sentiment",
                                  device=self.device),
            'topics': pipeline("zero-shot-classification",
                               model="facebook/bart-large-mnli",
                               device=self.device)
        }

    def fetch_content(self, url: str) -> str:
        """Fetch webpage content with a browser-like User-Agent header."""
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
        response = requests.get(url, headers=headers, timeout=15)
        response.raise_for_status()
        return response.text

    def clean_html(self, html: str) -> str:
        """Strip scripts, styles, and markup, returning readable page text."""
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup(['script', 'style']):
            tag.decompose()
        return soup.get_text(separator='\n', strip=True)

    def analyze(self, url: str, modes: list) -> dict:
        """Core analysis pipeline: fetch, clean, then run the requested models."""
        results = {}
        try:
            html = self.fetch_content(url)
            text = self.clean_html(html)
            results['clean_text'] = text

            if 'summarize' in modes:
                # BART has a limited input window, so cap and truncate long pages
                results['summary'] = self.models['summarize'](text[:3000], max_length=150,
                                                              truncation=True)[0]['summary_text']

            if 'sentiment' in modes:
                sentiment = self.models['sentiment'](text[:512], truncation=True)[0]
                results['sentiment'] = f"{sentiment['label']} ({sentiment['score']:.2f})"

            if 'topics' in modes:
                topics = self.models['topics'](text[:512],
                                               candidate_labels=["Technology", "AI", "Business",
                                                                 "Science", "Politics"])
                results['topics'] = dict(zip(topics['labels'], topics['scores']))

        except Exception as e:
            results['error'] = str(e)

        return results
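The analyzer can also be used on its own, outside the Gradio UI. A minimal sketch (the URL is taken from the app's examples; the first call downloads the three Hugging Face models, which takes a while):

from smart_web_analyzer import WebAnalyzer

analyzer = WebAnalyzer()
results = analyzer.analyze("https://www.bbc.com/news/technology-67881954",
                           ["summarize", "sentiment"])

if "error" in results:
    print("Analysis failed:", results["error"])
else:
    print(results["summary"])
    print(results["sentiment"])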
space.yml ADDED
@@ -0,0 +1,5 @@
# space.yml
title: Content Web Analyzer Plus
sdk: gradio
python:
  version: "3.10"