File size: 2,390 Bytes
05f3e6a
 
 
 
d8744e8
05f3e6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
01668ec
d8744e8
05f3e6a
 
 
 
 
 
 
 
94a1530
05f3e6a
 
 
 
94a1530
05f3e6a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os

import gradio as gr
import wikipedia
from huggingface_hub import InferenceClient
from yarl import URL


def get_article_urls(topic: str) -> str:
    """Return the URLs of Wikipedia articles matching *topic*, one per line.

    Up to five search results are requested. Each result title is resolved
    to a page; titles that raise ``DisambiguationError`` or ``PageError``
    are skipped. The remaining page URLs are passed through ``yarl.URL``
    and joined with newlines.
    """
    skippable = (
        wikipedia.exceptions.DisambiguationError,
        wikipedia.exceptions.PageError,
    )
    collected = []
    for candidate in wikipedia.search(topic, results=5):
        try:
            collected.append(str(URL(wikipedia.page(candidate).url)))
        except skippable:
            # Ambiguous or missing titles are simply left out of the listing.
            continue
    return "\n".join(collected)


def summarize_articles(topic: str) -> str:
    """Summarize the top Wikipedia articles for *topic* with a hosted LLM.

    The Hugging Face token is read from the ``HF_API_TOKEN`` environment
    variable. Up to five search results are requested; for each page that
    resolves, the first 4000 characters of its content are sent to the
    ``Qwen/Qwen2.5-72B-Instruct`` model through ``InferenceClient`` and the
    generated text is reported together with the page URL.

    Args:
        topic: Free-text search query for Wikipedia.

    Returns:
        The per-article summaries separated by blank lines, an error message
        when the token is not configured, or a "no articles" message when the
        topic is blank or no article could be summarized.
    """
    hf_api_token = os.getenv("HF_API_TOKEN")
    if not hf_api_token:
        return "Error: Hugging Face API token is not set. Please set the HF_API_TOKEN environment variable."

    no_results_message = "No articles found or unable to generate summaries."

    # A blank topic gives the search nothing to match, so answer right away
    # instead of creating a client and making network requests.
    if not topic or not topic.strip():
        return no_results_message

    model_repo_id = "Qwen/Qwen2.5-72B-Instruct"
    client = InferenceClient(model=model_repo_id, token=hf_api_token)

    summaries = []
    for title in wikipedia.search(topic, results=5):
        try:
            page = wikipedia.page(title)
            content = page.content[:4000]
        except (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError):
            # Ambiguous or missing titles are skipped rather than aborting the run.
            continue
        prompt = f"Summarize the following text:\n\n{content}\n\nSummary:"
        summary = client.text_generation(prompt)
        url = str(URL(page.url))
        summaries.append(f"URL: {url}\n\nSummary:\n{summary}\n{'-'*80}")

    # An empty list is still "not None", so test truthiness: this is what
    # makes the fallback message reachable when every title was skipped.
    if summaries:
        return "\n\n".join(summaries)
    return no_results_message

def main() -> None:
    """Build the Gradio interface and launch it.

    The page has one topic textbox and two buttons: one fills the
    "Article URLs" box via ``get_article_urls`` and the other fills the
    "Article Summaries" box via ``summarize_articles``.
    """
    with gr.Blocks() as demo:
        # The books emoji (U+1F4DA) is written as an escape so the heading
        # survives the file being re-saved or served in a legacy encoding;
        # the literal had previously been garbled into mojibake.
        gr.Markdown("# \U0001F4DA Wikipedia Article Summarizer")
        topic_input = gr.Textbox(label="Enter a Topic", placeholder="e.g., Climate Change")
        with gr.Row():
            get_urls_button = gr.Button("Get Article URLs")
            summarize_button = gr.Button("Summarize Articles")
        urls_output = gr.Textbox(label="Article URLs", lines=5)
        summary_output = gr.Textbox(label="Article Summaries", lines=20)

        # Both buttons read the same topic box; each writes to its own output.
        get_urls_button.click(
            fn=get_article_urls,
            inputs=topic_input,
            outputs=urls_output,
        )
        summarize_button.click(
            fn=summarize_articles,
            inputs=topic_input,
            outputs=summary_output,
        )

    demo.launch()

# Start the app only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    main()