"""Gradio app: search Wikipedia for a topic, list article URLs, and summarize them with a hosted LLM."""

import os

import gradio as gr
import wikipedia
from huggingface_hub import InferenceClient
from yarl import URL


def _iter_pages(topic: str):
    """Yield Wikipedia page objects for up to 5 search hits on *topic*.

    Disambiguation pages and missing pages are skipped silently, matching
    the best-effort behavior both public functions rely on.
    """
    for title in wikipedia.search(topic, results=5):
        try:
            yield wikipedia.page(title)
        except (wikipedia.exceptions.DisambiguationError,
                wikipedia.exceptions.PageError):
            continue


def get_article_urls(topic: str) -> str:
    """Return newline-separated URLs of up to 5 Wikipedia articles for *topic*.

    Returns an empty string when no usable article is found.
    """
    # URL() round-trip normalizes the page URL before display.
    return "\n".join(str(URL(page.url)) for page in _iter_pages(topic))


def summarize_articles(topic: str) -> str:
    """Summarize up to 5 Wikipedia articles on *topic* via the HF Inference API.

    Requires the HF_API_TOKEN environment variable. Returns one
    "URL + Summary" section per article separated by rules, or a
    human-readable error/empty message (never raises to the UI).
    """
    hf_api_token = os.getenv("HF_API_TOKEN")
    if not hf_api_token:
        return ("Error: Hugging Face API token is not set. "
                "Please set the HF_API_TOKEN environment variable.")

    MODEL_REPO_ID = "Qwen/Qwen2.5-72B-Instruct"
    client = InferenceClient(model=MODEL_REPO_ID, token=hf_api_token)

    summaries = []
    for page in _iter_pages(topic):
        # Truncate article text to keep the prompt within the model's context budget.
        content = page.content[:4000]
        prompt = f"Summarize the following text:\n\n{content}\n\nSummary:"
        summary = client.text_generation(prompt)
        url = str(URL(page.url))
        summaries.append(f"URL: {url}\n\nSummary:\n{summary}\n{'-'*80}")

    # BUG FIX: the original tested `summaries is not None`, which is always
    # true for a list, so the fallback message below was unreachable.
    if summaries:
        return "\n\n".join(summaries)
    return "No articles found or unable to generate summaries."


def main():
    """Build and launch the Gradio UI wiring the two actions to their outputs."""
    with gr.Blocks() as demo:
        gr.Markdown("# 📚 Wikipedia Article Summarizer")
        topic_input = gr.Textbox(label="Enter a Topic", placeholder="e.g., Climate Change")
        with gr.Row():
            get_urls_button = gr.Button("Get Article URLs")
            summarize_button = gr.Button("Summarize Articles")
        urls_output = gr.Textbox(label="Article URLs", lines=5)
        summary_output = gr.Textbox(label="Article Summaries", lines=20)

        get_urls_button.click(
            fn=get_article_urls,
            inputs=topic_input,
            outputs=urls_output,
        )
        summarize_button.click(
            fn=summarize_articles,
            inputs=topic_input,
            outputs=summary_output,
        )

    demo.launch()


if __name__ == "__main__":
    main()