Azza / app.py
Walid Aissa
wikipedia api to find the summary of the article related to top keyword
a381bc0
raw
history blame
1.59 kB
import os
import gradio as gr
import wikipediaapi as wk
from transformers import (
TokenClassificationPipeline,
AutoModelForTokenClassification,
AutoTokenizer,
)
from transformers.pipelines import AggregationStrategy
import numpy as np
class KeyphraseExtractionPipeline(TokenClassificationPipeline):
def __init__(self, model, *args, **kwargs):
super().__init__(
model=AutoModelForTokenClassification.from_pretrained(model),
tokenizer=AutoTokenizer.from_pretrained(model),
*args,
**kwargs
)
def postprocess(self, model_outputs):
results = super().postprocess(
model_outputs=model_outputs,
aggregation_strategy=AggregationStrategy.SIMPLE,
)
return np.unique([result.get("word").strip() for result in results])
# Load pipeline
model_name = "ml6team/keyphrase-extraction-kbir-inspec"
extractor = KeyphraseExtractionPipeline(model=model_name)
def keyphrases_out(input): #Not used but might be useful
input = input.replace("\n", " ")
keyphrases = extractor(input)
out = "The Key Phrases in your text are:\n\n"
for k in keyphrases:
out += k + "\n"
return keyphrases
def wikipedia_search(input):
input = input.replace("\n", " ")
keyphrases = extractor(input)
wiki = wk.Wikipedia('en')
for k in keyphrases:
page = wiki.page(k)
if page.exists():
break
return page.summary
demo = gr.Interface(fn=wikipedia_search, inputs = "text", outputs = "text")
demo.launch()