|
import gradio as gr
import spacy
|
|
|
|
|
# Russian spaCy pipeline ("md" model), loaded once at import time and reused
# for every query so the model is not re-read from disk per request.
nlp = spacy.load("ru_core_news_md")
|
|
|
|
|
def load_articles(file_path):
    """Read articles from a UTF-8 text file, one article per line.

    Args:
        file_path: Path of the text file to read.

    Returns:
        A list of article strings with leading/trailing whitespace removed.
        Lines that are empty after stripping are skipped, so the result
        never contains empty articles.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file content is not valid UTF-8.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        # Iterate the file object directly instead of readlines() so the raw
        # lines are not held in memory alongside the stripped copies.
        stripped_lines = (line.strip() for line in f)
        # Blank lines carry no text to rank, so they are dropped here.
        return [article for article in stripped_lines if article]
|
|
|
|
|
def find_most_relevant_article(query, articles):
    """Return the article that is most similar to the query.

    The query and every article are parsed with the module-level ``nlp``
    pipeline and ranked by ``Doc.similarity``.

    Args:
        query: Search text entered by the user.
        articles: Iterable of article strings to search.

    Returns:
        The article with the highest similarity score (the first one when
        several share the top score), or an empty string when the query is
        blank or there are no articles to search.
    """
    # Materialise once: the articles are needed both for the emptiness check
    # and for pairing each text with its parsed document.
    articles = list(articles)

    # Guard before touching the model: max() raises ValueError on an empty
    # sequence, and a blank query cannot be ranked meaningfully.
    if not articles or not query or not query.strip():
        return ""

    query_doc = nlp(query)

    # nlp.pipe parses the articles as a batch and yields the documents in
    # input order, so zip keeps every text aligned with its own document.
    scored = zip(articles, nlp.pipe(articles))
    best_article, _ = max(scored, key=lambda pair: query_doc.similarity(pair[1]))
    return best_article
|
|
|
|
|
# Article corpus, read once at import time. The path is relative, so it is
# resolved against the directory the script is started from.
file_path = "dataset.txt"
articles = load_articles(file_path)
|
|
|
|
|
def get_relevant_article(query):
    """Return the best-matching article for a query from the loaded corpus.

    Thin single-argument wrapper that binds the module-level ``articles``
    list to ``find_most_relevant_article``.

    Args:
        query: Search text entered by the user.

    Returns:
        Whatever ``find_most_relevant_article`` returns for this query.
    """
    return find_most_relevant_article(query, articles)
|
|
|
|
|
# Text-in / text-out Gradio UI: the submitted query goes to
# get_relevant_article and the returned article is shown as plain text.
iface = gr.Interface(
    fn=get_relevant_article,
    inputs="text",
    outputs="text",
    title="Поиск релевантных статей из Википедии",
    description="Введите запрос, чтобы найти самую релевантную статью из датасета.",
)

# Start serving the interface.
iface.launch()
|
|