Spaces:
Runtime error
Runtime error
File size: 2,510 Bytes
a110314 913f06b d199ed5 a110314 d199ed5 bdfb520 a110314 bdfb520 0da7061 a110314 c068b81 8f36d42 c068b81 cc6b2ae 37a7276 cc6b2ae ab0e6d7 16bb533 ab0e6d7 c068b81 16bb533 c068b81 913f06b fbd7946 9412d3c fbd7946 913f06b 9bbd6fb 1f77093 d199ed5 a110314 913f06b a110314 5ad39da a110314 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import numpy as np
import gradio as gr
import os
import pandas as pd
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
from datasets import Features, Value
import plotly.express as px
# ---------------------------------------------------------------------------
# Data loading: pull the matn embeddings, the book metadata and the matn text
# from the Hugging Face Hub and combine them into one frame, `df`.
# ---------------------------------------------------------------------------

# Read every column of All_Matns.csv as a string; 'taraf_ID' is converted to
# int further down, once its 'KeyAbsent' placeholder has been replaced.
features = Features(
    {
        'matn': Value('string'),
        'taraf_ID': Value('string'),
        'bookid_hadithid': Value('string'),
    }
)

# Access token for the Hub, read from the environment (None when unset).
Secret_token = os.getenv('HF_token')

# Embeddings first, then the book table, then the matn text.
dataset = load_dataset("FDSRashid/embed_matn", token=Secret_token)
books = load_dataset(
    'FDSRashid/Hadith_info',
    data_files='Books.csv',
    token=Secret_token,
)['train'].to_pandas()
df = dataset["train"].to_pandas()

dataset = load_dataset(
    "FDSRashid/hadith_info",
    data_files='All_Matns.csv',
    token=Secret_token,
    features=features,
)
matn_info = dataset['train'].to_pandas()

# Remove two rows by index label.
matn_info = matn_info.drop(index=[97550, 307206])

# 'KeyAbsent' becomes -1 so that the whole column can be cast to int.
matn_info['taraf_ID'] = matn_info['taraf_ID'].replace('KeyAbsent', -1).astype(int)

# 'bookid_hadithid' has the form '<book id>_<hadith number>'.
matn_info['Book_ID'] = matn_info['bookid_hadithid'].apply(
    lambda key: int(key.split('_')[0])
)
matn_info['Hadith Number'] = matn_info['bookid_hadithid'].apply(
    lambda key: int(key.split('_')[1])
)

# Attach the book metadata through the shared 'Book_ID' column.
matn_info = matn_info.merge(books, on='Book_ID')

matn_info = matn_info.reset_index()
df = df.reset_index()

# Take from the embedding frame only the columns matn_info does not already
# have, then join the two frames row-by-row on their index.
# NOTE(review): this join is positional - it presumes row i of the embedding
# frame belongs to row i of matn_info after the drops/merge above; confirm.
cols_to_use = df.columns.difference(matn_info.columns)
joined_df = matn_info.merge(df[cols_to_use], left_index=True, right_index=True)
df = joined_df.copy()

# Largest taraf id present in the combined frame.
taraf_max = df['taraf_ID'].unique().max()
def plot_similarity_score(taraf_num):
    """Build the similarity heatmap, score histogram and hadith table for a taraf.

    Args:
        taraf_num: Taraf identifier; rows of the module-level ``df`` whose
            ``taraf_ID`` equals this value are selected.

    Returns:
        A 3-tuple ``(heatmap, histogram, table)``:
        the Plotly image of the pairwise cosine-similarity matrix of the
        selected rows' ``embed`` vectors, the Plotly histogram of the scores
        below the matrix diagonal, and a DataFrame with the columns
        ``matn``, ``Number``, ``Book_Name``, ``Author`` and ``Hadith Number``.

    Raises:
        gr.Error: If no row of ``df`` has the requested taraf id.
    """
    # Work on a copy: adding the 'Number' column below must not write into a
    # slice of the shared module-level frame.
    taraf_df = df[df['taraf_ID'] == taraf_num].copy()
    if taraf_df.empty:
        # cosine_similarity cannot handle an empty input; fail with a
        # readable message instead.
        raise gr.Error(f'No hadith found for Taraf {taraf_num}')

    # Running number 0..n-1 for the selected rows, shown in the table.
    taraf_df['Number'] = np.arange(len(taraf_df))

    cos_score = cosine_similarity(taraf_df['embed'].to_list())
    fig = px.imshow(cos_score)

    # Keep only the entries strictly below the diagonal (k=-1), so the
    # diagonal itself is left out of the distribution.
    below_diagonal = np.tril(np.ones(cos_score.shape, dtype=bool), k=-1)
    pair_scores = cos_score[below_diagonal]

    fig_dis = px.histogram(
        x=pair_scores,
        title=f'Similarity Distribution for Taraf {taraf_num}',
        labels={'x': 'Similarity Score'},
        nbins=20,
        template='ggplot2',
    )
    return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
# User interface: a slider to choose the taraf, a submit button, and three
# outputs (two plots and a table) filled by plot_similarity_score.
with gr.Blocks() as demo:
    gr.Markdown('# Semantic Similarity Visualizer')

    # taraf_max comes from NumPy; hand the component a built-in int, and keep
    # the default selection inside the slider range when there are fewer than
    # 10000 tarafs.
    _slider_max = int(taraf_max)
    taraf_number = gr.Slider(
        1,
        _slider_max,
        value=min(10000, _slider_max),
        label="Taraf",
        info="Choose the Taraf to Input",
        step=1,
    )
    btn = gr.Button('Submit')

    # Created after the button so they are laid out below it, in this order.
    heatmap_plot = gr.Plot()
    histogram_plot = gr.Plot()
    hadith_table = gr.DataFrame(wrap=True)

    btn.click(
        fn=plot_similarity_score,
        inputs=[taraf_number],
        outputs=[heatmap_plot, histogram_plot, hadith_table],
    )

demo.launch()
|