Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
from datasets import load_dataset
|
7 |
-
from sklearn.metrics.pairwise import cosine_similarity
|
8 |
from datasets import Features, Value
|
9 |
import plotly.express as px
|
10 |
|
@@ -16,7 +16,6 @@ Secret_token = os.getenv('HF_token')
|
|
16 |
dataset = load_dataset("FDSRashid/embed_matn", token = Secret_token)
|
17 |
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token)['train'].to_pandas()
|
18 |
df = dataset["train"].to_pandas()
|
19 |
-
choice = ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'canberra', 'chebyshev']
|
20 |
|
21 |
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
|
22 |
matn_info = dataset['train'].to_pandas()
|
@@ -32,19 +31,17 @@ matn_info = pd.merge(matn_info, books, on='Book_ID')
|
|
32 |
|
33 |
matn_info = matn_info.reset_index()
|
34 |
df = df.reset_index()
|
35 |
-
|
36 |
cols_to_use = df.columns.difference(matn_info.columns)
|
37 |
-
|
38 |
joined_df = pd.merge(matn_info,df[cols_to_use],left_index=True, right_index=True)
|
39 |
df = joined_df.copy()
|
40 |
taraf_max = np.max(df['taraf_ID'].unique())
|
41 |
|
42 |
-
def plot_similarity_score(taraf_num
|
43 |
taraf_df = df[df['taraf_ID']== taraf_num]
|
44 |
taraf_df['Number'] = np.arange(len(taraf_df))
|
45 |
embed_taraf = taraf_df['embed'].to_list()
|
46 |
-
cos_score =
|
47 |
-
fig = px.imshow(cos_score
|
48 |
matr = cos_score
|
49 |
rows, cols = matr.shape
|
50 |
mask = np.tril(np.ones((rows, cols), dtype=bool), k=-1)
|
@@ -54,11 +51,8 @@ def plot_similarity_score(taraf_num, metr):
|
|
54 |
return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
|
55 |
|
56 |
with gr.Blocks() as demo:
|
57 |
-
gr.Markdown('# Semantic
|
58 |
-
gr.Markdown('Please note, the closer to zero, the better for this approach. This is semantic distance. On the Matrix plot, I\'ve reversed the color scales. This means that more yellow points are more similar, more purple = more dissimilar. ')
|
59 |
-
|
60 |
taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
|
61 |
-
metric = gr.Dropdown(choices = choice, value = 'cosine', label = 'Variable to Display', info = 'Choose the variable to visualize.')
|
62 |
btn = gr.Button('Submit')
|
63 |
-
btn.click(fn = plot_similarity_score, inputs = [taraf_number
|
64 |
demo.launch()
|
|
|
4 |
import os
|
5 |
import pandas as pd
|
6 |
from datasets import load_dataset
|
7 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
8 |
from datasets import Features, Value
|
9 |
import plotly.express as px
|
10 |
|
|
|
16 |
dataset = load_dataset("FDSRashid/embed_matn", token = Secret_token)
|
17 |
books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token)['train'].to_pandas()
|
18 |
df = dataset["train"].to_pandas()
|
|
|
19 |
|
20 |
dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
|
21 |
matn_info = dataset['train'].to_pandas()
|
|
|
31 |
|
32 |
matn_info = matn_info.reset_index()
|
33 |
df = df.reset_index()
|
|
|
34 |
cols_to_use = df.columns.difference(matn_info.columns)
|
|
|
35 |
joined_df = pd.merge(matn_info,df[cols_to_use],left_index=True, right_index=True)
|
36 |
df = joined_df.copy()
|
37 |
taraf_max = np.max(df['taraf_ID'].unique())
|
38 |
|
39 |
+
def plot_similarity_score(taraf_num):
|
40 |
taraf_df = df[df['taraf_ID']== taraf_num]
|
41 |
taraf_df['Number'] = np.arange(len(taraf_df))
|
42 |
embed_taraf = taraf_df['embed'].to_list()
|
43 |
+
cos_score = cosine_similarity(embed_taraf)
|
44 |
+
fig = px.imshow(cos_score)
|
45 |
matr = cos_score
|
46 |
rows, cols = matr.shape
|
47 |
mask = np.tril(np.ones((rows, cols), dtype=bool), k=-1)
|
|
|
51 |
return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
|
52 |
|
53 |
with gr.Blocks() as demo:
|
54 |
+
gr.Markdown('# Semantic Similarity Visualizer')
|
|
|
|
|
55 |
taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
|
|
|
56 |
btn = gr.Button('Submit')
|
57 |
+
btn.click(fn = plot_similarity_score, inputs = [taraf_number], outputs = [gr.Plot(),gr.Plot(), gr.DataFrame()])
|
58 |
demo.launch()
|