FDSRashid committed on
Commit
913f06b
·
verified ·
1 Parent(s): e563f21

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -12
app.py CHANGED
@@ -4,7 +4,7 @@ import gradio as gr
4
  import os
5
  import pandas as pd
6
  from datasets import load_dataset
7
- from sklearn.metrics.pairwise import cosine_similarity, pairwise_distances
8
  from datasets import Features, Value
9
  import plotly.express as px
10
 
@@ -16,7 +16,6 @@ Secret_token = os.getenv('HF_token')
16
  dataset = load_dataset("FDSRashid/embed_matn", token = Secret_token)
17
  books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token)['train'].to_pandas()
18
  df = dataset["train"].to_pandas()
19
- choice = ['cityblock', 'cosine', 'euclidean', 'l1', 'l2', 'manhattan', 'canberra', 'chebyshev']
20
 
21
  dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
22
  matn_info = dataset['train'].to_pandas()
@@ -32,19 +31,17 @@ matn_info = pd.merge(matn_info, books, on='Book_ID')
32
 
33
  matn_info = matn_info.reset_index()
34
  df = df.reset_index()
35
-
36
  cols_to_use = df.columns.difference(matn_info.columns)
37
-
38
  joined_df = pd.merge(matn_info,df[cols_to_use],left_index=True, right_index=True)
39
  df = joined_df.copy()
40
  taraf_max = np.max(df['taraf_ID'].unique())
41
 
42
- def plot_similarity_score(taraf_num, metr):
43
  taraf_df = df[df['taraf_ID']== taraf_num]
44
  taraf_df['Number'] = np.arange(len(taraf_df))
45
  embed_taraf = taraf_df['embed'].to_list()
46
- cos_score = pairwise_distances(embed_taraf, metric = metr)
47
- fig = px.imshow(cos_score, color_continuous_scale='plasma_r')
48
  matr = cos_score
49
  rows, cols = matr.shape
50
  mask = np.tril(np.ones((rows, cols), dtype=bool), k=-1)
@@ -54,11 +51,8 @@ def plot_similarity_score(taraf_num, metr):
54
  return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
55
 
56
  with gr.Blocks() as demo:
57
- gr.Markdown('# Semantic Distance Visualizer')
58
- gr.Markdown('Please note, the closer to zero, the better for this approach. This is semantic distance. On the Matrix plot, I\'ve reversed the color scales. This means that more yellow points are more similar, more purple = more dissimilar. ')
59
-
60
  taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
61
- metric = gr.Dropdown(choices = choice, value = 'cosine', label = 'Variable to Display', info = 'Choose the variable to visualize.')
62
  btn = gr.Button('Submit')
63
- btn.click(fn = plot_similarity_score, inputs = [taraf_number, metric], outputs = [gr.Plot(),gr.Plot(), gr.DataFrame()])
64
  demo.launch()
 
4
  import os
5
  import pandas as pd
6
  from datasets import load_dataset
7
+ from sklearn.metrics.pairwise import cosine_similarity
8
  from datasets import Features, Value
9
  import plotly.express as px
10
 
 
16
  dataset = load_dataset("FDSRashid/embed_matn", token = Secret_token)
17
  books = load_dataset('FDSRashid/Hadith_info', data_files='Books.csv', token=Secret_token)['train'].to_pandas()
18
  df = dataset["train"].to_pandas()
 
19
 
20
  dataset = load_dataset("FDSRashid/hadith_info", data_files = 'All_Matns.csv',token = Secret_token, features = features)
21
  matn_info = dataset['train'].to_pandas()
 
31
 
32
  matn_info = matn_info.reset_index()
33
  df = df.reset_index()
 
34
  cols_to_use = df.columns.difference(matn_info.columns)
 
35
  joined_df = pd.merge(matn_info,df[cols_to_use],left_index=True, right_index=True)
36
  df = joined_df.copy()
37
  taraf_max = np.max(df['taraf_ID'].unique())
38
 
39
+ def plot_similarity_score(taraf_num):
40
  taraf_df = df[df['taraf_ID']== taraf_num]
41
  taraf_df['Number'] = np.arange(len(taraf_df))
42
  embed_taraf = taraf_df['embed'].to_list()
43
+ cos_score = cosine_similarity(embed_taraf)
44
+ fig = px.imshow(cos_score)
45
  matr = cos_score
46
  rows, cols = matr.shape
47
  mask = np.tril(np.ones((rows, cols), dtype=bool), k=-1)
 
51
  return fig, fig_dis, taraf_df[['matn', 'Number', 'Book_Name', 'Author', 'Hadith Number']]
52
 
53
  with gr.Blocks() as demo:
54
+ gr.Markdown('# Semantic Similarity Visualizer')
 
 
55
  taraf_number = gr.Slider(1,taraf_max , value=10000, label="Taraf", info="Choose the Taraf to Input", step = 1)
 
56
  btn = gr.Button('Submit')
57
+ btn.click(fn = plot_similarity_score, inputs = [taraf_number], outputs = [gr.Plot(),gr.Plot(), gr.DataFrame()])
58
  demo.launch()