stogaja committed on
Commit b3c4b1b · 1 Parent(s): 1e1ce58

Upload app.py

Files changed (1)
  1. app.py +111 -0
app.py ADDED
# let's import the libraries
import numpy as np
import pandas as pd
import spacy
import streamlit as st
from datasets import load_dataset
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

tqdm.pandas()

# Load the English STSB dataset (the splits are kept for reference; they are not used below)
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train = pd.DataFrame(stsb_dataset['train'])
stsb_test = pd.DataFrame(stsb_dataset['test'])

# let's create helper functions
nlp = spacy.load("en_core_web_sm")


def text_processing(sentence):
    # lemmatize, lowercase and keep only non-stop-word alphabetic tokens
    return [token.lemma_.lower()
            for token in nlp(sentence)
            if token.is_alpha and not token.is_stop]


def cos_sim(sentence1_emb, sentence2_emb):
    # element-wise cosine similarity between two batches of sentence embeddings
    return np.diag(cosine_similarity(sentence1_emb, sentence2_emb))


# let's read the csv file
data = pd.read_csv("SBERT_data.csv").drop(['Unnamed: 0'], axis=1)

prompt = "charles"
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
                     'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')

# cross-encoder that scores how similar each (sentence1, sentence2) pair is
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = [[s1, s2]
                  for s1, s2 in zip(data['sentence1'], data['sentence2'])]

data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
    sentence_pairs, show_progress_bar=True)

# sorting the values (highest score first)
data = data.sort_values(by=['SBERT CrossEncoder_Score'], ascending=False)

loaded_model = XpathFinder

# Containers
header_container = st.container()
mod_container = st.container()

# Header
with header_container:
    # different levels of text you can include in your app
    st.title("Xpath Finder App")


# model container
with mod_container:
    # collecting input from user
    prompt = st.text_input("Enter your description below ...")

    # Loading the data
    data = pd.read_csv("SBERT_data.csv").drop(['Unnamed: 0'], axis=1)

    data['prompt'] = prompt
    data.rename(columns={'target_text': 'sentence2',
                         'prompt': 'sentence1'}, inplace=True)
    data['sentence2'] = data['sentence2'].astype('str')
    data['sentence1'] = data['sentence1'].astype('str')

    # let's pass the user's input to the loaded cross-encoder
    if prompt:
        # score the user's prompt against every target description in the data
        sentence_pairs = [[s1, s2]
                          for s1, s2 in zip(data['sentence1'], data['sentence2'])]
        data['SBERT CrossEncoder_Score'] = loaded_model.predict(sentence_pairs)

        # sorting the df to get the highest scoring xpath candidates
        data = data.sort_values(
            by=['SBERT CrossEncoder_Score'], ascending=False)
        most_acc = data.head(5)
        simscore = data['SBERT CrossEncoder_Score'].max()

        # predictions
        st.write("Highest Similarity score: ", simscore)
        st.text("Is one of these the Xpath you're looking for?")
        st.write(most_acc["input_text"])
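
Note: app.py is a Streamlit script, so it is launched with streamlit run app.py once SBERT_data.csv and the imported dependencies are in place. The scoring it relies on is the standard sentence-transformers CrossEncoder interface, where predict takes a list of [sentence1, sentence2] pairs and returns one similarity score per pair. A minimal sketch of that pattern, using the same checkpoint as app.py but made-up example sentences:

from sentence_transformers import CrossEncoder

# same checkpoint as app.py; downloaded from the Hub on first use
scorer = CrossEncoder("cross-encoder/stsb-roberta-base")

# each inner list is one (sentence1, sentence2) pair to score
pairs = [
    ["login button", "button that submits the sign-in form"],
    ["login button", "footer copyright notice"],
]

scores = scorer.predict(pairs)  # one STS-style similarity score per pair
print(scores)  # the first pair should score higher than the second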