stogaja committed on
Commit
be2864a
·
1 Parent(s): 76d621c

Upload app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -10
app.py CHANGED
@@ -20,20 +20,20 @@ stsb_dataset = load_dataset('stsb_multi_mt', 'en')
20
  stsb_train = pd.DataFrame(stsb_dataset['train'])
21
  stsb_test = pd.DataFrame(stsb_dataset['test'])
22
 
23
- # # let's create helper functions
24
- # nlp = spacy.load("en_core_web_sm")
25
 
26
 
27
- # def text_processing(sentence):
28
- # sentence = [token.lemma_.lower()
29
- # for token in nlp(sentence)
30
- # if token.is_alpha and not token.is_stop]
31
- # return sentence
32
 
33
 
34
- # def cos_sim(sentence1_emb, sentence2_emb):
35
- # cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
36
- # return np.diag(cos_sim)
37
 
38
 
39
  # let's read the csv file
 
20
  stsb_train = pd.DataFrame(stsb_dataset['train'])
21
  stsb_test = pd.DataFrame(stsb_dataset['test'])
22
 
23
+ # let's create helper functions
24
+ nlp = spacy.load("en_core_web_sm")
25
 
26
 
27
+ def text_processing(sentence):
28
+ sentence = [token.lemma_.lower()
29
+ for token in nlp(sentence)
30
+ if token.is_alpha and not token.is_stop]
31
+ return sentence
32
 
33
 
34
+ def cos_sim(sentence1_emb, sentence2_emb):
35
+ cos_sim = cosine_similarity(sentence1_emb, sentence2_emb)
36
+ return np.diag(cos_sim)
37
 
38
 
39
  # let's read the csv file