NourFakih committed on
Commit
bd964c8
·
verified ·
1 Parent(s): ab81e75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -5
app.py CHANGED
@@ -7,9 +7,16 @@ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoToken
7
  import nltk
8
  import tempfile
9
  import zipfile
 
 
 
 
10
 
 
11
  nltk.download('wordnet')
12
  nltk.download('omw-1.4')
 
 
13
 
14
  # Load the pre-trained models for image captioning and summarization
15
  model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
@@ -35,15 +42,22 @@ def get_synonyms(word):
35
  synonyms.add(lemma.name())
36
  return synonyms
37
 
 
 
 
 
 
 
 
 
 
38
  def search_captions(query, captions):
39
- query_words = query.split()
40
- query_synonyms = set(query_words)
41
- for word in query_words:
42
- query_synonyms.update(get_synonyms(word))
43
 
44
  results = []
45
  for path, caption in captions.items():
46
- if any(word in caption.split() for word in query_synonyms):
 
47
  results.append((path, caption))
48
 
49
  return results
 
7
  import nltk
8
  import tempfile
9
  import zipfile
10
+ from nltk.corpus import wordnet
11
+ import spacy
12
+ import io
13
+ from spacy.cli import download
14
 
15
+ # Download necessary NLP models
16
  nltk.download('wordnet')
17
  nltk.download('omw-1.4')
18
+ download("en_core_web_sm")
19
+ nlp = spacy.load("en_core_web_sm")
20
 
21
  # Load the pre-trained models for image captioning and summarization
22
  model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
 
42
  synonyms.add(lemma.name())
43
  return synonyms
44
 
45
+ def preprocess_query(query):
46
+ doc = nlp(query)
47
+ tokens = set()
48
+ for token in doc:
49
+ tokens.add(token.text)
50
+ tokens.add(token.lemma_)
51
+ tokens.update(get_synonyms(token.text))
52
+ return tokens
53
+
54
  def search_captions(query, captions):
55
+ query_tokens = preprocess_query(query)
 
 
 
56
 
57
  results = []
58
  for path, caption in captions.items():
59
+ caption_tokens = preprocess_query(caption)
60
+ if query_tokens & caption_tokens:
61
  results.append((path, caption))
62
 
63
  return results