Update app.py
app.py CHANGED
@@ -7,9 +7,16 @@ from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoToken
 import nltk
 import tempfile
 import zipfile
+from nltk.corpus import wordnet
+import spacy
+import io
+from spacy.cli import download
 
+# Download necessary NLP models
 nltk.download('wordnet')
 nltk.download('omw-1.4')
+download("en_core_web_sm")
+nlp = spacy.load("en_core_web_sm")
 
 # Load the pre-trained models for image captioning and summarization
 model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-09"
@@ -35,15 +42,22 @@ def get_synonyms(word):
             synonyms.add(lemma.name())
     return synonyms
 
+def preprocess_query(query):
+    doc = nlp(query)
+    tokens = set()
+    for token in doc:
+        tokens.add(token.text)
+        tokens.add(token.lemma_)
+        tokens.update(get_synonyms(token.text))
+    return tokens
+
 def search_captions(query, captions):
-
-    query_synonyms = set(query_words)
-    for word in query_words:
-        query_synonyms.update(get_synonyms(word))
+    query_tokens = preprocess_query(query)
 
     results = []
     for path, caption in captions.items():
-
+        caption_tokens = preprocess_query(caption)
+        if query_tokens & caption_tokens:
             results.append((path, caption))
 
     return results
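For context, here is a minimal standalone sketch of the search flow after this change. The captions dictionary and query below are illustrative, and the body of get_synonyms is assumed to follow the usual WordNet synset/lemma loop, since only its last two lines appear in the diff.

import nltk
import spacy
from nltk.corpus import wordnet
from spacy.cli import download

# One-time setup, mirroring the new imports and downloads in app.py
nltk.download('wordnet')
nltk.download('omw-1.4')
download("en_core_web_sm")
nlp = spacy.load("en_core_web_sm")

def get_synonyms(word):
    # Assumed body: gather every lemma name from every WordNet synset of the word
    synonyms = set()
    for syn in wordnet.synsets(word):
        for lemma in syn.lemmas():
            synonyms.add(lemma.name())
    return synonyms

def preprocess_query(query):
    # Expand a string into its surface forms, spaCy lemmas, and WordNet synonyms
    doc = nlp(query)
    tokens = set()
    for token in doc:
        tokens.add(token.text)
        tokens.add(token.lemma_)
        tokens.update(get_synonyms(token.text))
    return tokens

def search_captions(query, captions):
    # Return captions whose expanded token set overlaps the expanded query
    query_tokens = preprocess_query(query)
    results = []
    for path, caption in captions.items():
        caption_tokens = preprocess_query(caption)
        if query_tokens & caption_tokens:
            results.append((path, caption))
    return results

# Illustrative data; in the app the captions come from the captioning model
captions = {
    "img1.jpg": "a dog running on the beach",
    "img2.jpg": "a plate of food on a wooden table",
}
print(search_captions("dog near the sea", captions))

Because both the query and each caption are expanded with lemmas and WordNet synonyms, matching is intentionally loose: any shared surface form, lemma, or synonym counts as a hit.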