Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -11,10 +11,8 @@ import spacy
|
|
11 |
import io
|
12 |
from spacy.cli import download
|
13 |
|
14 |
-
# Download
|
15 |
download("en_core_web_sm")
|
16 |
-
|
17 |
-
# Load the model
|
18 |
nlp = spacy.load("en_core_web_sm")
|
19 |
|
20 |
# Download NLTK WordNet data
|
@@ -22,40 +20,24 @@ import nltk
|
|
22 |
nltk.download('wordnet')
|
23 |
nltk.download('omw-1.4')
|
24 |
|
25 |
-
# Load spaCy model
|
26 |
-
nlp = spacy.load("en_core_web_sm")
|
27 |
-
|
28 |
# Load the pre-trained model for image captioning
|
29 |
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-11"
|
30 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
31 |
feature_extractor = ViTImageProcessor.from_pretrained(model_name)
|
32 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
33 |
-
# GPT2 only has bos/eos tokens but not decoder_start/pad tokens
|
34 |
tokenizer.pad_token = tokenizer.eos_token
|
35 |
|
36 |
-
#
|
37 |
model.config.eos_token_id = tokenizer.eos_token_id
|
38 |
model.config.decoder_start_token_id = tokenizer.bos_token_id
|
39 |
model.config.pad_token_id = tokenizer.pad_token_id
|
40 |
|
41 |
-
def preprocess_query(query):
    """Expand a search query into a token set for fuzzy caption matching.

    Runs the spaCy pipeline over *query* and collects, for every token, its
    surface text, its lemma, and its WordNet synonyms (via get_synonyms),
    so a search can match captions on any of these forms.
    """
    tokens = set()
    for token in nlp(query):
        # Union in the surface form, the lemma, and all synonyms at once.
        tokens |= {token.text, token.lemma_} | set(get_synonyms(token.text))
    st.write(f"Query tokens: {tokens}")  # Debugging line
    return tokens
|
50 |
-
|
51 |
def generate_caption(image):
    """Return a text caption for *image* using the module-level ViT-GPT2 model.

    The image is turned into pixel tensors by the feature extractor, the
    encoder-decoder model generates token ids, and the tokenizer decodes
    them (dropping special tokens) into the caption string.
    """
    inputs = feature_extractor(images=image, return_tensors="pt")
    generated_ids = model.generate(inputs.pixel_values)
    caption = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    st.write(f"Generated caption: {caption}")  # Debugging line
    return caption
|
57 |
|
58 |
-
|
59 |
def get_synonyms(word):
|
60 |
synonyms = set()
|
61 |
for syn in wordnet.synsets(word):
|
@@ -63,6 +45,14 @@ def get_synonyms(word):
|
|
63 |
synonyms.add(lemma.name())
|
64 |
return synonyms
|
65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
def search_captions(query, captions):
|
68 |
query_tokens = preprocess_query(query)
|
|
|
11 |
import io
|
12 |
from spacy.cli import download
|
13 |
|
14 |
+
# Download and load the spaCy model
|
15 |
download("en_core_web_sm")
|
|
|
|
|
16 |
nlp = spacy.load("en_core_web_sm")
|
17 |
|
18 |
# Download NLTK WordNet data
|
|
|
20 |
nltk.download('wordnet')
|
21 |
nltk.download('omw-1.4')
|
22 |
|
|
|
|
|
|
|
23 |
# Load the pre-trained model for image captioning
|
24 |
model_name = "NourFakih/Vit-GPT2-COCO2017Flickr-85k-11"
|
25 |
model = VisionEncoderDecoderModel.from_pretrained(model_name)
|
26 |
feature_extractor = ViTImageProcessor.from_pretrained(model_name)
|
27 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
28 |
tokenizer.pad_token = tokenizer.eos_token
|
29 |
|
30 |
+
# Update the model config
|
31 |
model.config.eos_token_id = tokenizer.eos_token_id
|
32 |
model.config.decoder_start_token_id = tokenizer.bos_token_id
|
33 |
model.config.pad_token_id = tokenizer.pad_token_id
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def generate_caption(image):
    """Generate a caption string for *image* with the loaded ViT-GPT2 model.

    Pipeline: feature extractor -> pixel tensor -> model.generate ->
    tokenizer.decode (special tokens stripped).
    """
    features = feature_extractor(images=image, return_tensors="pt")
    token_ids = model.generate(features.pixel_values)
    return tokenizer.decode(token_ids[0], skip_special_tokens=True)
|
40 |
|
|
|
41 |
def get_synonyms(word):
|
42 |
synonyms = set()
|
43 |
for syn in wordnet.synsets(word):
|
|
|
45 |
synonyms.add(lemma.name())
|
46 |
return synonyms
|
47 |
|
48 |
+
def preprocess_query(query):
    """Build the set of search terms derived from *query*.

    For each spaCy token the set receives the raw text, the lemma, and the
    token's WordNet synonyms, giving downstream caption search several
    equivalent forms to match against.
    """
    terms = set()
    for tok in nlp(query):
        terms.add(tok.text)
        terms.add(tok.lemma_)
        terms.update(get_synonyms(tok.text))
    return terms
|
56 |
|
57 |
def search_captions(query, captions):
|
58 |
query_tokens = preprocess_query(query)
|