Spaces:
Runtime error
Runtime error
Henry Qu
committed on
Commit
·
a79b023
1
Parent(s):
bb448d0
app.py
CHANGED
@@ -4,13 +4,14 @@ from huggingface_hub import hf_hub_download
|
|
4 |
from pathlib import Path
|
5 |
from transformers import GPT2Config, GPT2LMHeadModel, GPT2TokenizerFast
|
6 |
import json
|
|
|
7 |
|
8 |
model = GPT2LMHeadModel.from_pretrained('gpt2')
|
9 |
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
|
10 |
tokenizer.pad_token = tokenizer.eos_token
|
11 |
|
12 |
-
|
13 |
-
|
14 |
json_file = 'index.json'
|
15 |
with open(json_file, 'r') as file:
|
16 |
data = json.load(file)
|
@@ -19,7 +20,7 @@ for key, value in data.items():
|
|
19 |
inputs = tokenizer(text_description, return_tensors="pt", padding="max_length", max_length=128, truncation=True)
|
20 |
outputs = model(**inputs, labels=inputs["input_ids"])
|
21 |
logits = outputs.logits
|
22 |
-
|
23 |
|
24 |
|
25 |
def search_index(query):
|
@@ -28,7 +29,9 @@ def search_index(query):
|
|
28 |
|
29 |
max_similarity = float('-inf')
|
30 |
max_similarity_uuid = None
|
31 |
-
for
|
|
|
|
|
32 |
similarity = (outputs.logits * logits).sum()
|
33 |
if similarity > max_similarity:
|
34 |
max_similarity = similarity
|
|
|
4 |
from pathlib import Path
|
5 |
from transformers import GPT2Config, GPT2LMHeadModel, GPT2TokenizerFast
|
6 |
import json
|
7 |
+
import torch
|
8 |
|
9 |
model = GPT2LMHeadModel.from_pretrained('gpt2')
|
10 |
tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
|
11 |
tokenizer.pad_token = tokenizer.eos_token
|
12 |
|
13 |
+
temp_folder = 'temp'
|
14 |
+
# Create the cache directory for per-item logits. os.mkdir() has no
# `exist_ok` parameter (passing it raises TypeError); os.makedirs() does,
# and makes the call a no-op when the directory already exists.
os.makedirs(temp_folder, exist_ok=True)
|
15 |
json_file = 'index.json'
|
16 |
with open(json_file, 'r') as file:
|
17 |
data = json.load(file)
|
|
|
20 |
inputs = tokenizer(text_description, return_tensors="pt", padding="max_length", max_length=128, truncation=True)
|
21 |
outputs = model(**inputs, labels=inputs["input_ids"])
|
22 |
logits = outputs.logits
|
23 |
+
torch.save(logits, os.path.join(temp_folder, f"{key}.pt"))
|
24 |
|
25 |
|
26 |
def search_index(query):
|
|
|
29 |
|
30 |
max_similarity = float('-inf')
|
31 |
max_similarity_uuid = None
|
32 |
+
for file in os.listdir(temp_folder):
|
33 |
+
uuid = file.split('.')[0]
|
34 |
+
logits = torch.load(os.path.join(temp_folder, file))
|
35 |
similarity = (outputs.logits * logits).sum()
|
36 |
if similarity > max_similarity:
|
37 |
max_similarity = similarity
|