Spaces:
Runtime error
Runtime error
# import gradio as gr | |
# gr.load("models/BAAI/bge-m3").launch() | |
import functools
import json
import os

import faiss
import gradio as gr
import numpy as np
from FlagEmbedding import BGEM3FlagModel
# Define a function to load the ISCO taxonomy | |
def load_isco_taxonomy(file_path: str) -> list:
    """Load a JSON Lines file into a list of decoded records.

    Every non-blank line is parsed with ``json.loads``. Blank and
    whitespace-only lines (for example a trailing newline at the end of the
    file) are skipped instead of aborting the whole load.

    Args:
        file_path: Path to a UTF-8 encoded file with one JSON value per line.

    Returns:
        The decoded records, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # Strip before filtering so lines holding only whitespace count as blank.
        stripped_lines = (line.strip() for line in file)
        return [json.loads(line) for line in stripped_lines if line]
# Define a function to create a FAISS index | |
def create_faiss_index(isco_taxonomy, model_name='BAAI/bge-m3'):
    """Embed the taxonomy descriptions and persist a FAISS index plus a mapping.

    Writes two files into the current working directory:

    * ``isco_taxonomy.index`` -- a flat L2 FAISS index over the dense
      embeddings of each entry's ``ESCO_DESCRIPTION``.
    * ``isco_taxonomy_mapping.json`` -- the entries keyed by their row number
      in the index (JSON turns those integer keys into strings).

    Args:
        isco_taxonomy: Sequence of dict-like entries, each with an
            ``ESCO_DESCRIPTION`` key.
        model_name: Name passed to ``BGEM3FlagModel`` to build the encoder.

    Raises:
        ValueError: If ``isco_taxonomy`` is empty.
        KeyError: If an entry has no ``ESCO_DESCRIPTION`` key.
    """
    # An empty taxonomy has no embedding dimension to build an index from;
    # fail early with a clear message instead of after loading the model.
    if not isco_taxonomy:
        raise ValueError("isco_taxonomy is empty; nothing to index")

    # Collect the texts before building the encoder so that a malformed entry
    # is reported without first paying for the model load.
    texts = [str(entry['ESCO_DESCRIPTION']) for entry in isco_taxonomy]

    model = BGEM3FlagModel(model_name, use_fp16=True)
    embeddings = model.encode(texts, batch_size=12, max_length=256)['dense_vecs']
    # The embeddings are converted to float32 before being added to the index.
    embeddings = np.array(embeddings).astype('float32')

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)
    faiss.write_index(index, 'isco_taxonomy.index')

    # Row i of the index corresponds to entry i of the taxonomy.
    with open('isco_taxonomy_mapping.json', 'w', encoding='utf-8') as f:
        json.dump(dict(enumerate(isco_taxonomy)), f)
# Define a function to retrieve and rerank using FAISS | |
@functools.lru_cache(maxsize=2)
def _load_embedding_model(model_name):
    """Build the encoder for ``model_name`` once and reuse it on later calls."""
    return BGEM3FlagModel(model_name, use_fp16=True)


def retrieve_and_rerank_faiss(job_duties, model_name="BAAI/bge-m3", top_k=4):
    """Return the taxonomy descriptions closest to a job-duties text.

    The query is embedded with the requested model and searched against the
    FAISS index stored in ``isco_taxonomy.index``. If the index or its
    companion ``isco_taxonomy_mapping.json`` is missing, both are rebuilt from
    ``isco_taxonomy.jsonl`` first. No separate re-ranking step is applied:
    results are returned in the order the index search reports them.

    Args:
        job_duties: Free-text description to look up.
        model_name: Name passed to ``BGEM3FlagModel`` for the query encoder
            (and for the index, when it has to be built here).
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``(description, distance)`` tuples, at most ``top_k`` long.
    """
    # The two artifacts are written together and are only usable as a pair,
    # so rebuild when either one is absent.
    artifacts_present = os.path.exists("isco_taxonomy.index") and os.path.exists(
        "isco_taxonomy_mapping.json"
    )
    if not artifacts_present:
        isco_taxonomy = load_isco_taxonomy('isco_taxonomy.jsonl')
        # Build the index with the same model that will encode the query.
        create_faiss_index(isco_taxonomy, model_name)

    index = faiss.read_index("isco_taxonomy.index")
    with open("isco_taxonomy_mapping.json", "r", encoding="utf-8") as f:
        isco_taxonomy = json.load(f)

    # Constructing the encoder is expensive; the cached helper avoids doing
    # it again on every request.
    model = _load_embedding_model(model_name)
    query_embedding = model.encode([job_duties], max_length=256)["dense_vecs"]
    query_embedding = np.array(query_embedding).astype("float32")

    distances, indices = index.search(query_embedding, top_k)

    # FAISS pads the result with label -1 when fewer than top_k neighbours
    # exist; those slots have no mapping entry and are dropped.
    # Mapping keys are strings because JSON object keys always are.
    return [
        (isco_taxonomy[str(idx)]["ESCO_DESCRIPTION"], dist)
        for idx, dist in zip(indices[0], distances[0])
        if idx >= 0
    ]
# Load data and create index (should be done once and then commented out or moved to a setup script) | |
# isco_taxonomy = load_isco_taxonomy('isco_taxonomy.jsonl') | |
# create_faiss_index(isco_taxonomy) | |
# Gradio Interface | |
def gradio_interface(job_duties):
    """Look up the closest descriptions for ``job_duties`` and format them.

    Args:
        job_duties: Text entered by the user in the Gradio input box.

    Returns:
        A list with one "Description: ..., Distance: ..." string per match
        returned by ``retrieve_and_rerank_faiss``.
    """
    formatted = []
    for desc, dist in retrieve_and_rerank_faiss(job_duties):
        formatted.append(f"Description: {desc}, Distance: {dist}")
    return formatted
# Expose the lookup as a single text-in / text-out Gradio app and start it.
iface = gr.Interface(
    fn=gradio_interface,
    inputs="text",
    outputs="text",
    title="Job Duties to ISCO Descriptions",
)
iface.launch()