# Spaces: Sleeping  (page-status text captured with the file; not part of the program)
from flask import Flask, render_template, request, redirect, url_for | |
from joblib import load | |
import pandas as pd | |
import re | |
from customFunctions import * | |
import json | |
import datetime | |
import numpy as np | |
from huggingface_hub import hf_hub_download | |
import torch | |
import os | |
# Raise pandas' display column-width limit to 1000 characters so long cell
# values are not truncated when pandas objects are rendered as text.
pd.set_option('display.max_colwidth', 1000)
# Patch torch.load to always load on CPU.
# The original function is kept so the wrapper can delegate to it.
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    Any ``map_location`` supplied by the caller (by keyword, or positionally
    as the second argument of ``torch.load``) is replaced rather than passed
    alongside the forced value, which would raise ``TypeError`` for a
    duplicated argument.

    Args:
        *args: Positional arguments forwarded to the original ``torch.load``.
        **kwargs: Keyword arguments forwarded to the original ``torch.load``.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) >= 2:
        # map_location was given positionally; swap it out in place.
        args = (args[0], cpu) + tuple(args[2:])
    else:
        kwargs['map_location'] = cpu
    return original_torch_load(*args, **kwargs)


torch.load = cpu_load
def load_pipeline_from_hub(filename):
    """Download a serialized pipeline from the Hugging Face Hub and load it.

    The file is fetched from the ``hw01558/nlp-coursework-pipelines``
    repository into ``/tmp/hf_cache`` and then deserialized with joblib's
    ``load``.

    Args:
        filename: Name of the pipeline file inside the repository.

    Returns:
        The object deserialized from the downloaded file.

    Raises:
        ValueError: If ``filename`` is empty or ``None`` (for example when a
            pipeline-id lookup found no match).
    """
    if not filename:
        # Fail early with a clear message instead of handing None to the
        # download call.
        raise ValueError("No pipeline filename was provided.")

    cache_dir = "/tmp/hf_cache"
    os.environ["HF_HUB_CACHE"] = cache_dir  # optional but informative
    repo_id = 'hw01558/nlp-coursework-pipelines'
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    # SECURITY: joblib's load unpickles the file, which can execute arbitrary
    # code. Only point this at a repository whose contents are trusted.
    return load(local_path)
# Registry of selectable pipelines. Each entry carries the numeric id used by
# the form, the display name, and the serialized pipeline's filename.
# NOTE(review): "small dataset" maps to ex2_s3 and "large dataset" to ex2_s2,
# which is the reverse of the numbering used elsewhere — confirm intentional.
PIPELINES = [
    dict(id=1, name='Baseline', filename="pipeline_ex1_s1.joblib"),
    dict(id=2, name='Trained on a FeedForward NN', filename="pipeline_ex1_s2.joblib"),
    dict(id=3, name='Trained on a CRF', filename="pipeline_ex1_s3.joblib"),
    dict(id=4, name='Trained on a small dataset', filename="pipeline_ex2_s3.joblib"),
    dict(id=5, name='Trained on a large dataset', filename="pipeline_ex2_s2.joblib"),
    dict(id=6, name='Embedded using TFIDF', filename="pipeline_ex3_s2.joblib"),
    dict(id=7, name='Embedded using GloVe', filename="pipeline_ex3_s3.joblib"),
    dict(id=8, name='Embedded using Bio2Vec', filename="pipeline_ex3_s4.joblib"),
]

# Template-facing view of the registry: id and name only, no filenames.
pipeline_metadata = [
    {key: entry[key] for key in ('id', 'name')}
    for entry in PIPELINES
]
def get_pipeline_by_id(pipelines, pipeline_id):
    """Return the ``'filename'`` of the first entry whose ``'id'`` matches.

    Args:
        pipelines: Iterable of dicts with ``'id'`` and ``'filename'`` keys.
        pipeline_id: Id to look for.

    Returns:
        The matching entry's filename, or ``None`` when no entry matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['filename']
    return None
def get_name_by_id(pipelines, pipeline_id):
    """Return the ``'name'`` of the first entry whose ``'id'`` matches.

    Args:
        pipelines: Iterable of dicts with ``'id'`` and ``'name'`` keys.
        pipeline_id: Id to look for.

    Returns:
        The matching entry's name, or ``None`` when no entry matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['name']
    return None
def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on ``text`` and return the first prediction.

    Args:
        text: Input handed unchanged to ``pipeline.predict``.
        pipeline: Object exposing a ``predict`` method.

    Returns:
        Element 0 of the prediction result; a NumPy array result is converted
        to nested Python lists before indexing.
    """
    predictions = pipeline.predict(text)
    if not isinstance(predictions, np.ndarray):
        return predictions[0]
    return predictions.tolist()[0]
import os | |
LOG_FILE = "/tmp/usage_log.jsonl"  # Use temporary file path for Hugging Face Spaces


def log_interaction(user_input, model_name, predictions):
    """Append one JSON line describing a prediction request to ``LOG_FILE``.

    Logging is best-effort: any failure is printed and otherwise ignored, so
    a logging problem never interrupts the caller.

    Args:
        user_input: Text submitted by the user.
        model_name: Display name of the pipeline that produced the output.
        predictions: JSON-serializable prediction result.

    Returns:
        None.
    """
    # Naive UTC timestamp in the same text format as datetime.utcnow(),
    # produced without that deprecated call.
    timestamp = (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
    )
    log_entry = {
        "timestamp": timestamp,
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions
    }
    try:
        log_dir = os.path.dirname(LOG_FILE)
        if log_dir:
            # A bare filename has no directory part; os.makedirs("") would
            # raise and silently disable logging.
            os.makedirs(log_dir, exist_ok=True)  # Ensure the directory exists
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(json.dumps(log_entry) + "\n")
    except Exception as e:
        print(f"Error writing to log: {e}")
app = Flask(__name__)


@app.route('/')
def index():
    """Render the landing page with the list of selectable pipelines."""
    return render_template('index.html', pipelines=pipeline_metadata)


# NOTE(review): the URL '/get_data' is assumed from the view name — confirm it
# matches the form action used by index.html.
@app.route('/get_data', methods=['GET', 'POST'])
def get_data():
    """Tokenize the submitted text, label it with the chosen pipeline, render it.

    Reads ``search`` (the text) and ``pipeline_select`` (a pipeline id) from
    the posted form. Non-POST requests are redirected to the landing page.

    Returns:
        The rendered ``index.html`` with ``results`` (token -> label dict) and
        ``name`` (pipeline display name); a redirect for non-POST requests; or
        a 400 response when the pipeline selection is not a known id.
    """
    if request.method != 'POST':
        # Without this branch the view would return None, which Flask rejects.
        return redirect(url_for('index'))

    text = request.form['search']
    # Words become tokens; every non-space punctuation character is its own token.
    tokens = re.findall(r"\w+|[^\w\s]", text)

    try:
        pipeline_id = int(request.form['pipeline_select'])
    except ValueError:
        return "Invalid pipeline selection.", 400

    filename = get_pipeline_by_id(PIPELINES, pipeline_id)
    if filename is None:
        return "Unknown pipeline selection.", 400
    name = get_name_by_id(PIPELINES, pipeline_id)

    if tokens:
        pipeline = load_pipeline_from_hub(filename)
        # The pipeline is given a Series holding one token sequence;
        # requestResults returns the labels of that first sequence.
        tokens_formatted = pd.Series([pd.Series(tokens)])
        labels = requestResults(tokens_formatted, pipeline)
        # NOTE(review): a dict keeps only one entry per distinct token, so a
        # repeated token shows only its last label — confirm this is intended.
        results = dict(zip(tokens, labels))
    else:
        # Empty or whitespace-only input: nothing to predict, skip the download.
        results = {}

    log_interaction(text, name, results)
    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)
# When the file is executed directly, serve the app with Flask's built-in
# server on all network interfaces, port 7860.
if __name__ == "__main__":
    app.run(port=7860, host="0.0.0.0")