# Hugging Face Space: mo01018/Deployment-Trial (status: Sleeping)
# File size: 4,105 Bytes

from flask import Flask, render_template, request, redirect, url_for
from joblib import load
import pandas as pd
import re
from customFunctions import *
import json
import datetime
import numpy as np
from huggingface_hub import hf_hub_download
import torch
import os

# Raise pandas' 'display.max_colwidth' option to 1000.
# NOTE(review): nothing visible in this file prints a DataFrame/Series, so
# this is presumably relied on by customFunctions or by debugging output --
# confirm it is still needed.
pd.set_option('display.max_colwidth', 1000)


# Patch torch.load to always load on CPU
original_torch_load = torch.load


def cpu_load(*args, **kwargs):
    """Call the original ``torch.load`` with ``map_location`` forced to CPU.

    A ``map_location`` supplied by the caller is replaced instead of being
    forwarded next to ours; passing both used to raise
    ``TypeError: got multiple values for keyword argument 'map_location'``.

    Args:
        *args: Positional arguments for ``torch.load``. When a second
            positional argument is present it occupies the ``map_location``
            slot and is overwritten with the CPU device.
        **kwargs: Keyword arguments for ``torch.load``; any
            ``map_location`` entry is overwritten with the CPU device.

    Returns:
        Whatever the original ``torch.load`` returns.
    """
    cpu = torch.device('cpu')
    if len(args) >= 2:
        # torch.load(f, map_location, ...): overwrite the positional slot so
        # we do not pass the same parameter twice.
        args = (args[0], cpu, *args[2:])
    else:
        kwargs['map_location'] = cpu
    return original_torch_load(*args, **kwargs)


torch.load = cpu_load

def load_pipeline_from_hub(filename):
    """Download a serialized pipeline from the Hugging Face Hub and load it.

    Args:
        filename: Name of the joblib file inside the
            ``hw01558/nlp-coursework-pipelines`` repository.

    Returns:
        The object deserialized by ``joblib.load``.

    Raises:
        ValueError: If ``filename`` is ``None`` or empty, e.g. because the
            caller looked up an unknown pipeline id.
    """
    if not filename:
        raise ValueError(
            "A pipeline filename is required to load a pipeline from the hub."
        )

    cache_dir = "/tmp/hf_cache"
    # The explicit cache_dir argument below is what directs the download;
    # the environment variable is set in addition, as in the original code.
    os.environ["HF_HUB_CACHE"] = cache_dir  # optional but informative

    repo_id = 'hw01558/nlp-coursework-pipelines'
    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. Only point repo_id at a repository you trust.
    return load(local_path)

# Registry of the selectable pipelines: the numeric id submitted by the form,
# a display name, and the joblib file that holds the serialized pipeline.
PIPELINES = [
    {'id': 1, 'name': 'Baseline', 'filename': "pipeline_ex1_s1.joblib"},
    {'id': 2, 'name': 'Trained on a FeedForward NN', 'filename': "pipeline_ex1_s2.joblib"},
    {'id': 3, 'name': 'Trained on a CRF', 'filename': "pipeline_ex1_s3.joblib"},
    {'id': 4, 'name': 'Trained on a small dataset', 'filename': "pipeline_ex2_s3.joblib"},
    {'id': 5, 'name': 'Trained on a large dataset', 'filename': "pipeline_ex2_s2.joblib"},
    {'id': 6, 'name': 'Embedded using TFIDF', 'filename': "pipeline_ex3_s2.joblib"},
    {'id': 7, 'name': 'Embedded using GloVe', 'filename': "pipeline_ex3_s3.joblib"},
    {'id': 8, 'name': 'Embedded using Bio2Vec', 'filename': "pipeline_ex3_s4.joblib"},
]

# The same entries reduced to id and name; this is what the template receives.
pipeline_metadata = [
    {field: entry[field] for field in ('id', 'name')}
    for entry in PIPELINES
]

def get_pipeline_by_id(pipelines, pipeline_id):
    """Return the 'filename' of the first entry whose 'id' equals pipeline_id.

    Returns None when no entry in ``pipelines`` matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['filename']
    return None

def get_name_by_id(pipelines, pipeline_id):
    """Return the 'name' of the first entry whose 'id' equals pipeline_id.

    Returns None when no entry in ``pipelines`` matches.
    """
    for entry in pipelines:
        if entry['id'] == pipeline_id:
            return entry['name']
    return None



def requestResults(text, pipeline):
    """Run ``pipeline.predict`` on ``text`` and return its first prediction.

    A NumPy array result is converted to nested Python lists first; any
    other result is indexed as-is.
    """
    predicted = pipeline.predict(text)
    return predicted.tolist()[0] if isinstance(predicted, np.ndarray) else predicted[0]

import os

LOG_FILE = "/tmp/usage_log.jsonl"  # Use temporary file path for Hugging Face Spaces


def log_interaction(user_input, model_name, predictions):
    """Append one usage record to ``LOG_FILE`` as a single JSON line.

    Logging is best-effort: any failure is printed and swallowed so that a
    logging problem never breaks the request being served.

    Args:
        user_input: The raw text submitted by the user.
        model_name: Display name of the pipeline that produced the result.
        predictions: JSON-serializable prediction data to store.

    Returns:
        None.
    """
    log_entry = {
        # Timezone-aware UTC timestamp (ISO 8601, ends in "+00:00").
        # datetime.utcnow() is deprecated and returns a naive value.
        "timestamp": datetime.datetime.now(datetime.timezone.utc).isoformat(),
        "model": model_name,
        "user_input": user_input,
        "predictions": predictions
    }

    try:
        # Serialize before touching the file system so an unserializable
        # entry never creates directories or opens the log.
        line = json.dumps(log_entry)
        log_dir = os.path.dirname(LOG_FILE)
        if log_dir:  # os.makedirs("") raises when LOG_FILE is a bare filename
            os.makedirs(log_dir, exist_ok=True)  # Ensure the directory exists
        with open(LOG_FILE, "a", encoding="utf-8") as log_file:
            log_file.write(line + "\n")
    except Exception as e:
        # Deliberately broad: this is a best-effort boundary (see docstring).
        print(f"Error writing to log: {e}")


# Application object; the route decorators below register their views on it.
app = Flask(__name__)


@app.route('/')
def index():
    """Render the landing page, passing the pipeline id/name list to the template."""
    page = render_template('index.html', pipelines=pipeline_metadata)
    return page


@app.route('/', methods=['POST'])
def get_data():
    """Tag the submitted text with the selected pipeline and render the result.

    Reads the ``search`` and ``pipeline_select`` form fields, tokenizes the
    text, runs the chosen pipeline, logs the interaction and renders
    ``index.html`` with the token-to-label mapping.

    Returns:
        The rendered page, or a ``(message, 400)`` response when the
        pipeline selection is not an integer or matches no known pipeline.
    """
    text = request.form['search']
    # Words, plus every non-space punctuation character as its own token.
    tokens = re.findall(r"\w+|[^\w\s]", text)

    try:
        pipeline_id = int(request.form['pipeline_select'])
    except ValueError:
        return "Invalid pipeline selection.", 400

    filename = get_pipeline_by_id(PIPELINES, pipeline_id)
    if filename is None:
        # Unknown id: fail with a client error instead of passing None on.
        return "Unknown pipeline selected.", 400

    if not tokens:
        # Empty or whitespace-only input: nothing to tag, so skip the model
        # download and prediction and show the plain form again.
        return render_template('index.html', pipelines=pipeline_metadata)

    name = get_name_by_id(PIPELINES, pipeline_id)
    pipeline = load_pipeline_from_hub(filename)

    # A Series holding one Series of tokens, i.e. a batch with one sentence.
    token_batch = pd.Series([pd.Series(tokens)])
    labels = requestResults(token_batch, pipeline)
    # NOTE(review): keying by token collapses repeated tokens (e.g. "the"
    # twice keeps only the last label). Kept as a dict because the template
    # and the log receive this mapping -- confirm before changing its shape.
    results = dict(zip(tokens, labels))

    log_interaction(text, name, results)

    return render_template('index.html', results=results, name=name, pipelines=pipeline_metadata)


# Start Flask's built-in server only when this file is executed as a script.
# Binds to all interfaces on port 7860 -- presumably the port the hosting
# Space expects; confirm against the deployment configuration.
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)

#if __name__ == '__main__':
#app.run(host="0.0.0.0", port=7860)