# Web file-viewer residue captured along with the source (not part of the program):
#   RvanB's picture
#   Update model and code
#   d7838fe
#   raw
#   history blame
#   1.15 kB
import functools
import os

import gradio as gr
import pandas as pd
import pymarc

from marcai.pl import SimilarityVectorModel
from marcai.predict import predict
from marcai.process import process
from marcai.utils.parsing import record_dict
# Absolute path of the directory that contains this script.
root = os.path.split(os.path.abspath(__file__))[0]
@functools.lru_cache(maxsize=1)
def _load_model():
    """Load the pretrained similarity model once and reuse it on later calls."""
    return SimilarityVectorModel.from_pretrained("cdlib/marc-match-ai")


def _first_record(xml_file, label):
    """Parse ``xml_file`` with pymarc and return the first record it contains.

    Args:
        xml_file: Value handed straight to ``pymarc.parse_xml_to_array``.
        label: Human-readable name of the input, used in error messages.

    Raises:
        ValueError: If parsing yields no records.
    """
    records = pymarc.parse_xml_to_array(xml_file)
    if not records:
        raise ValueError(f"{label} does not contain any MARC records.")
    return records[0]


def compare(file1, file2):
    """Score whether two uploaded MARC XML records describe the same item.

    Only the first record of each file is used.

    Args:
        file1: First MARC XML upload, passed to ``pymarc.parse_xml_to_array``.
        file2: Second MARC XML upload, passed to ``pymarc.parse_xml_to_array``.

    Returns:
        A dict with keys ``"match"`` and ``"not match"``; the second value is
        ``1 - match``.  NOTE(review): presumably ``predict`` yields a match
        probability in [0, 1] -- confirm against ``marcai.predict``.

    Raises:
        ValueError: If an upload is missing (``None``) or holds no records.
    """
    uploads = {"MARC XML File 1": file1, "MARC XML File 2": file2}

    # Fail early with a readable message instead of an opaque parser error.
    missing = [label for label, upload in uploads.items() if upload is None]
    if missing:
        raise ValueError("No file uploaded for: " + ", ".join(missing))

    # Load records
    record1, record2 = (
        _first_record(upload, label) for label, upload in uploads.items()
    )

    # Turn into dataframes
    df1 = pd.DataFrame.from_dict([record_dict(record1)])
    df2 = pd.DataFrame.from_dict([record_dict(record2)])
    df = process(df1, df2)

    # The model is cached so it is not reloaded on every comparison.
    model = _load_model()
    input_df = df[model.features]

    # Run model; .item() extracts the single prediction as a Python scalar.
    prediction = predict(model, input_df).item()
    return {"match": prediction, "not match": 1 - prediction}
# One upload widget per record being compared.
upload_widgets = [
    gr.File(label="MARC XML File 1"),
    gr.File(label="MARC XML File 2"),
]
# compare() returns a {label: score} dict, which is rendered by a Label.
result_widget = gr.Label(label="Classification")

interface = gr.Interface(
    fn=compare,
    inputs=upload_widgets,
    outputs=result_widget,
    title="MARC Record Matcher",
    description="Upload two MARC XML files with one record each.",
    allow_flagging="never",
)

# Start the web UI as soon as the script is executed.
interface.launch()