new_patent_app / app.py
rb757's picture
Add Streamlit app for patentability score prediction
5176e8f
raw
history blame
2.29 kB
import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from datasets import load_dataset
# Load model and tokenizer
model_path = "rb757/new_app"


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Load the sequence-classification model and its tokenizer once.

    Streamlit re-runs this script from the top on every widget interaction
    (changing the selectbox, clicking the button). Without caching, both
    ``from_pretrained`` calls would be repeated on each rerun.
    ``st.cache_resource`` keeps one shared instance for the lifetime of the
    server process, keyed on ``path``.

    Args:
        path: Model identifier or directory accepted by ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(path)
    loaded_tokenizer = AutoTokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


model, tokenizer = _load_model_and_tokenizer(model_path)
# Load the dataset
@st.cache_resource
def _load_hupd_sample():
    """Load the 'sample' configuration of the HUPD dataset once.

    Streamlit re-executes the whole script on every interaction, so the
    ``load_dataset`` call is cached to avoid repeating it on each rerun.
    The train split covers filings from 2016-01-01 to 2016-01-21 and the
    validation split covers 2016-01-22 to 2016-01-31.

    Returns:
        The object returned by ``load_dataset``; it is indexed below with
        the 'train' and 'validation' keys.
    """
    # NOTE(review): trust_remote_code=True allows the dataset repository's
    # loading script to execute locally. Keep it only while that repository
    # is trusted.
    return load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True,
    )


@st.cache_data
def _splits_to_frames(_splits):
    """Convert the train and validation splits to pandas DataFrames once.

    The leading underscore on ``_splits`` tells Streamlit not to hash the
    argument when building the cache key, so the conversion is computed a
    single time and reused on later reruns.

    Args:
        _splits: Mapping with 'train' and 'validation' entries.

    Returns:
        A ``(train_df, val_df)`` tuple of DataFrames.
    """
    return pd.DataFrame(_splits['train']), pd.DataFrame(_splits['validation'])


dataset_dict = _load_hupd_sample()

# Convert to DataFrame
train_df, val_df = _splits_to_frames(dataset_dict)

# Print columns to verify availability
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())
# Page header
st.title("Milestone Patent 🐨")
st.write("Select a patent application to evaluate its patentability.")

# Class names, indexed by the class id the model predicts
decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']


def _predict_decision(abstract_text, claims_text):
    """Return the predicted decision label for one patent application.

    The abstract and claims are joined with a single space, tokenized with
    truncation enabled, and passed through the model with gradient tracking
    disabled. The index of the largest logit selects the entry of
    ``decision_labels`` that is returned.

    Args:
        abstract_text: Abstract of the application.
        claims_text: Claims of the application.

    Returns:
        One of the strings in ``decision_labels``.
    """
    combined = "{} {}".format(abstract_text, claims_text)
    encoded = tokenizer(combined, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        output = model(**encoded)
    class_id = output.logits.argmax(dim=-1).item()
    return decision_labels[class_id]


# Dropdown listing each distinct patent number in the training split
patent_numbers = train_df['patent_number'].unique()
selected_patent = st.selectbox("Select Patent Number", patent_numbers)

if selected_patent:
    # Several rows may share a patent number; the first match is used.
    matching_rows = train_df.loc[train_df['patent_number'] == selected_patent]
    patent_info = matching_rows.iloc[0]
    abstract, claims = patent_info['abstract'], patent_info['claims']

    # Show the text that will be scored
    st.text_area("Abstract", abstract, height=150)
    st.text_area("Claims", claims, height=150)

    # The button sits inside the selection branch because it needs the
    # abstract and claims retrieved above.
    if st.button("Get Patentability Score"):
        score = _predict_decision(abstract, claims)
        st.write(f"Patentability Score: **{score}**")