# Hugging Face Spaces page header captured with the source (Space status: Sleeping)
import streamlit as st | |
import pandas as pd | |
from transformers import AutoModelForSequenceClassification, AutoTokenizer | |
import torch | |
from datasets import load_dataset | |
# --- Model, tokenizer and dataset setup --------------------------------------
# Streamlit re-executes this script from top to bottom on every widget
# interaction. Without caching, the model, tokenizer and dataset below would be
# reloaded each time the user changes the dropdown or presses the button, so
# the loads are wrapped in st.cache_resource helpers that run once per process.

model_path = "rb757/new_app"


@st.cache_resource
def _load_model_and_tokenizer(path):
    """Return ``(model, tokenizer)`` loaded from the checkpoint at *path*.

    The pair is cached by Streamlit, keyed on *path*, and the same objects
    are handed back on every rerun; callers must not mutate them.
    """
    return (
        AutoModelForSequenceClassification.from_pretrained(path),
        AutoTokenizer.from_pretrained(path),
    )


@st.cache_resource
def _load_patent_data():
    """Return ``(dataset_dict, train_df, val_df)`` for the HUPD sample.

    The train split covers filings from 2016-01-01 to 2016-01-21 and the
    validation split covers 2016-01-22 to 2016-01-31. The returned objects
    are cached and shared across reruns, so treat them as read-only.
    """
    # NOTE(review): trust_remote_code=True runs the dataset's loading script
    # fetched from the Hub -- confirm the repository is trusted before deploy.
    dataset = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True,
    )
    # Convert each split to a DataFrame once, inside the cached call.
    return (
        dataset,
        pd.DataFrame(dataset['train']),
        pd.DataFrame(dataset['validation']),
    )


# Load model and tokenizer
model, tokenizer = _load_model_and_tokenizer(model_path)

# Load the dataset and its DataFrame views
dataset_dict, train_df, val_df = _load_patent_data()

# Print columns to verify availability
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())
# --- Streamlit UI: pick a patent, show its text, classify on demand ----------


def _as_text(value):
    """Return *value* as a string, mapping missing values (None/NaN) to ''.

    Keeps a missing abstract or claims field from reaching the model as the
    literal text "None" or "nan".
    """
    if isinstance(value, str):
        return value
    # NaN is the only float that is not equal to itself.
    if value is None or (isinstance(value, float) and value != value):
        return ""
    return str(value)


def _decision_label(index, labels):
    """Return the display label for predicted class *index*.

    Uses *labels* when the index is in range. Otherwise falls back to the
    model config's ``id2label`` mapping (or a generic ``LABEL_<index>``)
    instead of raising IndexError when the checkpoint has more classes than
    the hard-coded list.
    """
    if 0 <= index < len(labels):
        return labels[index]
    id2label = getattr(model.config, "id2label", None) or {}
    return id2label.get(index, f"LABEL_{index}")


# Title and description
st.title("Milestone Patent 🐨")
st.write("Select a patent application to evaluate its patentability.")

# Dropdown for patent numbers
patent_numbers = train_df['patent_number'].unique()
selected_patent = st.selectbox("Select Patent Number", patent_numbers)

# Everything below needs a selection; `abstract` and `claims` only exist inside
# this branch, so the button handler is nested here as well.
if selected_patent:
    # Retrieve abstract and claims from the first row matching the selection
    patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
    abstract = _as_text(patent_info['abstract'])
    claims = _as_text(patent_info['claims'])

    # Display the abstract and claims
    st.text_area("Abstract", abstract, height=150)
    st.text_area("Claims", claims, height=150)

    # Submit button
    if st.button("Get Patentability Score"):
        # Prepare the input text: abstract and claims joined by a single space
        input_text = f"{abstract} {claims}"
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

        # Get the model prediction; no gradients are needed for inference
        with torch.no_grad():
            logits = model(**inputs).logits
        predictions = torch.argmax(logits, dim=-1)

        # Display the patentability score
        decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
        score = _decision_label(predictions.item(), decision_labels)
        st.write(f"Patentability Score: **{score}**")