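"""Streamlit demo: select a HUPD patent application and predict its
patentability decision with a fine-tuned sequence-classification model."""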
import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from datasets import load_dataset
# Load model and tokenizer
model_path = "rb757/new_app"
model = AutoModelForSequenceClassification.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
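
# The checkpoint is assumed to be fine-tuned for 6-way HUPD decision
# classification (matching decision_labels used below); if unsure, check
# model.config.num_labels.
model.eval()  # from_pretrained already returns an eval-mode model; kept explicit for clarity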
# Load the dataset
dataset_dict = load_dataset(
'HUPD/hupd',
name='sample',
data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
train_filing_start_date='2016-01-01',
train_filing_end_date='2016-01-21',
val_filing_start_date='2016-01-22',
val_filing_end_date='2016-01-31',
trust_remote_code=True
)
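
# The 'sample' config with the filing-date arguments above loads a small
# January 2016 slice of HUPD, split by filing date into train (Jan 1-21)
# and validation (Jan 22-31).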
# Convert to DataFrame
train_df = pd.DataFrame(dataset_dict['train'])
val_df = pd.DataFrame(dataset_dict['validation'])
# Print columns to verify availability
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())
# Title and description
st.title("Milestone Patent 🐨")
st.write("Select a patent application to evaluate its patentability.")
# Dropdown for patent numbers
patent_numbers = train_df['patent_number'].unique()
selected_patent = st.selectbox("Select Patent Number", patent_numbers)
# Retrieve abstract and claims
if selected_patent:
    patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
    abstract = patent_info['abstract']
    claims = patent_info['claims']

    # Display the abstract and claims
    st.text_area("Abstract", abstract, height=150)
    st.text_area("Claims", claims, height=150)

    # Submit button
    if st.button("Get Patentability Score"):
        # Prepare the input text
        input_text = f"{abstract} {claims}"
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

        # Get the model prediction
        with torch.no_grad():
            logits = model(**inputs).logits
            predictions = torch.argmax(logits, dim=-1)

        # Map the predicted class index to its decision label.
        # NOTE: this assumes the model's class indices follow the order below;
        # model.config.id2label can be checked to confirm the mapping.
        decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
        score = decision_labels[predictions.item()]
        st.write(f"Patentability Score: **{score}**")
# Additional button to evaluate the model on the validation set
if st.button("Evaluate Model"):
eval_logits = []
for _, row in val_df.iterrows():
input_text = f"{row['abstract']} {row['claims']}"
inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding='max_length', max_length=512)
with torch.no_grad():
logits = model(**inputs).logits
eval_logits.append(logits)
st.write("Evaluation complete.")