"""Streamlit app that predicts a decision label for a HUPD patent application.

The user picks an application number from the HUPD sample training split,
reviews its abstract and claims, and asks a fine-tuned sequence-classification
model for a predicted decision label.

Streamlit re-executes this script from top to bottom on every widget
interaction, so the model, tokenizer and dataset are loaded through
Streamlit's caching decorators instead of at plain module level.
"""

import pandas as pd
import streamlit as st
import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Hugging Face Hub identifier of the fine-tuned classifier and its tokenizer.
model_path = "rb757/new_app"

# Label names, indexed by the class id predicted by the model.
decision_labels = [
    'REJECTED',
    'ACCEPTED',
    'PENDING',
    'CONT-REJECTED',
    'CONT-ACCEPTED',
    'CONT-PENDING',
]


@st.cache_resource
def load_model_and_tokenizer(path):
    """Load the classifier and tokenizer once per server process.

    Args:
        path: Model identifier or local path passed to ``from_pretrained``.

    Returns:
        A ``(model, tokenizer)`` tuple; the model is switched to eval mode.
    """
    loaded_model = AutoModelForSequenceClassification.from_pretrained(path)
    loaded_model.eval()
    loaded_tokenizer = AutoTokenizer.from_pretrained(path)
    return loaded_model, loaded_tokenizer


@st.cache_data
def load_patent_frames():
    """Load the HUPD sample splits and convert them to DataFrames.

    The result is cached so the dataset is not reloaded on every Streamlit
    rerun; the column listings are therefore printed only when the cache is
    first populated.

    Returns:
        A ``(train_df, val_df)`` tuple of pandas DataFrames.
    """
    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True,
    )

    train_frame = pd.DataFrame(dataset_dict['train'])
    val_frame = pd.DataFrame(dataset_dict['validation'])

    # Print columns to verify availability
    print("Train set columns:", train_frame.columns.tolist())
    print("Validation set columns:", val_frame.columns.tolist())
    return train_frame, val_frame


def predict_decision(classifier, text_tokenizer, abstract, claims):
    """Return the predicted decision label for one application.

    Args:
        classifier: Sequence-classification model exposing ``.logits`` output.
        text_tokenizer: Tokenizer matching ``classifier``.
        abstract: Abstract text of the application.
        claims: Claims text of the application.

    Returns:
        The entry of ``decision_labels`` at the predicted class index. If the
        index falls outside that list, the model config's own ``id2label``
        name is returned instead of raising ``IndexError``.
    """
    input_text = f"{abstract} {claims}"
    inputs = text_tokenizer(
        input_text, return_tensors="pt", truncation=True, padding=True
    )

    with torch.no_grad():
        logits = classifier(**inputs).logits
    class_index = int(torch.argmax(logits, dim=-1).item())

    if 0 <= class_index < len(decision_labels):
        return decision_labels[class_index]
    # The checkpoint has more classes than the hard-coded list covers.
    return classifier.config.id2label.get(class_index, f"LABEL_{class_index}")


# Load model and tokenizer
model, tokenizer = load_model_and_tokenizer(model_path)

# Load the dataset as DataFrames
train_df, val_df = load_patent_frames()

# Title and description
st.title("Milestone Patent 🐨")
st.write("Select a patent application to evaluate its patentability.")

# Dropdown for application filing numbers
application_numbers = train_df['application_number'].unique()
selected_application = st.selectbox(
    "Select Application Filing Number", application_numbers
)

# Retrieve abstract and claims. st.selectbox returns None when there are no
# options, so test for None explicitly rather than relying on truthiness.
if selected_application is not None:
    patent_info = train_df[
        train_df['application_number'] == selected_application
    ].iloc[0]
    abstract = patent_info['abstract']
    claims = patent_info['claims']

    # Display the abstract and claims
    st.text_area("Abstract", abstract, height=150)
    st.text_area("Claims", claims, height=150)

    # Submit button
    if st.button("Get Patentability Score"):
        score = predict_decision(model, tokenizer, abstract, claims)
        # Display the patentability score
        st.write(f"Patentability Score: **{score}**")