import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from datasets import load_dataset

# Load the fine-tuned classifier and its tokenizer once per session;
# st.cache_resource keeps them in memory across Streamlit reruns.
@st.cache_resource
def load_model_and_tokenizer(path):
    model = AutoModelForSequenceClassification.from_pretrained(path)
    tokenizer = AutoTokenizer.from_pretrained(path)
    return model, tokenizer

model_path = "rb757/new_app"
model, tokenizer = load_model_and_tokenizer(model_path)

# Load the HUPD sample split (filtered by filing date via the dataset's
# loader arguments) and convert both splits to DataFrames.
# st.cache_data memoises the result so the download and parsing run only once.
@st.cache_data
def load_hupd_frames():
    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True
    )
    return pd.DataFrame(dataset_dict['train']), pd.DataFrame(dataset_dict['validation'])

train_df, val_df = load_hupd_frames()

# Log the available columns to the server console so missing fields are easy to spot
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())

# Check if 'patent_number' exists
if 'patent_number' not in train_df.columns:
    st.error("Column 'patent_number' not found in the training dataset.")
else:
    # Title and description
    st.title("📜 Milestone Patent Evaluation 🐨")
    st.write("Select a patent application to evaluate its patentability.")

    # Dropdown for patent numbers
    patent_numbers = train_df['patent_number'].unique()
    selected_patent = st.selectbox("Select Patent Number", patent_numbers)

    # Retrieve relevant information
    patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
    title = patent_info['title']
    abstract = patent_info['abstract']
    claims = patent_info['claims']
    background = patent_info['background']
    summary = patent_info['summary']
    description = patent_info['description']
    cpc_label = patent_info['cpc_label']
    ipc_label = patent_info['ipc_label']
    filing_date = patent_info['filing_date']
    patent_issue_date = patent_info['patent_issue_date']
    date_published = patent_info['date_published']
    examiner_id = patent_info['examiner_id']
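    # Note: some fields (e.g. patent_issue_date) may be empty for applications
    # that were not granted at the time the dataset was compiled.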

    # Display the information
    st.markdown("### Title")
    st.markdown(f"**{title}**")
    
    st.markdown("---")
    
    st.markdown("### Abstract")
    st.text_area("Abstract", abstract, height=150)
    
    st.markdown("---")
    
    st.markdown("### Claims")
    st.text_area("Claims", claims, height=150)
    
    st.markdown("---")
    
    st.markdown("### Background")
    st.text_area("Background", background, height=150)
    
    st.markdown("---")
    
    st.markdown("### Summary")
    st.text_area("Summary", summary, height=150)
    
    st.markdown("---")
    
    st.markdown("### Description")
    st.text_area("Description", description, height=150)
    
    st.markdown("---")
    
    st.markdown("### CPC Label")
    st.markdown(f"**{cpc_label}**")
    
    st.markdown("### IPC Label")
    st.markdown(f"**{ipc_label}**")
    
    st.markdown("### Filing Date")
    st.markdown(f"**{filing_date}**")
    
    st.markdown("### Patent Issue Date")
    st.markdown(f"**{patent_issue_date}**")
    
    st.markdown("### Date Published")
    st.markdown(f"**{date_published}**")
    
    st.markdown("### Examiner ID")
    st.markdown(f"**{examiner_id}**")

    # Submit button
    if st.button("Get Patentability Score"):
        # Prepare the input text
        input_text = f"{title} {abstract} {claims} {background} {summary} {description}"
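        # Concatenating every section usually exceeds the model's maximum input
        # length, so truncation=True means only the leading tokens
        # (typically 512 for BERT-style encoders) are actually scored.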
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

        # Get the model prediction
        with torch.no_grad():
            logits = model(**inputs).logits
            predictions = torch.argmax(logits, dim=-1)

        # Display the patentability score
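        # Label order assumed to follow HUPD's decision-category indexing
        # (0=REJECTED, 1=ACCEPTED, 2=PENDING, 3=CONT-REJECTED, 4=CONT-ACCEPTED, 5=CONT-PENDING).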
        decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
        score = decision_labels[predictions.item()]
        st.success(f"Patentability Score: **{score}**")