import streamlit as st
import pandas as pd
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
from datasets import load_dataset

# Load the fine-tuned classifier and its tokenizer once per session;
# st.cache_resource keeps them in memory across Streamlit reruns.
@st.cache_resource
def load_model_and_tokenizer(path):
    model = AutoModelForSequenceClassification.from_pretrained(path)
    tokenizer = AutoTokenizer.from_pretrained(path)
    return model, tokenizer

model_path = "rb757/new_app"
model, tokenizer = load_model_and_tokenizer(model_path)

# Load the HUPD sample split (filtered by filing date via the dataset's
# loader arguments) and convert both splits to DataFrames.
# st.cache_data memoises the result so the download and parsing run only once.
@st.cache_data
def load_hupd_frames():
    dataset_dict = load_dataset(
        'HUPD/hupd',
        name='sample',
        data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
        train_filing_start_date='2016-01-01',
        train_filing_end_date='2016-01-21',
        val_filing_start_date='2016-01-22',
        val_filing_end_date='2016-01-31',
        trust_remote_code=True
    )
    return pd.DataFrame(dataset_dict['train']), pd.DataFrame(dataset_dict['validation'])

train_df, val_df = load_hupd_frames()

# Log the available columns to the server console so missing fields are easy to spot
print("Train set columns:", train_df.columns.tolist())
print("Validation set columns:", val_df.columns.tolist())

# Check if 'patent_number' exists
if 'patent_number' not in train_df.columns:
    st.error("Column 'patent_number' not found in the training dataset.")
else:
    # Title and description
    st.title("📜 Milestone Patent Evaluation 🐨")
    st.write("Select a patent application to evaluate its patentability.")

    # Dropdown for patent numbers
    patent_numbers = train_df['patent_number'].unique()
    selected_patent = st.selectbox("Select Patent Number", patent_numbers)

    # Retrieve relevant information
    patent_info = train_df[train_df['patent_number'] == selected_patent].iloc[0]
    title = patent_info['title']
    abstract = patent_info['abstract']
    claims = patent_info['claims']
    background = patent_info['background']
    summary = patent_info['summary']
    description = patent_info['description']
    cpc_label = patent_info['cpc_label']
    ipc_label = patent_info['ipc_label']
    filing_date = patent_info['filing_date']
    patent_issue_date = patent_info['patent_issue_date']
    date_published = patent_info['date_published']
    examiner_id = patent_info['examiner_id']
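    # Note: some fields (e.g. patent_issue_date) may be empty for applications
    # that were not granted at the time the dataset was compiled.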

    # Display the information
    st.markdown("### Title")
    st.markdown(f"**{title}**")
    
    st.markdown("---")
    
    st.markdown("### Abstract")
    st.text_area("Abstract", abstract, height=150)
    
    st.markdown("---")
    
    st.markdown("### Claims")
    st.text_area("Claims", claims, height=150)
    
    st.markdown("---")
    
    st.markdown("### Background")
    st.text_area("Background", background, height=150)
    
    st.markdown("---")
    
    st.markdown("### Summary")
    st.text_area("Summary", summary, height=150)
    
    st.markdown("---")
    
    st.markdown("### Description")
    st.text_area("Description", description, height=150)
    
    st.markdown("---")
    
    st.markdown("### CPC Label")
    st.markdown(f"**{cpc_label}**")
    
    st.markdown("### IPC Label")
    st.markdown(f"**{ipc_label}**")
    
    st.markdown("### Filing Date")
    st.markdown(f"**{filing_date}**")
    
    st.markdown("### Patent Issue Date")
    st.markdown(f"**{patent_issue_date}**")
    
    st.markdown("### Date Published")
    st.markdown(f"**{date_published}**")
    
    st.markdown("### Examiner ID")
    st.markdown(f"**{examiner_id}**")

    # Submit button
    if st.button("Get Patentability Score"):
        # Prepare the input text
        input_text = f"{title} {abstract} {claims} {background} {summary} {description}"
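        # Concatenating every section usually exceeds the model's maximum input
        # length, so truncation=True means only the leading tokens
        # (typically 512 for BERT-style encoders) are actually scored.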
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

        # Get the model prediction
        with torch.no_grad():
            logits = model(**inputs).logits
            predictions = torch.argmax(logits, dim=-1)

        # Display the patentability score
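        # Label order assumed to follow HUPD's decision-category indexing
        # (0=REJECTED, 1=ACCEPTED, 2=PENDING, 3=CONT-REJECTED, 4=CONT-ACCEPTED, 5=CONT-PENDING).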
        decision_labels = ['REJECTED', 'ACCEPTED', 'PENDING', 'CONT-REJECTED', 'CONT-ACCEPTED', 'CONT-PENDING']
        score = decision_labels[predictions.item()]
        st.success(f"Patentability Score: **{score}**")