Spaces:
Sleeping
Sleeping
Add Streamlit app for patentability score prediction
Browse files
app.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
-
# app.py
|
2 |
-
|
3 |
import streamlit as st
|
4 |
import pandas as pd
|
5 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
6 |
import torch
|
|
|
7 |
|
8 |
# Load model and tokenizer
|
9 |
model_path = "rb757/new_app"
|
@@ -11,20 +10,35 @@ model = AutoModelForSequenceClassification.from_pretrained(model_path)
|
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
12 |
|
13 |
# Load the dataset
|
14 |
-
|
15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
|
17 |
# Title and description
|
18 |
st.title("Milestone Patent 🐨")
|
19 |
st.write("Select a patent application to evaluate its patentability.")
|
20 |
|
21 |
# Dropdown for application filing numbers
|
22 |
-
application_numbers =
|
23 |
selected_application = st.selectbox("Select Application Filing Number", application_numbers)
|
24 |
|
25 |
# Retrieve abstract and claims
|
26 |
if selected_application:
|
27 |
-
patent_info =
|
28 |
abstract = patent_info['abstract']
|
29 |
claims = patent_info['claims']
|
30 |
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
from transformers import AutoModelForSequenceClassification, AutoTokenizer
|
4 |
import torch
|
5 |
+
from datasets import load_dataset
|
6 |
|
7 |
# Load model and tokenizer
|
8 |
model_path = "rb757/new_app"
|
|
|
10 |
tokenizer = AutoTokenizer.from_pretrained(model_path)
|
11 |
|
12 |
# Load the dataset
|
13 |
+
dataset_dict = load_dataset(
|
14 |
+
'HUPD/hupd',
|
15 |
+
name='sample',
|
16 |
+
data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
|
17 |
+
train_filing_start_date='2016-01-01',
|
18 |
+
train_filing_end_date='2016-01-21',
|
19 |
+
val_filing_start_date='2016-01-22',
|
20 |
+
val_filing_end_date='2016-01-31',
|
21 |
+
)
|
22 |
+
|
23 |
+
# Convert to DataFrame
|
24 |
+
train_df = pd.DataFrame(dataset_dict['train'])
|
25 |
+
val_df = pd.DataFrame(dataset_dict['validation'])
|
26 |
+
|
27 |
+
# Print columns to verify availability
|
28 |
+
print("Train set columns:", train_df.columns.tolist())
|
29 |
+
print("Validation set columns:", val_df.columns.tolist())
|
30 |
|
31 |
# Title and description
|
32 |
st.title("Milestone Patent 🐨")
|
33 |
st.write("Select a patent application to evaluate its patentability.")
|
34 |
|
35 |
# Dropdown for application filing numbers
|
36 |
+
application_numbers = train_df['application_number'].unique()
|
37 |
selected_application = st.selectbox("Select Application Filing Number", application_numbers)
|
38 |
|
39 |
# Retrieve abstract and claims
|
40 |
if selected_application:
|
41 |
+
patent_info = train_df[train_df['application_number'] == selected_application].iloc[0]
|
42 |
abstract = patent_info['abstract']
|
43 |
claims = patent_info['claims']
|
44 |
|