rb757 committed on
Commit
76f4b94
·
1 Parent(s): b7b6bac

Add Streamlit app for patentability score prediction

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -1,9 +1,8 @@
1
- # app.py
2
-
3
  import streamlit as st
4
  import pandas as pd
5
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
6
  import torch
 
7
 
8
  # Load model and tokenizer
9
  model_path = "rb757/new_app"
@@ -11,20 +10,35 @@ model = AutoModelForSequenceClassification.from_pretrained(model_path)
11
  tokenizer = AutoTokenizer.from_pretrained(model_path)
12
 
13
  # Load the dataset
14
- dataset_url = "https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather"
15
- df = pd.read_feather(dataset_url)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
  # Title and description
18
  st.title("Milestone Patent 🐨")
19
  st.write("Select a patent application to evaluate its patentability.")
20
 
21
  # Dropdown for application filing numbers
22
- application_numbers = df['application_number'].unique()
23
  selected_application = st.selectbox("Select Application Filing Number", application_numbers)
24
 
25
  # Retrieve abstract and claims
26
  if selected_application:
27
- patent_info = df[df['application_number'] == selected_application].iloc[0]
28
  abstract = patent_info['abstract']
29
  claims = patent_info['claims']
30
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
4
  import torch
5
+ from datasets import load_dataset
6
 
7
  # Load model and tokenizer
8
  model_path = "rb757/new_app"
 
10
  tokenizer = AutoTokenizer.from_pretrained(model_path)
11
 
12
  # Load the dataset
13
+ dataset_dict = load_dataset(
14
+ 'HUPD/hupd',
15
+ name='sample',
16
+ data_files="https://huggingface.co/datasets/HUPD/hupd/resolve/main/hupd_metadata_2022-02-22.feather",
17
+ train_filing_start_date='2016-01-01',
18
+ train_filing_end_date='2016-01-21',
19
+ val_filing_start_date='2016-01-22',
20
+ val_filing_end_date='2016-01-31',
21
+ )
22
+
23
+ # Convert to DataFrame
24
+ train_df = pd.DataFrame(dataset_dict['train'])
25
+ val_df = pd.DataFrame(dataset_dict['validation'])
26
+
27
+ # Print columns to verify availability
28
+ print("Train set columns:", train_df.columns.tolist())
29
+ print("Validation set columns:", val_df.columns.tolist())
30
 
31
  # Title and description
32
  st.title("Milestone Patent 🐨")
33
  st.write("Select a patent application to evaluate its patentability.")
34
 
35
  # Dropdown for application filing numbers
36
+ application_numbers = train_df['application_number'].unique()
37
  selected_application = st.selectbox("Select Application Filing Number", application_numbers)
38
 
39
  # Retrieve abstract and claims
40
  if selected_application:
41
+ patent_info = train_df[train_df['application_number'] == selected_application].iloc[0]
42
  abstract = patent_info['abstract']
43
  claims = patent_info['claims']
44