File size: 3,681 Bytes
0e46a6f
83e19cd
 
0e46a6f
83e19cd
 
 
7a4b44a
 
 
d5ab659
0e46a6f
83e19cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e46a6f
 
83e19cd
 
 
 
0e46a6f
83e19cd
 
 
 
 
 
 
 
 
0e46a6f
83e19cd
824c411
83e19cd
 
54d8a35
83e19cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import streamlit as st
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset

# Milestone-3
# Seed the two session-state entries rendered in the result line so they can
# be read before any score has been computed; existing values are kept.
for _state_key in ("viability", "score"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""


def get_patent_score(pipeline, abstract, claims):
    """Classify a patent's abstract and claims and store the combined verdict.

    Runs ``pipeline`` once on each text and writes two values into
    ``st.session_state``:

    * ``score`` -- mean of the two returned scores, expressed as a percentage
      string with two decimal places.
    * ``viability`` -- the shared label when both texts receive the same
      label; otherwise the label of the text with the strictly higher score
      (on a tie the claims label is used).

    Args:
        pipeline: Callable taking a text and returning a sequence whose first
            item is a mapping with ``"label"`` and ``"score"`` keys.
        abstract: Abstract text to classify.
        claims: Claims text to classify.

    Returns:
        None. Results are published through ``st.session_state``.
    """
    abstract_top = pipeline(abstract)[0]
    claims_top = pipeline(claims)[0]

    abstract_label = abstract_top["label"]
    claims_label = claims_top["label"]
    abstract_conf = abstract_top["score"]
    claims_conf = claims_top["score"]

    st.session_state.score = "{:.2f}".format(
        ((abstract_conf + claims_conf) / 2) * 100
    )

    # Break a disagreement by comparing score against score. Labels are
    # strings, so ordering a float score against a label raises TypeError.
    if abstract_label == claims_label or abstract_conf > claims_conf:
        st.session_state.viability = abstract_label
    else:
        st.session_state.viability = claims_label


# Load the tokenizer and the sequence-classification model from the same
# checkpoint path, then wrap both in a text-classification pipeline.
checkpoint_file = "./checkpoint-3024"
tokenizer = AutoTokenizer.from_pretrained(checkpoint_file)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_file)
# NOTE: this rebinds the imported `pipeline` factory to the pipeline instance;
# the rest of the script refers to the instance by this name.
pipeline = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
)

# Load the HUPD "sample" configuration with the train split covering filings
# from 2016-01-01 to 2016-01-21 and the validation split 2016-01-22 to
# 2016-01-31.
dataset_dict = load_dataset(
    "HUPD/hupd",
    name="sample",
    data_files="https://huggingface.co/datasets/HUPD/hupd/blob/main/hupd_metadata_2022-02-22.feather",
    icpr_label=None,
    train_filing_start_date="2016-01-01",
    train_filing_end_date="2016-01-21",
    val_filing_start_date="2016-01-22",
    val_filing_end_date="2016-01-31",
)

dataset = dataset_dict["train"]

# Read the first rows once as a dict of column lists. Indexing a column inside
# a loop (dataset["title"][i]) re-reads the whole column on every access, and
# a fixed range(10) fails when the split holds fewer than 10 rows.
sample_rows = dataset[:10]

# Map each patent title to its abstract / claims text for the selection UI.
abstract_dict = dict(zip(sample_rows["title"], sample_rows["abstract"]))
claims_dict = dict(zip(sample_rows["title"], sample_rows["claims"]))

st.title("Patent Viability Score Checker")

# Let the user pick one of the sampled patents; its abstract and claims are
# pre-filled into editable text areas.
chosen_patent = st.selectbox(
    "Choose a patent to run the checker on", options=abstract_dict.keys())
abstract = st.text_area(
    label="Abstract",
    value=abstract_dict[chosen_patent]
)
claims = st.text_area(
    label="Claims",
    value=claims_dict[chosen_patent]
)

# Positional arguments for an `on_click` callback are passed through `args`;
# `st.button` has no `options` parameter.
st.button("Check Viability", on_click=get_patent_score,
          args=(pipeline, abstract, claims))

# Show whatever verdict is currently stored in session state.
st.markdown(body="Outcome: {}, Score: {}%".format(
    st.session_state.viability, st.session_state.score))

# Milestone-2
# if "sentiment" not in st.session_state:
#     st.session_state.sentiment = ""

# if "score" not in st.session_state:
#     st.session_state.score = ""


# def run_model(text_in, model_in):
#     classifier = pipeline(task="sentiment-analysis",
#                           model=model_in)
#     analysis = classifier(text_in)
#     st.session_state.sentiment = analysis[0]["label"]
#     st.session_state.score = "{:.2f}".format(analysis[0]["score"] * 100)


# models_available = {"Roberta Large English": "siebert/sentiment-roberta-large-english",
#                     "Generic": "Seethal/sentiment_analysis_generic_dataset",
#                     "Twitter Roberta": "cardiffnlp/twitter-roberta-base-sentiment"}

# st.title("Sentiment Analysis Web Application")
# text_input = st.text_area(
#     label="Enter the text to analyze", value="I Love Pizza")
# model_picked = st.selectbox(
#     "Choose a model to run on", options=models_available.keys())

# st.button("Submit", on_click=run_model, args=(
#     text_input, models_available[model_picked]))

# st.markdown(body="Sentiment: {}, Confidence Score: {} %".format(
#     st.session_state.sentiment, st.session_state.score))