Spaces:
Runtime error
Runtime error
Commit
·
bf5ad3c
0
Parent(s):
First commit
Browse files- .streamlit/config.toml +15 -0
- Dockerfile +23 -0
- README.md +12 -0
- logo.png +0 -0
- packages.txt +0 -0
- pages/1_🏷_Label_Clause_Demo.py +98 -0
- pages/2_🏷_Label_Contract_Demo.py +121 -0
- pages/3_⛏_Extract_Demo.py +182 -0
- requirements.txt +63 -0
- 🏡_Home.py +55 -0
.streamlit/config.toml
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[theme]
|
2 |
+
base="light"
|
3 |
+
primaryColor="#F63366"
|
4 |
+
backgroundColor="#FFFFFF"
|
5 |
+
secondaryBackgroundColor="#F0F2F6"
|
6 |
+
textColor="#262730"
|
7 |
+
font="sans serif"
|
8 |
+
#primaryColor="#C7C6F9"
|
9 |
+
#backgroundColor="#FFFFFF"
|
10 |
+
#secondaryBackgroundColor="#A6A3F5"
|
11 |
+
#textColor="#06052B"
|
12 |
+
#font="sans serif"
|
13 |
+
|
14 |
+
[server]
|
15 |
+
maxUploadSize = 200
|
Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.8.9
|
2 |
+
|
3 |
+
WORKDIR /app
|
4 |
+
|
5 |
+
COPY ./requirements.txt /app/requirements.txt
|
6 |
+
COPY ./packages.txt /app/packages.txt
|
7 |
+
|
8 |
+
RUN apt-get update && xargs -r -a /app/packages.txt apt-get install -y && rm -rf /var/lib/apt/lists/*
|
9 |
+
RUN pip3 install --no-cache-dir -r /app/requirements.txt
|
10 |
+
|
11 |
+
# User
|
12 |
+
RUN useradd -m -u 1000 user
|
13 |
+
USER user
|
14 |
+
ENV HOME /home/user
|
15 |
+
ENV PATH $HOME/.local/bin:$PATH
|
16 |
+
|
17 |
+
WORKDIR $HOME
|
18 |
+
RUN mkdir app
|
19 |
+
WORKDIR $HOME/app
|
20 |
+
COPY . $HOME/app
|
21 |
+
|
22 |
+
EXPOSE 8501
|
23 |
+
# Start the multipage app from its home script; this commit adds no app.py.
CMD streamlit run 🏡_Home.py
|
README.md
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
title: Simplexico Label Demo
|
3 |
+
emoji: 📚
|
4 |
+
colorFrom: blue
|
5 |
+
colorTo: green
|
6 |
+
sdk: streamlit
|
7 |
+
sdk_version: 1.17.0
|
8 |
+
# Entry script of the multipage app (this commit adds no app.py).
app_file: 🏡_Home.py
|
9 |
+
pinned: false
|
10 |
+
---
|
11 |
+
|
12 |
+
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
logo.png
ADDED
![]() |
packages.txt
ADDED
File without changes
|
pages/1_🏷_Label_Clause_Demo.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
|
6 |
+
from huggingface_hub import hf_hub_download
|
7 |
+
|
8 |
+
import streamlit as st
|
9 |
+
import streamlit.components.v1 as components
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
from lime.lime_text import LimeTextExplainer
|
14 |
+
from lime import lime_text
|
15 |
+
|
16 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
17 |
+
REPO_ID = "simplexico/cuad-sklearn-clause-classifier"
|
18 |
+
FILENAME = "CUAD-clause-classifier.pkl"
|
19 |
+
|
20 |
+
EXAMPLE_TEXT = """This Agreement and any dispute or claim arising out of or in connection with it
|
21 |
+
or its subject matter or formation (including non-contractual disputes or claims) shall be
|
22 |
+
governed by and construed in accordance with the law of England."""
|
23 |
+
|
24 |
+
## Layout stuff
|
25 |
+
st.set_page_config(
|
26 |
+
page_title="Label Clause Demo",
|
27 |
+
page_icon="🏷",
|
28 |
+
layout="wide",
|
29 |
+
initial_sidebar_state="expanded",
|
30 |
+
menu_items={
|
31 |
+
'Get Help': 'mailto:[email protected]',
|
32 |
+
'Report a bug': None,
|
33 |
+
'About': "## This a demo showcasing different Legal AI Actions"
|
34 |
+
}
|
35 |
+
)
|
36 |
+
|
37 |
+
st.title('🏷 Label Clause Demo')
|
38 |
+
st.write("""
|
39 |
+
This demo shows how AI can be used to label text.
|
40 |
+
We've trained an AI model to label a clause by its clause type.
|
41 |
+
""")
|
42 |
+
st.write("**👈 Enter a clause on the left** and hit the button **Label Clause** to see the demo in action")
|
43 |
+
|
44 |
+
|
45 |
+
@st.cache(allow_output_mutation=True)
def load_model():
    """Fetch the pickled clause classifier from the Hugging Face Hub and load it.

    The file FILENAME is downloaded from REPO_ID (authenticated with HF_TOKEN)
    and deserialized with joblib.

    Returns:
        The object stored in the downloaded pickle file.
    """
    # NOTE(review): joblib.load unpickles the file, so REPO_ID must remain a
    # trusted source.
    local_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=HF_TOKEN)
    return joblib.load(local_path)
|
51 |
+
|
52 |
+
|
53 |
+
@st.cache(allow_output_mutation=True)
def get_prediction_prob(text):
    """Classify one clause with the module-level ``model``.

    Args:
        text: the clause text to classify.

    Returns:
        A ``(label, probabilities)`` pair: the first item of ``model.predict``
        and the first row of ``model.predict_proba`` for a one-item batch.
    """
    batch = [text]
    predicted_label = model.predict(batch)[0]
    class_probabilities = model.predict_proba(batch)[0]
    return predicted_label, class_probabilities
|
58 |
+
|
59 |
+
|
60 |
+
st.sidebar.title('Enter clause text')
|
61 |
+
text = st.sidebar.text_area(label='', value=EXAMPLE_TEXT, height=250)
|
62 |
+
button = st.sidebar.button('**Label Clause**', type='primary', use_container_width=True)
|
63 |
+
|
64 |
+
with st.spinner('Loading model...'):
|
65 |
+
model = load_model()
|
66 |
+
|
67 |
+
classes = [s.upper() for s in model.classes_]
|
68 |
+
|
69 |
+
if button:
|
70 |
+
with st.spinner('Processing Clause...'):
|
71 |
+
y_pred, y_probs = get_prediction_prob(text)
|
72 |
+
explainer = LimeTextExplainer(class_names=[cls[:9] + '…' for cls in model.classes_])
|
73 |
+
exp = explainer.explain_instance(text,
|
74 |
+
model.predict_proba,
|
75 |
+
num_features=10,
|
76 |
+
top_labels=1)
|
77 |
+
col1, col2 = st.columns(2)
|
78 |
+
with col1:
|
79 |
+
st.header('🤖 Prediction Results')
|
80 |
+
st.write(
|
81 |
+
f"The model predicts that this is a **{y_pred}** clause with **{y_probs.max() * 100:.2f}%** confidence.")
|
82 |
+
|
83 |
+
fig = go.Figure(go.Bar(
|
84 |
+
x=y_probs * 100,
|
85 |
+
y=model.classes_,
|
86 |
+
orientation='h'))
|
87 |
+
fig.update_layout(
|
88 |
+
title="Model Confidence",
|
89 |
+
xaxis_title="Confidence (%)",
|
90 |
+
yaxis_title="Clause Type",
|
91 |
+
)
|
92 |
+
st.plotly_chart(fig, use_container_width=True)
|
93 |
+
|
94 |
+
with col2:
|
95 |
+
st.header('🔮 Prediction Explainability')
|
96 |
+
st.write(
|
97 |
+
'We can perform an analysis to work out what terms in the clause were most important in deciding the predicted clause type:')
|
98 |
+
components.html(exp.as_html(predict_proba=False), height=800)
|
pages/2_🏷_Label_Contract_Demo.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import joblib
|
3 |
+
|
4 |
+
import plotly.graph_objects as go
|
5 |
+
|
6 |
+
import streamlit as st
|
7 |
+
import streamlit.components.v1 as components
|
8 |
+
|
9 |
+
from huggingface_hub import hf_hub_download
|
10 |
+
|
11 |
+
import numpy as np
|
12 |
+
|
13 |
+
from lime.lime_text import LimeTextExplainer
|
14 |
+
from lime import lime_text
|
15 |
+
|
16 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
17 |
+
REPO_ID = "simplexico/cuad-sklearn-contract-classifier"
|
18 |
+
FILENAME = "CUAD-contract-classifier.pkl"
|
19 |
+
|
20 |
+
CHAR_LIMIT = 250
|
21 |
+
EXAMPLE_TEXT = """Exhibit 10.7
|
22 |
+
CONSULTING AGREEMENT
|
23 |
+
Aduro Biotech, Inc., with a place of business at 740 Heinz Avenue, Berkeley, CA 94710 ("Aduro") and IREYA B.V having an address at Staalwijkstraat 16, 2313 XR Leiden, the Netherlands, represented by Andrea van Elsas, ("Consultant") agree to all terms and conditions of this Consulting Agreement ("Agreement") dated June 1, 2020, effective as of July 1, 2020 ("Effective Date").
|
24 |
+
1. Services. At the request and direction of Aduro and the agreement of Consultant, Consultant will provide advice and consultation to Aduro with respect to its research, clinical development programs and other business matters as requested by Aduro from time to time.
|
25 |
+
2. Compensation and Expenses. Aduro shall pay Consultant for the Services at the rate of €500 per hour. On a monthly basis, Consultant shall submit to Aduro an invoice for the hours worked along with itemized documentation and receipts and other information for pre-approved travel and/or out-of- pocket expenses as Aduro reasonably requests at the time reimbursement is requested. Consultant will not incur any travel and/or other out-of-pocket expenses of more than €5,000 individually or €20,000 in the aggregate without the prior written consent of Aduro. Aduro shall pay Consultant any amounts due that are not reasonably disputed by Aduro, by check or direct bank deposit, within thirty days after receiving the invoice. Consultant's sole compensation for the Services shall be the amounts set forth above in this Section 2. Invoices shall be sent to the attention of:
|
26 |
+
[email protected] Attn: Accounts Payable
|
27 |
+
3. Term of Agreement. This Agreement shall begin on the Effective Date and shall continue until December 31, 2020, unless extended or earlier terminated. Either party may terminate this Agreement at any time on prior written notice to the other. This Agreement may be extended upon mutual written agreement of the parties.
|
28 |
+
4. Confidential Information.
|
29 |
+
(a) "Confidential Information" means any information, materials or methods in whatever form or embodiment that has not been made available by Aduro to the general public and any information, materials or methods in the possession or control of Consultant on the Effective Date or developed in the performance of the Services, except that Confidential Information shall not include any information, material or method that (i) at the time of disclosure is in, or after disclosure becomes part of the public domain, through no improper act on the part of Consultant or any of its employees; (ii) was in Consultant's possession at the time of disclosure, as shown by written evidence, and was not acquired, directly or indirectly, from work with Aduro; or (iii) Consultant receives from a third party, provided that such Confidential Information was not obtained by such third party, directly or indirectly, from Aduro.
|
30 |
+
Specific information disclosed as part of the Confidential Information shall not be deemed to be in the public domain or in the prior possession of Consultant merely because it is encompassed or contemplated by more general information in the public domain or in the prior possession of the Consultant. Failure to mark any of the Confidential Information as confidential or proprietary shall not affect its status as Confidential Information under the terms of this Agreement.
|
31 |
+
(b) Consultant shall keep all Confidential Information confidential, and Consultant shall not disclose, disseminate, publish, reproduce or use Confidential Information except to perform the Services. If Consultant is required by judicial or administrative process to disclose Confidential Information, Consultant shall promptly notify Aduro to allow Aduro a reasonable time to oppose such process and Consultant shall reasonably cooperate in Aduro's efforts.
|
32 |
+
(c) On Aduro's request, or upon the termination or expiration of this Agreement, Consultant shall immediately: (i) stop using Confidential Information; (ii) return all materials provided by Aduro to Consultant that contain Confidential Information, except for one copy that may be retained by Consultant's legal counsel to confirm compliance with the obligations under this Agreement; (iii) destroy all copies of Confidential Information in any form including Confidential Information contained in computer memory or data storage apparatus or materials prepared by or for Consultant; and (iv) provide a written warranty to Aduro that Consultant has taken all the actions described in the foregoing Subparagraphs 4(c)(i-iii).
|
33 |
+
(d) Any breach of this Paragraph 4 by an employee or agent of Consultant shall be deemed to be a breach by Consultant.
|
34 |
+
(e) Defend Trade Secrets Act Notice: Nothing herein shall prevent Consultant from reporting possible violations of federal or state law or regulation to any governmental agency or entity, or making other disclosures that are protected under the whistleblower provisions of federal or state law or regulation. Consultant does not need the prior authorization of Aduro to make any such reports or disclosures and is not required to notify Aduro that it has made such reports or disclosures. In addition, as set forth in 18 U.S.C. §1833(b), Consultant shall not be held criminally or civilly liable under any federal or state trade secret law for the disclosure of a trade secret that is made in confidence to a federal, state or local government official, either directly or indirectly, or to an attorney, and that is made solely for the purpose of reporting or investigating a suspected violation of law, or that is made in a complaint or other document filed in a lawsuit or other proceeding if such filing is made under seal.
|
35 |
+
5. Independent Contractor. Consultant's relationship to Aduro shall be that of an independent contractor. Consultant shall be responsible for the timely payment of his or her own self-employment and income taxes. Neither party shall have any authority to bind the other.
|
36 |
+
6. Intellectual Property. Aduro shall be the sole and exclusive owner of, and Consultant hereby assigns to Aduro, any and all writings, documents, work product, inventions, developments, improvements, discoveries, know-how, processes, chemical entities, compounds, plans, memoranda, tests, research, designs, specifications, models and data that Consultant makes, conceives, discovers or develops, either solely or jointly with any other person in performance of the Services (collectively, "Work Product"). Consultant shall promptly disclose to Aduro all information relating to Work Product as appropriate as part of the Services and at the request of Aduro. To the extent, if any, that Consultant has rights in or to any Work Product or any data or inventions developed in connection with work under this Agreement ("Aduro IP"), Consultant hereby irrevocably assigns and transfers to Aduro, and to the extent that an executory assignment is not enforceable, Consultant hereby agrees to assign and transfer to Aduro, in writing, from time to time, upon request, any and all right, title, or interest that Consultant has or may obtain in any Work Product and/or Aduro IP without the necessity of further consideration. Aduro shall be entitled to obtain and hold in its own name all copyrights, patents, trade secrets and trademarks with respect thereto. At Aduro's request and expense, Consultant shall assist Aduro in acquiring and maintaining its right in and title to, any Work Product. Such assistance may include, but will not be limited to, signing applications and other documents, cooperating in legal proceedings, and taking any other steps considered necessary or desirable by Aduro.
|
37 |
+
7. Nonsolicitation. From the Effective Date and for twelve (12) months after the termination of this Agreement (the "Restricted Period"), Consultant shall not, without Aduro's prior written consent, directly or indirectly, solicit or encourage any employee or contractor of Aduro or its affiliates to terminate employment with, or cease providing Services to, Aduro or its affiliates. In the event of a breach of this Paragraph 7 by Consultant, Aduro shall be entitled to entry of injunctive relief. Such injunctive remedy shall be nonexclusive and shall be in addition to any and all other remedies which may be available to it at law or in equity, including without limitation, the recovery of direct, indirect, incidental, consequential and/or punitive damages.
|
38 |
+
8. Representations. Consultant represents as follows:
|
39 |
+
(a) Consultant is not subject to any other agreement that Consultant will violate by signing this Agreement;
|
40 |
+
(b) Consultant has and shall continue to have the knowledge, experience, qualifications and required skill to perform, and shall perform, the Services in a professional manner;
|
41 |
+
(c) Consultant to perform the Services in accordance with all Applicable Law; and
|
42 |
+
(d) During the term of this Agreement, Consultant will not, directly or indirectly (whether for compensation or without compensation) engage in or provide consulting services, or enter into any agreement either written or oral, that would present a material conflict with any of the provisions of this Agreement, or would preclude Consultant from complying with the terms and conditions hereof. If during the term of this Agreement any situation or circumstance arises that might reasonably be expected to present a conflict of interest, or if Consultant might be unable to render Services or otherwise participate in such work without risk of breaching an obligation of confidentiality to another party, Consultant will promptly advise the Company's General Counsel of the situation and Company and Consultant shall, in good faith, attempt to resolve any such conflicts(s). If requested by the Company's General Counsel, Consultant will recuse herself from providing Services for the duration of the conflict.
|
43 |
+
9. Material Non-Public Information. Consultant may have access to, or learn, "material non-public information" about Aduro or companies working with Aduro during the course of performing Services under this Agreement. Consultant acknowledges that it is illegal to buy or sell Aduro's stock or the stock of companies working with Aduro, on the basis of "material non-public information." It is also illegal to pass such information on to others who use it to buy or sell Aduro stock. Consultant is subject to and will comply with Aduro's Insider Trading and Trading Window Policy.
|
44 |
+
10. Miscellaneous. This Agreement shall be construed and enforced in accordance with the laws of the State of California, without regard to the conflict of law principles of California or any other jurisdiction. This Agreement contains the entire agreement and understanding of the parties relating to the subject matter hereof and merges and supersedes all prior discussions, agreements and understandings of every nature between them with respect to the subject matter hereof. For the avoidance of doubt, this Agreement does not supersede or in modify in anyway any other written agreement between the parties. This Agreement may not be changed or modified, except by an agreement in writing signed by both of the parties hereto. The obligations of Consultant as set forth herein, other than Consultant's obligations to perform the Project, shall survive the termination of Consultant's engagement with Aduro. If any provision of this Agreement is found to be illegal or unenforceable, the other provisions of this Agreement shall remain effective and enforceable to the greatest extent permitted by law. This Agreement shall not be assignable by Consultant. This Agreement may be executed in any number of counterparts, and each such counterpart shall be deemed to be an original instrument, but all such counterparts together shall constitute but one agreement.
|
45 |
+
ADURO BIOTECH, INC. CONSULTANT
|
46 |
+
By: /s/ Stephen T. Isaacs By: /s/ Andrea van Elsas Name: Stephen T. Isaacs Name: Andrea van Elsas Title: President and Chief Executive Officer Title: Chief Scientific Officer"""
|
47 |
+
|
48 |
+
## Layout stuff
|
49 |
+
st.set_page_config(
|
50 |
+
page_title="Label Contract Demo",
|
51 |
+
page_icon="🏷",
|
52 |
+
layout="wide",
|
53 |
+
initial_sidebar_state="expanded",
|
54 |
+
menu_items={
|
55 |
+
'Get Help': 'mailto:[email protected]',
|
56 |
+
'Report a bug': None,
|
57 |
+
'About': "## This a demo showcasing different Legal AI Actions"
|
58 |
+
}
|
59 |
+
)
|
60 |
+
|
61 |
+
st.title('🏷 Label Contract Demo')
|
62 |
+
st.write("""
|
63 |
+
This demo shows how AI can be used to label text.
|
64 |
+
We've trained an AI model to label a contract by its contract type.
|
65 |
+
""")
|
66 |
+
st.write("**👈 Enter a contract on the left** and hit the button **Label Contract** to see the demo in action")
|
67 |
+
|
68 |
+
|
69 |
+
@st.cache(allow_output_mutation=True)
def load_model():
    """Download the pickled contract classifier from the Hub and deserialize it.

    Uses REPO_ID / FILENAME to locate the file and HF_TOKEN to authenticate.

    Returns:
        The object stored in the downloaded pickle file.
    """
    # NOTE(review): joblib.load unpickles the file, so REPO_ID must remain a
    # trusted source.
    pickle_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=HF_TOKEN)
    return joblib.load(pickle_path)
|
75 |
+
|
76 |
+
|
77 |
+
@st.cache(allow_output_mutation=True)
def get_prediction_prob(text):
    """Run the module-level ``model`` on a single contract text.

    Args:
        text: the contract text to classify.

    Returns:
        A tuple of the predicted label (first item of ``model.predict``) and
        the per-class scores (first row of ``model.predict_proba``).
    """
    samples = [text]
    label = model.predict(samples)[0]
    scores = model.predict_proba(samples)[0]
    return label, scores
|
82 |
+
|
83 |
+
|
84 |
+
model = load_model()
|
85 |
+
classes = [s.title() for s in model.classes_]
|
86 |
+
|
87 |
+
text = st.sidebar.text_area('Enter Contract text:', value=EXAMPLE_TEXT, height=500)
|
88 |
+
button = st.sidebar.button('Label Contract', type='primary', use_container_width=True)
|
89 |
+
|
90 |
+
if button:
    # NOTE(review): nesting was reconstructed from a flattened diff; confirm
    # that only the model work belongs inside the spinner.
    # Only the first CHAR_LIMIT characters are classified and explained; use
    # the module constant rather than repeating the literal.
    text = text[:CHAR_LIMIT]
    with st.spinner('Processing Contract..'):
        y_pred, y_probs = get_prediction_prob(text)
        # Class names are shortened to 9 characters plus an ellipsis for the
        # LIME explanation; model.predict_proba is the scoring function.
        explainer = LimeTextExplainer(class_names=[cls[:9] + '…' for cls in classes])
        exp = explainer.explain_instance(text,
                                         model.predict_proba,
                                         num_features=10,
                                         top_labels=1)

    col1, col2 = st.columns(2)
    with col1:
        st.header('🤖 Prediction Results')
        st.write(
            f"The model predicts that this contract is a **{y_pred.title()}** with **{y_probs.max() * 100:.2f}%** confidence.")

        # Horizontal bar chart of the per-class probabilities, as percentages.
        fig = go.Figure(go.Bar(
            x=y_probs * 100,
            y=model.classes_,
            orientation='h'))
        fig.update_layout(
            title="Model Confidence",
            xaxis_title="Confidence (%)",
            # This page classifies contracts, so the axis lists contract types.
            yaxis_title="Contract Type",
        )
        st.plotly_chart(fig, use_container_width=True)

    with col2:
        st.header('🔮 Prediction Explainability')
        st.write(
            'We can perform an analysis to work out what terms in the contract were most important in deciding the predicted contract type:')

        components.html(exp.as_html(predict_proba=False), height=800)
|
pages/3_⛏_Extract_Demo.py
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
|
4 |
+
from huggingface_hub import snapshot_download
|
5 |
+
|
6 |
+
import streamlit as st
|
7 |
+
import streamlit.components.v1 as components
|
8 |
+
|
9 |
+
import spacy
|
10 |
+
from spacy import displacy
|
11 |
+
from spacy.tokens import Span
|
12 |
+
|
13 |
+
import pandas as pd
|
14 |
+
import numpy as np
|
15 |
+
|
16 |
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
17 |
+
REPO_ID = "simplexico/cuad-spacy-custom-ner"
|
18 |
+
|
19 |
+
EXAMPLE_TEXT = """Exhibit 10.16 CONSULTING AGREEMENT
|
20 |
+
This Consulting Agreement (the "Agreement") is made and entered into as of this 2nd day of January 2020,
|
21 |
+
by and between Global Technologies, Ltd (hereinafter the "Company"),
|
22 |
+
a Delaware corporation whose address is 501 1st Ave N., Suite 901, St. Petersburg, FL 33701 and Timothy Cabrera (hereinafter the "Consultant"),
|
23 |
+
an individual whose address is 11718 SE Federal Hwy., Suite 372, Hobe Sound, FL 33455 (individually, a "Party"; collectively, the "Parties")."""
|
24 |
+
|
25 |
+
## Layout stuff
|
26 |
+
st.set_page_config(
|
27 |
+
page_title="Extract Demo",
|
28 |
+
page_icon="⛏",
|
29 |
+
layout="wide",
|
30 |
+
initial_sidebar_state="expanded",
|
31 |
+
menu_items={
|
32 |
+
'Get Help': 'mailto:[email protected]',
|
33 |
+
'Report a bug': None,
|
34 |
+
'About': "## This a demo showcasing different Legal AI Actions"
|
35 |
+
}
|
36 |
+
)
|
37 |
+
|
38 |
+
st.title('⛏ Extract Demo')
|
39 |
+
st.write("""
|
40 |
+
This demo shows how AI can be used to extract information from text.
|
41 |
+
We've trained an AI model to extract key pieces of information from a contract recital.
|
42 |
+
""")
|
43 |
+
st.write("**👈 Enter a contract recital on the left** and hit the button **Extract Data** to see the demo in action")
|
44 |
+
|
45 |
+
|
46 |
+
@st.cache(allow_output_mutation=True)
def load_model():
    """Download the REPO_ID snapshot into the working directory and load spaCy.

    Returns:
        The pipeline returned by ``spacy.load('model-best')``.
    """
    # The snapshot is written to './'. Presumably it contains the 'model-best'
    # directory loaded on the next line - confirm against the repo layout.
    snapshot_download(repo_id=REPO_ID, token=HF_TOKEN, local_dir='./')
    return spacy.load('model-best')
|
51 |
+
|
52 |
+
|
53 |
+
text = st.sidebar.text_area('Enter Clause text:', value=EXAMPLE_TEXT, height=350)
|
54 |
+
button = st.sidebar.button('Extract Data', type='primary', use_container_width=True)
|
55 |
+
|
56 |
+
with st.spinner('Loading model...'):
|
57 |
+
nlp = load_model()
|
58 |
+
|
59 |
+
|
60 |
+
def check_span_pair_for_overlap(span1, span2):
    """Check whether two half-open ``(start, end)`` ranges share any position.

    Args:
        span1: (start, end) tuple; ``end`` is exclusive and ``start <= end``.
        span2: (start, end) tuple; ``end`` is exclusive and ``start <= end``.

    Return:
        True if overlap, False otherwise. Ranges that only touch
        (``span1[1] == span2[0]``) and empty ranges do not overlap.
    """
    # Two half-open ranges intersect exactly when the later start lies before
    # the earlier end. This is a constant-time comparison, so no scratch
    # arrays sized by the span extent are needed.
    return max(span1[0], span2[0]) < min(span1[1], span2[1])
|
81 |
+
|
82 |
+
|
83 |
+
@st.cache(allow_output_mutation=True)
def _load_person_detector():
    """Load the ``en_core_web_md`` pipeline once so it is not reloaded per call."""
    return spacy.load('en_core_web_md')


def add_detected_persons_as_parties(doc):
    """Add PERSON entities found by ``en_core_web_md`` to ``doc`` as ``parties``.

    The text of ``doc`` is re-processed with ``en_core_web_md``. Every PERSON
    entity that does not overlap an entity already on ``doc`` is appended to
    ``doc.ents`` with the label ``'parties'``.

    Args:
        doc (spacy.tokens.doc.Doc): document to extend; modified in place.

    Returns:
        The same ``doc`` with the extra entities set.
    """
    detected = _load_person_detector()(doc.text)
    merged_ents = list(doc.ents)

    for ent in detected.ents:
        if ent.label_ != 'PERSON':
            continue

        # Map the entity onto ``doc`` by character offsets. Token indices from
        # the other pipeline's Doc are only valid if both tokenize identically.
        span = doc.char_span(ent.start_char, ent.end_char, label='parties')
        if span is None:
            # The offsets do not fall on token boundaries of ``doc``.
            continue

        # Skip persons that overlap an entity that is already kept.
        overlaps_existing = any(
            check_span_pair_for_overlap((span.start, span.end), (kept.start, kept.end))
            for kept in merged_ents
        )
        if not overlaps_existing:
            merged_ents.append(span)

    doc.ents = merged_ents

    return doc
|
101 |
+
|
102 |
+
|
103 |
+
def add_rule_based_entites(doc):
    """Adds rule based entity spans to document

    Each regex in ``patterns`` is run over ``doc.text``. A match that lines up
    with token boundaries and does not overlap an entity already on the
    document is added to ``doc.ents`` under the pattern's label.

    Args:
        doc (spacy.tokens.doc.Doc): document to extend; modified in place.

    Returns:
        The same ``doc`` with the rule based entities set.
    """
    # Raw strings keep the regex escapes (\s, \-, ...) out of Python's own
    # string-escape handling.
    patterns = [
        (r'[0-9]+[\s]+[a-zA-Z0-9.\-\,\#]+[\s]*[a-zA-Z0-9.\-\,\#]+[a-zA-Z0-9\s.\-\,\#]*\s[0-9]+', 'address'),
        (r'Consultant|Company|Party|Parties', 'role'),
    ]

    merged_ents = list(doc.ents)

    for pattern, label in patterns:
        for match in re.finditer(pattern, doc.text):
            start_char, end_char = match.span()
            span = doc.char_span(start_char, end_char, label=label)
            if span is None:
                # The match does not fall on token boundaries.
                continue

            # doc.ents rejects overlapping spans, so keep the entity that is
            # already there and drop the overlapping rule match.
            overlaps_existing = any(
                check_span_pair_for_overlap((span.start, span.end), (kept.start, kept.end))
                for kept in merged_ents
            )
            if not overlaps_existing:
                merged_ents.append(span)

    doc.ents = merged_ents

    return doc
|
131 |
+
|
132 |
+
|
133 |
+
if button:
    # NOTE(review): nesting was reconstructed from a flattened diff; confirm
    # which statements belong to each column.
    col1, col2 = st.columns(2)
    doc = nlp(text)

    # Extend the model's entities with regex matches and detected persons.
    doc = add_rule_based_entites(doc)
    doc = add_detected_persons_as_parties(doc)
    with col1:
        st.header('🎨 Highlighted Text')

        # Entity label -> label shown to the user.
        label_aliases = {
            'parties': 'Party',
            'address': 'Address',
            'agreement_date': 'Agreement Date',
            'role': 'Role'
        }

        # Colours are keyed by the display labels attached to the spans below,
        # so every label finds its colour. The former 'agreement_date' key
        # could not match the 'Agreement Date' span label.
        colors = {'Party': "#85C1E9", 'Address': "#ff6961", 'Agreement Date': "#5de36f", 'Role': "#b05de3"}
        options = {"ents": list(colors), "colors": colors}

        # Fall back to the raw label so an unlisted label cannot raise KeyError.
        doc.spans["sc"] = [
            Span(doc, ent.start, ent.end, label_aliases.get(ent.label_, ent.label_)) for ent in doc.ents
        ]

        html = displacy.render(doc, style="span", options=options)
        components.html(html, height=400)

    with col2:
        # display table
        data = {
            'Text': [],
            'Label': []
        }

        st.header('📊 Extracted Information')
        for span in doc.spans['sc']:
            data['Label'].append(span.label_)
            data['Text'].append(span.text)
        df = pd.DataFrame(data)

        hide_dataframe_row_index = """
        <style>
        .row_heading.level0 {display:none}
        .blank {display:none}
        </style>
        """

        # Inject CSS with Markdown
        st.markdown(hide_dataframe_row_index, unsafe_allow_html=True)

        st.table(df)
|
requirements.txt
ADDED
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
altair==4.2.2
|
2 |
+
attrs==22.2.0
|
3 |
+
blinker==1.5
|
4 |
+
cachetools==5.3.0
|
5 |
+
certifi==2022.12.7
|
6 |
+
charset-normalizer==3.1.0
|
7 |
+
click==8.1.3
|
8 |
+
cloudpickle==2.2.1
|
9 |
+
decorator==5.1.1
|
10 |
+
entrypoints==0.4
|
11 |
+
filelock==3.10.0
|
12 |
+
gitdb==4.0.10
|
13 |
+
GitPython==3.1.31
|
14 |
+
huggingface-hub==0.13.2
|
15 |
+
idna==3.4
|
16 |
+
importlib-metadata==6.0.0
|
17 |
+
Jinja2==3.1.2
|
18 |
+
joblib==1.2.0
|
19 |
+
jsonschema==4.17.3
|
20 |
+
lime==0.2.0.1
|
21 |
+
markdown-it-py==2.2.0
|
22 |
+
MarkupSafe==2.1.2
|
23 |
+
matplotlib==3.7.1
|
24 |
+
mdurl==0.1.2
|
25 |
+
nltk==3.8.1
|
26 |
+
numpy==1.24.2
|
27 |
+
packaging==23.0
|
28 |
+
pandas==1.5.3
|
29 |
+
Pillow==9.4.0
|
30 |
+
plotly==5.13.1
|
31 |
+
protobuf==3.20.3
|
32 |
+
pyarrow==11.0.0
|
33 |
+
pydeck==0.8.0
|
34 |
+
Pygments==2.14.0
|
35 |
+
Pympler==1.0.1
|
36 |
+
pyrsistent==0.19.3
|
37 |
+
python-dateutil==2.8.2
|
38 |
+
pytz==2022.7.1
|
39 |
+
pytz-deprecation-shim==0.1.0.post0
|
40 |
+
PyYAML==6.0
|
41 |
+
regex==2022.10.31
|
42 |
+
requests==2.28.2
|
43 |
+
rich==13.3.2
|
44 |
+
scikit-learn==1.2.2
|
45 |
+
scipy==1.10.1
|
46 |
+
semver==2.13.0
|
47 |
+
six==1.16.0
|
48 |
+
spacy==3.5.1
|
49 |
+
https://huggingface.co/spacy/en_core_web_md/resolve/main/en_core_web_md-any-py3-none-any.whl
|
50 |
+
smmap==5.0.0
|
51 |
+
streamlit==1.20.0
|
52 |
+
threadpoolctl==3.1.0
|
53 |
+
toml==0.10.2
|
54 |
+
toolz==0.12.0
|
55 |
+
tornado==6.2
|
56 |
+
tqdm==4.65.0
|
57 |
+
typing_extensions==4.5.0
|
58 |
+
tzdata==2022.7
|
59 |
+
tzlocal==4.2
|
60 |
+
urllib3==1.26.15
|
61 |
+
validators==0.20.0
|
62 |
+
watchdog==2.3.1
|
63 |
+
zipp==3.15.0
|
🏡_Home.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PIL import Image
|
3 |
+
|
4 |
+
st.set_page_config(
|
5 |
+
page_title="Legal AI Demos",
|
6 |
+
page_icon="👋",
|
7 |
+
)
|
8 |
+
|
9 |
+
image = Image.open('logo.png')
|
10 |
+
st.image(image, use_column_width=True)
|
11 |
+
|
12 |
+
st.write("# Welcome to the Legal AI Action Demos from simplexico! 👋")
|
13 |
+
|
14 |
+
st.sidebar.success("👆 Select a demo above.")
|
15 |
+
|
16 |
+
|
17 |
+
st.markdown(
|
18 |
+
"""
|
19 |
+
Here at simplexico, we've put together a collection of demos showcasing what AI can do in legal.
|
20 |
+
AI tends to get spoken about in vague terms. We like to think about AI in terms of the specific
|
21 |
+
action it can perform. These demos are meant to cut through all the hype and noise so you can simply see
|
22 |
+
for yourself what AI can and can't do.
|
23 |
+
|
24 |
+
Here you will find demos for the most common Legal AI Actions including:
|
25 |
+
- 🏷 **Label** - Using AI to **label** text
|
26 |
+
- ⛏ **Extract** - Using AI to **extract** information from text
|
27 |
+
- 🔗 **Compare** - Using AI to **compare** passages of text
|
28 |
+
|
29 |
+
**👈 Select a demo from the sidebar** to see some examples of what Legal AI can do!
|
30 |
+
|
31 |
+
## What goes into making an AI model?
|
32 |
+
Building an AI model is a lot like cooking.
|
33 |
+
A 👩🍳 chef (data scientist) combines the 🥒 ingredients (data) according to a recipe 📜 (algorithm)
|
34 |
+
and 🍳 cooks the meal (trains the model) in the 🔪 kitchen (computing environment).
|
35 |
+
Once the 🥧 meal (AI model) is ready, it can be 🛎 served (deployed) to a 💁🏻customer (user)
|
36 |
+
ready to be 🍽 eaten (used) and enjoyed 😋 (meeting the user's needs).
|
37 |
+
|
38 |
+
## Showcasing our Recipes
|
39 |
+
Our Legal AI Chefs 🧑🍳 have prepared a selection of recipes 📜 and with publicly sourced
|
40 |
+
ingredients 🥒 they have created some fantastic meals 🥧 (AI demos) for you to try.
|
41 |
+
We've also peppered on some explainability so you can see what the AI model is thinking.
|
42 |
+
Try them out 🍽!
|
43 |
+
|
44 |
+
## About Us
|
45 |
+
simplexico offers white-glove Legal AI education, design and development services. We are on a mission to help
|
46 |
+
legal professionals step into a future of collaboration with AI.
|
47 |
+
We have the Legal AI Chefs 👩🍳 and recipes 📜.
|
48 |
+
You have the ingredients 🥒 (data).
|
49 |
+
Let's work together to tailor some yummy Legal AI meals just for your taste 🥧!
|
50 |
+
|
51 |
+
### Want to learn more?
|
52 |
+
- 🌐 Check out our website [simplexico.ai](https://simplexico.ai)
|
53 |
+
- 📞 Book a Call With Us [Book a call](https://calendly.com/uwais-iqbal/discovery-call)
|
54 |
+
"""
|
55 |
+
)
|