Spaces:
Sleeping
Sleeping
Commit
·
3932ad8
1
Parent(s):
3488f7c
init repo
Browse files- README.md +7 -4
- app.py +85 -0
- requirements.txt +2 -0
README.md
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.1.4
|
8 |
app_file: app.py
|
9 |
-
|
|
|
|
|
10 |
---
|
11 |
|
12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
1 |
---
|
2 |
+
title: TIClassifier
|
3 |
+
emoji: πβ¨
|
4 |
+
colorFrom: blue
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
sdk_version: 3.1.4
|
8 |
app_file: app.py
|
9 |
+
models: TransformationTransformer
|
10 |
+
pinned: true
|
11 |
+
|
12 |
---
|
13 |
|
14 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
15 |
+
|
app.py
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
from transformers_interpret import SequenceClassificationExplainer
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
+
|
6 |
+
|
7 |
+
# Setup model
|
8 |
+
classifier = pipeline("text-classification", model="simonschoe/TransformationTransformer")
|
9 |
+
explainer = SequenceClassificationExplainer(classifier.model, classifier.tokenizer)
|
10 |
+
|
11 |
+
legend = """
|
12 |
+
<div style="text-align: center; display: block; margin-left: auto; margin-right: auto; border-top: 1px solid; margin-top: 5px; padding-top: 5px;"><b>Legend: </b><span style="display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(0, 75%, 60%)"></span> Generic <span style="display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(120, 75%, 50%)"></span> Transformation </div>
|
13 |
+
"""
|
14 |
+
|
15 |
+
def classify(_input):
    """Compute the label-1 probability and a word-importance explanation.

    Args:
        _input: Text to classify; passed unchanged to both the
            text-classification pipeline and the explainer.

    Returns:
        A ``(score, result_html)`` tuple: ``score`` is the probability
        assigned to label 1, ``result_html`` is the word-importance markup
        of the explanation followed by the colour legend.
    """
    result = classifier(_input)[0]
    score = result['score']
    # The pipeline's score belongs to the predicted label, so flip it when
    # the prediction is label 0 to always report the label-1 probability.
    if result['label'] == 'LABEL_0':
        score = 1 - score

    # Running the explainer computes the attributions that visualize()
    # renders below; the returned attribution list itself is not needed.
    explainer(_input)
    html = explainer.visualize().__html__()

    soup = BeautifulSoup(html, 'html.parser')
    # The last table cell holds the word-importance markup. Rename the tag
    # itself rather than string-replacing 'td', which would also rewrite any
    # "td" occurring inside the highlighted text or attribute values.
    cell = soup.find_all('td')[-1]
    cell.name = 'div'
    explanation = str(cell)

    # adding legend to word importance explanation
    result_html = explanation + legend
    return score, result_html
|
33 |
+
|
34 |
+
app = gr.Blocks()
|
35 |
+
|
36 |
+
with app:
|
37 |
+
gr.Markdown("# Call2Vec")
|
38 |
+
gr.Markdown("## Semantic Search in Quarterly Earnings Conference Calls")
|
39 |
+
with gr.Row():
|
40 |
+
with gr.Column():
|
41 |
+
text_in = gr.Textbox(lines=1, placeholder="Insert text", label="Search Query")
|
42 |
+
with gr.Row():
|
43 |
+
compute_bt = gr.Button("Calculate")
|
44 |
+
score_out = gr.Number(label="Label 1 probability", interactive=False)
|
45 |
+
html_out = gr.HTML(label="Explanation")
|
46 |
+
with gr.Column():
|
47 |
+
gr.Markdown(
|
48 |
+
"""
|
49 |
+
#### Project Description
|
50 |
+
Call2Vec is a [fastText](https://fasttext.cc/) word embedding model trained via [Gensim](https://radimrehurek.com/gensim/). It maps each token in the vocabulary into a dense, 300-dimensional vector space, designed for performing semantic search.
|
51 |
+
The model is trained on a large sample of quarterly earnings conference calls, held by U.S. firms during the 2006-2022 period. In particular, the training data is restricted to the (rather spontaneous) executives' remarks of the Q&A section of the call. The data has been preprocessed prior to model training via stop word removal, lemmatization, named entity masking, and co-occurrence modeling.
|
52 |
+
"""
|
53 |
+
)
|
54 |
+
gr.Markdown(
|
55 |
+
"""
|
56 |
+
#### App usage
|
57 |
+
The model is intended to be used for **semantic search**: It encodes the search query (entered in the textbox on the right) in a dense vector space and finds semantic neighbours, i.e., tokens which frequently occur within similar contexts in the underlying training data.
|
58 |
+
The model allows for two use cases:
|
59 |
+
1. *Single Search:* The input query consists of a single word. When provided a bi-, tri-, or even fourgram, the quality of the model output depends on the presence of the query token in the model's vocabulary. N-grams should be concatenated by an underscore (e.g., "machine_learning" or "artificial_intelligence").
|
60 |
+
2. *Multi Search:* The input query may consist of several words or n-grams, separated by comma, semi-colon or newline. It then computes the average vector over all inputs and performs semantic search based on the average input token.
|
61 |
+
"""
|
62 |
+
)
|
63 |
+
gr.Examples(
|
64 |
+
examples=[["Now Accord networks is a company in video, and he led the sales team, and the marketing group at Accord, and he took it from start up, sound familiar, it's from start up to $60 million company in two years."], ["Another test sentence"], ["Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam"]],
|
65 |
+
inputs=[text_in],
|
66 |
+
outputs=[score_out, html_out],
|
67 |
+
fn=classify,
|
68 |
+
cache_examples=True
|
69 |
+
)
|
70 |
+
gr.Markdown(
|
71 |
+
"""
|
72 |
+
<p style="text-align: center;">
|
73 |
+
Call2Vec by X and Y
|
74 |
+
<br>
|
75 |
+
<img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=simonschoe.call2vec&left_color=green&right_color=blue" style="display: block; margin-left: auto; margin-right: auto;"/>
|
76 |
+
</p>
|
77 |
+
"""
|
78 |
+
)
|
79 |
+
compute_bt.click(classify, inputs=[text_in], outputs=[score_out, html_out])
|
80 |
+
|
81 |
+
|
82 |
+
app.launch()
|
83 |
+
|
84 |
+
|
85 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
bs4==4.4.0
|
2 |
+
transformers_interpret==0.7.2
|