Spaces:

simonschoe
/

TIC

Sleeping

App Files Files Community

simonschoe committed on Aug 18, 2022

Commit

3932ad8

1 Parent(s): 3488f7c

init repo

Browse files

Files changed (3) hide show

README.md +7 -4
app.py +85 -0
requirements.txt +2 -0

README.md CHANGED Viewed

@@ -1,12 +1,15 @@
 ---
-title: TIC
-emoji: 🐨
-colorFrom: green
 colorTo: red
 sdk: gradio
 sdk_version: 3.1.4
 app_file: app.py
-pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: TIClassifier
+emoji: 📃✨
+colorFrom: blue
 colorTo: red
 sdk: gradio
 sdk_version: 3.1.4
 app_file: app.py
+models: TransformationTransformer
+pinned: true
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import gradio as gr
+from transformers import pipeline
+from transformers_interpret import SequenceClassificationExplainer
+from bs4 import BeautifulSoup
+# Setup model
+classifier = pipeline("text-classification", model="simonschoe/TransformationTransformer")
+explainer = SequenceClassificationExplainer(classifier.model, classifier.tokenizer)
+legend = """
+<div style="text-align: center; display: block; margin-left: auto; margin-right: auto; border-top: 1px solid; margin-top: 5px; padding-top: 5px;"><b>Legend: </b><span style="display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(0, 75%, 60%)"></span> Generic  <span style="display: inline-block; width: 10px; height: 10px; border: 1px solid; background-color: hsl(120, 75%, 50%)"></span> Transformation  </div>
+"""
+def classify(_input):
+    """
+    wrapper method to compute label 1 probability and explanation for given input
+    """
+    result = classifier(_input)[0]
+    score = result['score']
+    if result['label'] == 'LABEL_0':
+        score = 1-score
+    # getting visualization
+    attributions = explainer(_input)
+    html = explainer.visualize().__html__()
+    soup = BeautifulSoup(html, 'html.parser')
+    explanation = soup.find_all('td')[-1].__str__().replace('td', 'div')
+    # adding legend to word importance explanation
+    result_html = explanation + legend
+    return score, result_html
+app = gr.Blocks()
+with app:
+    gr.Markdown("# Call2Vec")
+    gr.Markdown("## Semantic Search in Quarterly Earnings Conference Calls")
+    with gr.Row():
+        with gr.Column():
+            text_in = gr.Textbox(lines=1, placeholder="Insert text", label="Search Query")
+            with gr.Row():
+                compute_bt = gr.Button("Calculate")
+            score_out = gr.Number(label="Label 1 probability", interactive=False)
+            html_out = gr.HTML(label="Explanation")
+        with gr.Column():
+            gr.Markdown(
+                """
+                #### Project Description
+                Call2Vec is a [fastText](https://fasttext.cc/) word embedding model trained via [Gensim](https://radimrehurek.com/gensim/). It maps each token in the vocabulary into a dense, 300-dimensional vector space, designed for performing semantic search.
+                The model is trained on a large sample of quarterly earnings conference calls, held by U.S. firms during the 2006-2022 period. In particular, the training data is restriced to the (rather sponentous) executives' remarks of the Q&A section of the call. The data has been preprocessed prior to model training via stop word removal, lemmatization, named entity masking, and coocurrence modeling.
+                """
+            )
+            gr.Markdown(
+                """
+                #### App usage
+                The model is intented to be used for **semantic search**: It encodes the search query (entered in the textbox on the right) in a dense vector space and finds semantic neighbours, i.e., token which frequently occur within similar contexts in the underlying training data.
+                The model allows for two use cases:
+                1. *Single Search:* The input query consists of a single word. When provided a bi-, tri-, or even fourgram, the quality of the model output depends on the presence of the query token in the model's vocabulary. N-grams should be concated by an underscore (e.g., "machine_learning" or "artifical_intelligence").
+                2. *Multi Search:* The input query may consist of several words or n-grams, seperated by comma, semi-colon or newline. It then computes the average vector over all inputs and performs semantic search based on the average input token.
+                """
+            )
+            gr.Examples(
+                examples=[["Now Accord networks is a company in video, and he led the sales team, and the marketing group at Accord, and he took it from start up, sound familiar, it's from start up to $60 million company in two years."], ["Another test sentence"], ["Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam"]],
+                inputs=[text_in],
+                outputs=[score_out, html_out],
+                fn=classify,
+                cache_examples=True
+            )
+    gr.Markdown(
+        """
+        <p style="text-align: center;">
+            Call2Vec by X and Y
+            <br>
+            <img id="visitor-badge" alt="visitor badge" src="https://visitor-badge.glitch.me/badge?page_id=simonschoe.call2vec&left_color=green&right_color=blue" style="display: block; margin-left: auto; margin-right: auto;"/>
+        </p>
+        """
+    )
+    compute_bt.click(classify, inputs=[text_in], outputs=[score_out, html_out])
+app.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ beautifulsoup4==4.4.0
2	+ transformers_interpret==0.7.2