Spaces:

dsorokin
/

shad_arxiv_clf

Runtime error

App Files Files Community

dsorokin committed on Oct 28, 2022

Commit

2fc0e56

1 Parent(s): 75c83d9

works

Browse files

Files changed (4) hide show

.gitattributes +1 -0
app.py +45 -6
imgs/akinator_ready.png +0 -0
requirements.txt +3 -2

.gitattributes CHANGED Viewed

@@ -31,3 +31,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.jpeg filter=lfs diff=lfs merge=lfs -text

app.py CHANGED Viewed

@@ -1,13 +1,52 @@
 import streamlit as st
-st.markdown("### Hello, world!")
-st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
-from transformers import pipeline
-pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-text = st.text_area("TEXT HERE")
-st.markdown(f"{pipe(text)}")

 import streamlit as st
+import torch
+from torch.nn import functional as F
+from transformers import AutoTokenizer, AutoModelForSequenceClassification
+import json
+import streamlit.components.v1 as components
+if __name__ == '__main__':
+    # Streamlit page: the user enters a paper title and abstract, presses
+    # "Classify!", and gets every label from the fine-tuned checkpoint listed
+    # from most to least probable.
+    st.markdown("### Arxiv paper classifier (No guarantees provided)")
+    col1, col2 = st.columns([1, 1])
+    col1.image('imgs/akinator_ready.png', width=200)
+    btn = col2.button('Classify!')
+    # NOTE(review): model and tokenizer are loaded every time this script body
+    # runs; if the installed Streamlit offers a resource cache, consider using it.
+    model = AutoModelForSequenceClassification.from_pretrained('checkpoint-3000')
+    tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
+    # JSON object keys are strings; convert them to int so the mapping can be
+    # indexed directly by the predicted class id.
+    with open('checkpoint-3000/config.json', 'r') as f:
+        id2label = json.load(f)['id2label']
+    id2label = {int(key): value for key, value in id2label.items()}
+    # NOTE(review): Streamlit documents `height` in pixels; 3 and 10 look like
+    # line counts -- confirm the intended size.
+    title = st.text_area(label='Input title...', placeholder='Input title...', label_visibility='hidden', height=3)
+    abstract = st.text_area(label='Input abstract...', placeholder='Input abstract...', label_visibility='hidden', height=10)
+    text = '\n'.join([title, abstract])
+    if btn and not text.strip():
+        # Covers both truly empty and whitespace-only input.
+        st.error('Title and abstract are empty!')
+    elif btn:
+        # Truncate to the tokenizer's maximum model length so a long abstract
+        # cannot overflow the model's supported sequence length.
+        tokenized = tokenizer(text, truncation=True)
+        with torch.no_grad():
+            # unsqueeze adds the batch dimension: (seq_len,) -> (1, seq_len).
+            out = model(torch.tensor(tokenized['input_ids']).unsqueeze(dim=0))
+        # Class probabilities for the single input, ordered most likely first.
+        probs = F.softmax(out['logits'][0], dim=0)
+        probs, ids = torch.sort(probs, descending=True)
+        result = [
+            f'{id2label[idx.item()]} (prob = {prob.item()})'
+            for idx, prob in zip(ids, probs)
+        ]
+        output = '<br>'.join(result)
+        # Both the wrapper and the styled inner <div> are closed explicitly.
+        components.html(f'<div>'
+                        f'<div style="height:120px;width:680px;'
+                        f'border:1px solid #ccc;border-color: red;'
+                        f'font:16px/26px Georgia, Garamond, Serif;'
+                        f'overflow:scroll;'
+                        f'color:white;">'
+                        f'{output}</div></div>')

imgs/akinator_ready.png ADDED Viewed

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
-transformers
-torch


+transformers==4.15.0
+torch==1.12.1
+
4