Spaces:
Sleeping
Sleeping
File size: 5,693 Bytes
3d2ccf5 cf322df 97f5741 b3d2c60 3d2ccf5 1b8a80e 75e25ba 19d6ff0 e752a6d 19d6ff0 e752a6d 19d6ff0 3d2ccf5 b3d2c60 0b96e65 296d90c 518d051 5322743 296d90c c52039d 51739ac 850e742 e413986 b3d2c60 e413986 51739ac b3d2c60 19d6ff0 c52039d e413986 19d6ff0 b3d2c60 7ec07f8 e413986 7ec07f8 e413986 306b3a0 7ec07f8 e413986 92a0f76 96af6c4 e413986 e752a6d e413986 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
import os
import random

import nltk
import numpy as np
import requests
import streamlit as st
from sentence_transformers import SentenceTransformer, util
from streamlit_chat import message
# Knowledge base: a flat run of "use case -> recommended Hugging Face task"
# sentences. Precomputed embeddings for these sentences are loaded from disk
# below; the chatbot answers by returning the most similar sentence.
context = "To extract information from documents, use sentence similarity task. To classify sentiments, use text classification task. To do sentiment analysis, use text classification task. To detect masks from images, use object detection task. To extract name or address from documents use token classification task. To extract name or address from invoices, use token classification task. To build voice enabled applications, you can use automatic speech recognition task. You can retrieve information from documents using sentence similarity task. You can summarize papers using summarization task. You can convert text to speech using text-to-speech task. To detect language spoken in an audio, you can use audio classification task. To detect emotion in an audio, you can use audio classification task. To detect commands in an audio, you can use audio classification task. To decompose sounds in a recording, use audio-to-audio task. To answer questions from a document, you can use question answering task. To answer FAQs from your customers, you can use question answering task. To see if a text is grammatically correct, you can use text classification task. To augment your training data, you can use text classification task. To detect pedestrians, you can use object detection task."
# Maps task-name substrings (searched for inside the retrieved answer
# sentence) to the corresponding Hugging Face task documentation page.
# Keys must match the task wording used in `context` above.
link_dict = {
"audio-to-audio": "https://huggingface.co/tasks/audio-to-audio",
"audio classification": "https://huggingface.co/tasks/audio-classification",
"automatic speech recognition": "https://huggingface.co/tasks/automatic-speech-recognition",
"fill-mask":"https://huggingface.co/tasks/fill-mask",
"image classification": "https://huggingface.co/tasks/image-classification",
"image segmentation": "https://huggingface.co/tasks/image-segmentation",
"question answering":"https://huggingface.co/tasks/question-answering",
"text-to-speech":"https://huggingface.co/tasks/text-to-speech",
"sentence similarity": "https://huggingface.co/tasks/sentence-similarity",
"summarization":"https://huggingface.co/tasks/summarization",
"text generation": "https://huggingface.co/tasks/text-generation",
"translation": "https://huggingface.co/tasks/translation",
"token classification": "https://huggingface.co/tasks/token-classification",
"object detection": "https://huggingface.co/tasks/object-detection"}
# Split `context` into sentences, building:
#   corpus         - flat list of every sentence (search corpus),
#   sentence_count - number of sentences each "."-separated chunk yields,
#                    so a corpus index can be mapped back to its chunk.
corpus = []
sentence_count = []
for sent in context.split("."):
    sentences = nltk.tokenize.sent_tokenize(str(sent), language='english')
    sentence_count.append(len(sentences))
    # extend() instead of the original enumerate/append loop (no index needed)
    corpus.extend(sentences)

# Precomputed msmarco-distilbert-base-v4 embeddings for `corpus`; loading
# from disk avoids re-encoding the corpus on every app start.
# (Requires `import numpy as np` at the top of the file — it was missing.)
corpus_embeddings = np.load('task_embeddings_msmarco-distilbert-base-v4.npy')
# Removed the bare `corpus_embeddings.shape` expression: it was a no-op
# notebook leftover with no effect in a script.
def find_sentences(query):
    """Return the corpus sentence most semantically similar to *query*.

    Encodes *query* with the module-level SentenceTransformer ``model``,
    runs a semantic search against the precomputed ``corpus_embeddings``,
    prints the top hits for debugging, and returns the text of the
    best-scoring sentence ("" when there are no hits).

    Bug fix: the original only printed the hits and implicitly returned
    ``None``, so the caller's ``key in model_answer`` membership test
    raised ``TypeError``. It also did ``doc = corpus[idx]`` — indexing the
    flat *sentence* list with a *document* index — in its debug output;
    that broken document lookup has been dropped.
    """
    query_embedding = model.encode(query)
    # semantic_search returns one hit list per query; we send a single query,
    # so take element 0. Hits are sorted by descending score.
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)[0]
    for hit in hits:
        # Debug output: candidate sentence and its cosine-similarity score.
        print(corpus[hit['corpus_id']], "(Score: {:.4f})".format(hit['score']))
    if not hits:
        return ""
    return corpus[hits[0]['corpus_id']]
# --- Streamlit chat UI -----------------------------------------------------
# NOTE(review): Streamlit re-runs this script on every interaction, so
# message_history is rebuilt from scratch each time; persisting the full
# conversation would need st.session_state. Left as-is to preserve the
# original's visible behavior.
# NOTE(review): the emoji in the strings below appear mojibake'd (likely a
# source-encoding issue); they are reproduced byte-for-byte — confirm the
# intended characters against the original repository.
message_history = [{"text":"Let's find out the best task for your use case! Tell me about your use case :)", "is_user":False}]
st.subheader("If you don't know how to build your machine learning product for your use case, Taskmaster is here to help you! πͺβ¨")
st.write("Hint: Try to ask your use case in a question form.")

# Render the conversation so far.
for msg in message_history:
    message(msg["text"], is_user=msg["is_user"])

placeholder = st.empty()  # placeholder for the latest bot message

# Renamed from `input` to avoid shadowing the builtin.
user_input = st.text_input("Ask me π€")
if user_input:
    message_history.append({"text": user_input, "is_user": True})
    # Bug fix: query with the user's actual question — the original passed
    # the hard-coded string "How can I extract information from invoices?".
    model_answer = find_sentences(user_input)
    # Bug fix: the original guarded this section with
    # `if resp.status_code == 200:` but `resp` was never defined (a leftover
    # from an HTTP Inference-API version) and raised NameError at runtime.
    key_exists = False
    for key in link_dict:
        if key in model_answer:
            key_exists = True
            url = link_dict[key]
            response_templates = [f"I think that {model_answer} is the best task for this π€© Check out the page ππΌ {url}", f"I think you should use {model_answer} πͺ Check it out here ππΌ {url}", f"I think {model_answer} should work for you π€ Check out the page ππΌ {url}"]
            bot_answer = random.choice(response_templates)
            message_history.append({"text": bot_answer, "is_user": False})
    if not key_exists:
        # No known task name appeared in the answer: ask the user to rephrase.
        fallback_template = ["I didn't get the question π§ Could you please ask again? Try 'What should I use for detecting masks in an image?'",
        "Hmm, not sure I know the answer, maybe you could ask differently? π€",
        "Sorry, I didn't understand you, maybe you could ask differently? π€ Try asking 'What should I use to extract name in a document' π€"]
        bot_answer = random.choice(fallback_template)
        message_history.append({"text": bot_answer, "is_user": False})
    with placeholder.container():
        # Show only the newest message in the placeholder slot.
        last_message = message_history[-1]
        if last_message:
            message(last_message["text"], last_message["is_user"])
|