Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,75 +1,42 @@
|
|
1 |
-
import os
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
-
import transformers
|
5 |
import streamlit as st
|
6 |
|
7 |
-
|
8 |
-
from transformers import pipeline
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
def load_text_gen_model():
|
13 |
-
generator = pipeline("text-generation", model="gpt2-medium")
|
14 |
-
return generator
|
15 |
-
|
16 |
-
@st.cache
|
17 |
-
def get_sentiment_model():
|
18 |
-
sentiment_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-imdb-sentiment")
|
19 |
-
return sentiment_model
|
20 |
-
|
21 |
-
def get_summarizer_model():
|
22 |
-
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
|
23 |
-
return summarizer
|
24 |
|
25 |
-
|
26 |
-
def get_sentiment(text):
|
27 |
-
input_ids = sentiment_tokenizer .encode(text + '</s>', return_tensors='pt')
|
28 |
-
output = sentiment_extractor.generate(input_ids=input_ids,max_length=2)
|
29 |
-
dec = [sentiment_tokenizer.decode(ids) for ids in output]
|
30 |
-
label = dec[0]
|
31 |
-
return label
|
32 |
-
|
33 |
-
|
34 |
-
def get_qa_model():
|
35 |
-
model_name = "deepset/roberta-base-squad2"
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
summarizer = get_summarizer_model()
|
42 |
-
answer_generator = get_qa_model()
|
43 |
|
44 |
|
45 |
-
|
|
|
|
|
|
|
46 |
|
47 |
-
#action = st.sidebar.selectbox("Pick an Action", ["Analyse a Review","Generate an Article","Create an Image"])
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
|
53 |
-
|
54 |
-
|
55 |
-
start_sentiment_analysis = st.button("Get the Sentiment of the Review")
|
56 |
-
start_summarizing = st.button("Summarize the review")
|
57 |
-
start_topic_extraction = st.button("Find the key topic")
|
58 |
|
59 |
-
|
60 |
-
sentiment = get_sentiment(review)
|
61 |
-
st.write(sentiment)
|
62 |
-
|
63 |
-
if start_summarizing:
|
64 |
-
summary = summarizer(review, max_length=130, min_length=30, do_sample=False)
|
65 |
-
st.write(summary)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
'context': review}
|
70 |
-
answer = answer_generator(QA_input)
|
71 |
-
st.write(answer)
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
|
|
1 |
+
import os
|
2 |
+
import openai
|
3 |
+
import pinecone
|
4 |
+
from langchain.document_loaders import DirectoryLoader
|
5 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
6 |
+
from langchain.embeddings.openai import OpenAIEmbeddings
|
7 |
+
from langchain.vectorstores import Pinecone
|
8 |
+
from langchain.llms import OpenAI
|
9 |
+
from langchain.chains.question_answering import load_qa_chain
|
10 |
|
|
|
11 |
import streamlit as st
|
12 |
|
13 |
+
# Page title for the Streamlit app.
st.header("Document Question Answering")

# Path to a folder of documents to index, typed in by the user.
# NOTE(review): the label is empty — recent Streamlit releases warn on
# empty widget labels; consider a descriptive label here.
directory = st.text_area("")

# Example path kept from notebook development — intentionally disabled.
#directory = '/content/data'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
+
def load_docs(directory):
    """Load all documents found under *directory*.

    Delegates entirely to langchain's DirectoryLoader; returns whatever
    list of Document objects its ``load()`` produces.
    """
    return DirectoryLoader(directory).load()
|
|
|
|
|
23 |
|
24 |
|
25 |
+
def split_docs(documents, chunk_size=1000, chunk_overlap=20):
    """Split *documents* into overlapping text chunks.

    Uses RecursiveCharacterTextSplitter with the given ``chunk_size`` and
    ``chunk_overlap`` and returns the resulting list of chunk documents.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)
|
29 |
|
|
|
30 |
|
31 |
+
# Once the user has supplied a directory, load and chunk its documents.
if directory:
    documents = load_docs(directory)
    # Report how many raw documents were found.
    st.write(len(documents))

    docs = split_docs(documents)
    # Was print(): stdout is invisible to the app user — report the chunk
    # count in the UI like every other status line in this script.
    st.write(len(docs))

# NOTE(review): this runs on every page load, even before a directory is
# entered, and needs a valid OpenAI API key — a likely source of the
# Space's startup "Runtime error"; consider guarding it behind the
# `if directory:` branch. Also, `model_name=` is the legacy kwarg — newer
# langchain versions expect `model=`; confirm the pinned version.
embeddings = OpenAIEmbeddings(model_name="ada")

# Sanity check: embed a test string and display the embedding dimension.
query_result = embeddings.embed_query("Hello world")
st.write(len(query_result))
|
|
|
|
|
|
|
42 |
|
|
|
|
|
|