Sasidhar committed on
Commit
ae9c1ea
·
1 Parent(s): 9ebe00e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -61
app.py CHANGED
@@ -1,75 +1,42 @@
1
- import os
2
- #os.system('pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu102/torch1.9/index.html')
 
 
 
 
 
 
 
3
 
4
- import transformers
5
  import streamlit as st
6
 
7
- from transformers import AutoTokenizer, AutoModelWithLMHead
8
- from transformers import pipeline
9
 
10
- sentiment_tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-imdb-sentiment")
11
-
12
- def load_text_gen_model():
13
- generator = pipeline("text-generation", model="gpt2-medium")
14
- return generator
15
-
16
- @st.cache
17
- def get_sentiment_model():
18
- sentiment_model = AutoModelWithLMHead.from_pretrained("mrm8488/t5-base-finetuned-imdb-sentiment")
19
- return sentiment_model
20
-
21
- def get_summarizer_model():
22
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
23
- return summarizer
24
 
25
-
26
- def get_sentiment(text):
27
- input_ids = sentiment_tokenizer .encode(text + '</s>', return_tensors='pt')
28
- output = sentiment_extractor.generate(input_ids=input_ids,max_length=2)
29
- dec = [sentiment_tokenizer.decode(ids) for ids in output]
30
- label = dec[0]
31
- return label
32
-
33
-
34
- def get_qa_model():
35
- model_name = "deepset/roberta-base-squad2"
36
 
37
- qa_pipeline = pipeline('question-answering', model=model_name, tokenizer=model_name)
38
- return qa_pipeline
39
-
40
- sentiment_extractor = get_sentiment_model()
41
- summarizer = get_summarizer_model()
42
- answer_generator = get_qa_model()
43
 
44
 
45
- st.header("Review Analyzer")
 
 
 
46
 
47
- #action = st.sidebar.selectbox("Pick an Action", ["Analyse a Review","Generate an Article","Create an Image"])
48
 
49
- #if action == "Analyse a Review":
50
- st.subheader("Paste/write a review here..")
51
- review = st.text_area("")
52
 
53
- if review:
54
-
55
- start_sentiment_analysis = st.button("Get the Sentiment of the Review")
56
- start_summarizing = st.button("Summarize the review")
57
- start_topic_extraction = st.button("Find the key topic")
58
 
59
- if start_sentiment_analysis:
60
- sentiment = get_sentiment(review)
61
- st.write(sentiment)
62
-
63
- if start_summarizing:
64
- summary = summarizer(review, max_length=130, min_length=30, do_sample=False)
65
- st.write(summary)
66
 
67
- if start_topic_extraction:
68
- QA_input = {'question': 'what is the review about?',
69
- 'context': review}
70
- answer = answer_generator(QA_input)
71
- st.write(answer)
72
 
73
-
74
-
75
-
 
1
+ import os
2
+ import openai
3
+ import pinecone
4
+ from langchain.document_loaders import DirectoryLoader
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings.openai import OpenAIEmbeddings
7
+ from langchain.vectorstores import Pinecone
8
+ from langchain.llms import OpenAI
9
+ from langchain.chains.question_answering import load_qa_chain
10
 
 
11
  import streamlit as st
12
 
13
+ st.header("Document Question Answering")
 
14
 
15
+ directory = st.text_area("")
 
 
 
 
 
 
 
 
 
 
 
 
 
16
 
17
+ #directory = '/content/data'
 
 
 
 
 
 
 
 
 
 
18
 
19
+ def load_docs(directory):
20
+ loader = DirectoryLoader(directory)
21
+ documents = loader.load()
22
+ return documents
 
 
23
 
24
 
25
+ def split_docs(documents, chunk_size=1000, chunk_overlap=20):
26
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
27
+ docs = text_splitter.split_documents(documents)
28
+ return docs
29
 
 
30
 
31
+ if directory:
32
+ documents = load_docs(directory)
33
+ st.write(len(documents))
34
 
35
+ docs = split_docs(documents)
36
+ print(len(docs))
 
 
 
37
 
38
+ embeddings = OpenAIEmbeddings(model_name="ada")
 
 
 
 
 
 
39
 
40
+ query_result = embeddings.embed_query("Hello world")
41
+ st.write(len(query_result))
 
 
 
42