samim2024 committed on
Commit 904401c · verified · 1 Parent(s): 1bc5330

Create app.py

Files changed (1)
  1. app.py +116 -0
app.py ADDED
@@ -0,0 +1,116 @@
+ # DocArrayInMemorySearch is a document index provided by DocArray that stores documents in memory.
+ # It is a great starting point for small datasets, where you may not want to launch a database server.
+
+ # import libraries
+ import os
+ import streamlit as st
+ import requests
+ from bs4 import BeautifulSoup
+ from langchain.document_loaders import TextLoader  # reads in a file as text and places it all into one document
+ from langchain.indexes import VectorstoreIndexCreator  # logic for creating indexes
+ from langchain.vectorstores import DocArrayInMemorySearch  # in-memory document index provided by DocArray
+ from langchain_community.embeddings import HuggingFaceEmbeddings  # wraps sentence-transformers models
+ from langchain_community.llms import HuggingFaceEndpoint
+
+ # Vertex AI is an alternative backend; its imports, init call, and model setup are kept below for reference.
+ # import vertexai
+ # from langchain.llms import VertexAI
+ # from langchain.embeddings import VertexAIEmbeddings
+ # vertexai.init(project=PROJECT, location=LOCATION)  # GCP PROJECT ID, LOCATION as region
+
+ # The PaLM 2 for Text (text-bison, text-unicorn) foundation models are optimized for a variety of natural language
+ # tasks such as sentiment analysis, entity extraction, and content creation. The types of content that the PaLM 2 for
+ # Text models can create include document summaries, answers to questions, and labels that classify content.
+ # llm = VertexAI(model_name="text-bison@001", max_output_tokens=256, temperature=0.1, top_p=0.8, top_k=40, verbose=True)
+ llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.9)
+
+ # embeddings = VertexAIEmbeddings()
+ # all-MiniLM-L6-v2 is the sentence-transformers default; any sentence-transformers model name works here
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
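+ # Note (assumption): HuggingFaceEndpoint authenticates via the HUGGINGFACEHUB_API_TOKEN environment
+ # variable (or an explicit huggingfacehub_api_token argument); it must be set for this Space.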
+
+ # The code below scrapes all the text from the webpage link provided by the user and saves it to a text file.
+ def get_text(url):
+     # Send a GET request to the URL
+     response = requests.get(url)
+
+     # Create a BeautifulSoup object with the HTML content
+     soup = BeautifulSoup(response.content, "html.parser")
+
+     # Find the specific elements containing the text to scrape;
+     # here, we find all <p> tags and extract their text
+     paragraphs = soup.find_all("p")
+
+     # Write each paragraph's text to the file, one paragraph per line
+     os.makedirs("text", exist_ok=True)  # portable replacement for the original hard-coded Windows path
+     with open(os.path.join("text", "temp.txt"), "w", encoding="utf-8") as file:
+         for paragraph in paragraphs:
+             file.write(paragraph.get_text() + "\n")
+
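+ # e.g. get_text("https://example.com") writes all <p> text from that page to text/temp.txt
+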
+ @st.cache_resource
+ def create_langchain_index(input_text):
+     print("--indexing---")
+     get_text(input_text)
+     loader = TextLoader(os.path.join("text", "temp.txt"), encoding="utf-8")
+     # data = loader.load()
+
+     # Embed the scraped text and build the in-memory vector index
+     index = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch, embedding=embeddings).from_loaders([loader])
+     return index
+
+ # @st.cache_resource
+ # def get_basic_page_details(input_text, summary_query, tweet_query, ln_query):
+ #     index = create_langchain_index(input_text)
+ #     summary_response = index.query(summary_query)
+ #     tweet_response = index.query(tweet_query)
+ #     ln_response = index.query(ln_query)
+ #     return summary_response, tweet_response, ln_response
+
+
+ @st.cache_data
+ def get_response(input_text, query):
+     print(f"--querying---{query}")
+     # relies on the module-level `index` created below; input_text keeps the cache key distinct per page
+     response = index.query(query, llm=llm)
+     return response
+
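+ # index.query(...) above wraps a retrieval QA flow: embed the question, fetch the most similar chunks
+ # from the vector store, and prompt the LLM with them.
+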
+ # The code below is a simple flow that accepts the webpage link and processes the queries
+ # using the get_response function created above; thanks to the cache, the same page and query
+ # are not processed twice.
+
+ st.title('Webpage Question Answering')
+
+ input_text = st.text_input("Provide the link to the webpage...")
+
+ summary_response = ""
+ tweet_response = ""
+ ln_response = ""
+ # if st.button("Load"):
+ if input_text:
+     index = create_langchain_index(input_text)
+     summary_query = "Write a 100 words summary of the document"
+     summary_response = get_response(input_text, summary_query)
+
+     tweet_query = "Write a twitter tweet"
+     tweet_response = get_response(input_text, tweet_query)
+
+     ln_query = "Write a linkedin post for the document"
+     ln_response = get_response(input_text, ln_query)
+
+
+ with st.expander('Page Summary'):
+     st.info(summary_response)
+
+ with st.expander('Tweet'):
+     st.info(tweet_response)
+
+ with st.expander('LinkedIn Post'):
+     st.info(ln_response)
+
+
+ question = st.text_input("Ask a question from the link you shared...")
+ if st.button("Ask"):
+     if question:
+         index = create_langchain_index(input_text)
+         response = get_response(input_text, question)
+         st.write(response)
+     else:
+         st.warning("Please enter a question.")
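+
+ # To run locally (assuming a standard Streamlit setup): streamlit run app.py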