# DocArrayInMemorySearch is a document index provided by DocArray that stores documents in memory.
# It is a great starting point for small datasets, where you may not want to launch a database server.
# import libraries
import os

import requests
import streamlit as st
from bs4 import BeautifulSoup
from langchain.document_loaders import TextLoader  # reads a file as text and places it all into one document
from langchain.indexes import VectorstoreIndexCreator  # logic for creating indexes
from langchain.vectorstores import DocArrayInMemorySearch  # in-memory document index provided by DocArray
from langchain_community.embeddings import HuggingFaceEmbeddings  # LangChain wrapper around sentence-transformers models
from langchain_community.llms import HuggingFaceEndpoint
# The Vertex AI path is disabled, so its initialization must stay commented out as well
# (PROJECT and LOCATION are only defined when that path is used).
# import vertexai
# from langchain.llms import VertexAI
# from langchain.embeddings import VertexAIEmbeddings
# vertexai.init(project=PROJECT, location=LOCATION)  # GCP project ID, LOCATION as region
# The PaLM 2 for Text (text-bison, text-unicorn) foundation models are optimized for a variety of
# natural language tasks such as sentiment analysis, entity extraction, and content creation. The types
# of content that the PaLM 2 for Text models can create include document summaries, answers to
# questions, and labels that classify content.
llm = HuggingFaceEndpoint(repo_id="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.9)
#llm = VertexAI(model_name="text-bison@001",max_output_tokens=256,temperature=0.1,top_p=0.8,top_k=40,verbose=True,)
#embeddings = VertexAIEmbeddings()
# VectorstoreIndexCreator expects a LangChain Embeddings object (with embed_documents/embed_query),
# not raw vectors, so wrap a sentence-transformers model rather than calling model.encode() directly.
embeddings = HuggingFaceEmbeddings()  # uses the library's default sentence-transformers model
# The function below scrapes all of the paragraph text from the webpage link provided by the user
# and saves it in a text file.
def get_text(url):
    # Send a GET request to the URL
    response = requests.get(url)
    # Create a BeautifulSoup object with the HTML content
    soup = BeautifulSoup(response.content, "html.parser")
    # Find the specific elements containing the text to scrape; here, all <p> tags
    paragraphs = soup.find_all("p")
    # Write each paragraph's text to a temporary file, one paragraph per line
    os.makedirs("text", exist_ok=True)  # make sure the target folder exists before writing
    with open("text\\temp.txt", "w", encoding="utf-8") as file:
        for paragraph in paragraphs:
            file.write(paragraph.get_text() + "\n")
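
# Example usage (hypothetical URL): get_text("https://example.com/article") fetches that page
# and writes the text of every <p> tag to text\temp.txt, overwriting any previous scrape.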
@st.cache_resource
def create_langchain_index(input_text):
    print("--indexing---")
    get_text(input_text)
    loader = TextLoader("text\\temp.txt", encoding="utf-8")
    # data = loader.load()
    index = VectorstoreIndexCreator(
        vectorstore_cls=DocArrayInMemorySearch, embedding=embeddings
    ).from_loaders([loader])
    return index
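
# A note on what from_loaders does: it loads the scraped file, splits it into chunks,
# embeds each chunk with the embeddings object above, and stores the vectors in a
# DocArrayInMemorySearch index, so the whole ingestion pipeline is a single call.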
# @st.cache_resource
# def get_basic_page_details(input_text, summary_query, tweet_query, ln_query):
#     index = create_langchain_index(input_text)
#     summary_response = index.query(summary_query)
#     tweet_response = index.query(tweet_query)
#     ln_response = index.query(ln_query)
#     return summary_response, tweet_response, ln_response
@st.cache_data
def get_response(input_text, query):
    # input_text is unused inside the function but keeps cached answers tied to the page;
    # the index itself is the module-level one built by create_langchain_index below.
    print(f"--querying---{query}")
    response = index.query(query, llm=llm)
    return response
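
# Example (hypothetical values): once a page has been indexed,
#   get_response("https://example.com/article", "Who is the author?")
# runs the question through the index with the Mistral endpoint as the LLM and caches
# the answer, so repeating the same (input_text, query) pair costs nothing.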
# The code below is a simple flow that accepts the webpage link and processes the queries
# using the get_response function created above. Thanks to the caches, the same page is not
# re-indexed and repeated queries are not recomputed.
st.title('Webpage Question and Answering')

input_text = st.text_input("Provide the link to the webpage...")

summary_response = ""
tweet_response = ""
ln_response = ""

# if st.button("Load"):
if input_text:
    index = create_langchain_index(input_text)

    summary_query = "Write a 100-word summary of the document"
    summary_response = get_response(input_text, summary_query)

    tweet_query = "Write a Twitter tweet"
    tweet_response = get_response(input_text, tweet_query)

    ln_query = "Write a LinkedIn post for the document"
    ln_response = get_response(input_text, ln_query)

with st.expander('Page Summary'):
    st.info(summary_response)
with st.expander('Tweet'):
    st.info(tweet_response)
with st.expander('LinkedIn Post'):
    st.info(ln_response)

st.session_state.input_text = ''

question = st.text_input("Ask a question from the link you shared...")
if st.button("Ask"):
    if question:
        index = create_langchain_index(input_text)
        response = get_response(input_text, question)
        st.write(response)
    else:
        st.warning("Please enter a question.")
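
# To run the app locally, a rough sketch (the file name app.py and the exact dependency
# list are assumptions, not part of this Space's metadata):
#   export HUGGINGFACEHUB_API_TOKEN=hf_...   # HuggingFaceEndpoint needs a Hugging Face API token
#   pip install streamlit requests beautifulsoup4 langchain langchain-community sentence-transformers docarray
#   streamlit run app.py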