# KnowledgeHub / app.py
# Nitish-py's picture
# Update app.py
# f4bf677
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain import vectorstores as vs
from langchain import chains
import pinecone
from goose3 import Goose
import streamlit as st
import whisper
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import AI21
from pytube import YouTube
import moviepy.editor
import time
# Load variables from a .env file (python-dotenv) before reading the settings.
load_dotenv()

# Credentials/settings shared by the ingestion and chat functions below.
api_key = os.environ.get('PINECONE_API_KEY')
env = os.environ.get('PINECONE_ENVIRONMENT')
ai21_api_key = os.environ.get('AI21_API_KEY')

# Configure the Pinecone client as soon as the module is imported.
pinecone.init(environment=env, api_key=api_key)
def txtread(txt_content):
    """Chunk a UTF-8 text document and upload its embeddings to Pinecone.

    Args:
        txt_content: The document as raw ``bytes``, an already decoded
            ``str``, or a binary file-like object (for example a Streamlit
            upload) exposing ``getvalue()`` or ``read()``.

    The text is split on newlines into chunks of at most 1000 characters and
    stored in the ``multigpt`` index under the ``txt`` namespace. Progress is
    reported through the module-level ``process`` placeholder.
    """
    # File-like uploads have no .decode(); pull the underlying bytes out first.
    if hasattr(txt_content, "getvalue"):
        txt_content = txt_content.getvalue()
    elif hasattr(txt_content, "read"):
        txt_content = txt_content.read()

    if isinstance(txt_content, (bytes, bytearray)):
        texts = txt_content.decode('utf-8')
    else:
        texts = txt_content

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    # The returned vector-store handle is not needed here; only the upload matters.
    vs.pinecone.Pinecone.from_texts(chunks, embeddings, index_name="multigpt", namespace="txt")
    process.success("Data is securly Uploaded")
def pdfread(pdf):
    """Extract the text of a PDF, chunk it and upload its embeddings to Pinecone.

    Args:
        pdf: Whatever ``PdfReader`` accepts as its source (the caller in this
            file passes the uploaded PDF object).

    The concatenated page text is split on newlines into chunks of at most
    4000 characters and stored in the ``multigpt`` index under the ``pdf``
    namespace. Progress is reported through the ``process`` placeholder.
    """
    reader = PdfReader(pdf)
    # Concatenate the text of every page, in page order.
    full_text = "".join(pg.extract_text() for pg in reader.pages)

    splitter = CharacterTextSplitter(separator="\n", chunk_size=4000, chunk_overlap=0)
    pieces = splitter.split_text(full_text)
    process.success("Chunking of the data is done")

    embedder = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    vs.pinecone.Pinecone.from_texts(
        pieces,
        embedder,
        index_name="multigpt",
        namespace="pdf",
    )
    process.success("Data is securly Uploaded")
def urlread(url_path):
    """Fetch a web page, chunk its cleaned text and upload the embeddings to Pinecone.

    Args:
        url_path: URL of the page to extract.

    The article text returned by Goose is split on newlines into chunks of at
    most 2000 characters and stored in the ``multigpt`` index under the
    ``url`` namespace. Progress is reported through the ``process`` placeholder.
    """
    # Use Goose as a context manager so its resources are released once the
    # article text has been extracted.
    with Goose({'browser_user_agent': 'Mozilla', 'parser_class': 'soup'}) as g:
        texts = g.extract(url=url_path).cleaned_text

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=2000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    # The returned vector-store handle is not needed here; only the upload matters.
    vs.pinecone.Pinecone.from_texts(chunks, embeddings, index_name="multigpt", namespace="url")
    process.success("Data is securly Uploaded")
def scrape(vidlink):
    """Download a YouTube video, transcribe it and upload the transcript to Pinecone.

    Args:
        vidlink: Link to the YouTube video.

    Raises:
        ValueError: If no downloadable stream is offered for the link, or the
            downloaded video carries no audio track.

    The video is saved as ``video.mp4`` and its audio as ``audio.mp3`` in the
    working directory. The Whisper ``base`` model transcribes the audio; the
    transcript is split on newlines into chunks of at most 1000 characters and
    stored in the ``multigpt`` index under the ``vid`` namespace. Progress is
    reported through the ``process`` placeholder.
    """
    stream = YouTube(vidlink).streams.get_highest_resolution()
    if stream is None:
        raise ValueError("No downloadable stream found for this video link")

    process.success('Downloading Video')
    # download() returns only after the file has been written and hands back
    # its path, so there is no need to poll the filesystem for it.
    video_path = stream.download(filename='video.mp4')

    video = moviepy.editor.VideoFileClip(video_path)
    try:
        process.warning('Extracting Audio')
        audio = video.audio
        if audio is None:
            raise ValueError("The downloaded video has no audio track to transcribe")
        audio.write_audiofile("audio.mp3")
    finally:
        # Release the clip's file readers even if audio extraction fails.
        video.close()

    process.warning('Trancscribing the Audio')
    model = whisper.load_model('base')
    result = model.transcribe('audio.mp3')
    texts = result['text']
    process.success('Transcription is done')

    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=0,
    )
    chunks = text_splitter.split_text(texts)
    process.success("Chunking of the data is done")

    embeddings = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    process.warning("Starting Upload of the vector data in the Pinecone VectoreDB")
    # The returned vector-store handle is not needed here; only the upload matters.
    vs.pinecone.Pinecone.from_texts(chunks, embeddings, index_name="multigpt", namespace="vid")
    process.success("Data is securly Uploaded")
def chain(name):
    """Build a conversational retrieval chain over one namespace of the index.

    Args:
        name: Pinecone namespace to search (this file uses ``txt``, ``pdf``,
            ``url`` and ``vid``).

    Returns:
        A ``ConversationalRetrievalChain`` whose retriever runs a similarity
        search returning 10 results from the ``multigpt`` index and whose
        language model is AI21.
    """
    process.warning("Your Chain is running")
    embedder = HuggingFaceEmbeddings()
    pinecone.init(api_key=api_key, environment=env)
    store = vs.pinecone.Pinecone.from_existing_index(
        index_name='multigpt',
        namespace=name,
        embedding=embedder,
    )
    doc_retriever = store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
    model = AI21(ai21_api_key=ai21_api_key)
    return chains.ConversationalRetrievalChain.from_llm(llm=model, retriever=doc_retriever)
def ai(qa, prompt, chat_history=None):
    """Run one question through a conversational retrieval chain.

    Args:
        qa: Callable chain accepting a dict with ``question`` and
            ``chat_history`` keys (as returned by ``chain()``).
        prompt: The user's question.
        chat_history: Optional earlier turns to pass to the chain. Defaults to
            an empty history, which is what every existing caller gets.

    Returns:
        The chain's result; callers in this file read its ``"answer"`` entry.
    """
    # Build a fresh list per call instead of sharing a mutable default.
    history = [] if chat_history is None else chat_history
    result = qa({"question": prompt, "chat_history": history})
    process.success("Search Complete!")
    return result
def intro():
    """Render the home page and reveal the welcome text one character at a time."""
    placeholder.title('____________πŸ‘¨πŸ»β€πŸ’» MINOR PROJECT πŸ‘¨πŸ»β€πŸ’»____________\n')
    data.subheader('πŸš€ Introducing "KnowledgeHub" Web App! 🌐🧠')
    process.write('___________________________________________')
    welcome_text = ('''
Welcome to the future of knowledge interaction! πŸš€ With our groundbreaking web app, "KnowledgeHub," you can effortlessly infuse intelligence into our platform through various mediums. πŸ“šπŸ’»
How It Works:
πŸ“ File Magic: Upload your knowledge-packed text files or PDFs to seamlessly share insights and wisdom with the world! πŸš€
🌐 URL Wizardry: Simply paste a website URL, and watch as the KnowledgeHub transforms online information into a dynamic source of intelligence! 🀯
πŸŽ₯ YouTube Brilliance: Share video insights by dropping those mind-blowing YouTube links! Transforming video content into knowledge gold has never been easier! 🌟
Why use KnowledgeHub:
πŸš€ Instant Interaction: Say goodbye to static data! Engage with your knowledge instantly and turn information into actionable insights. πŸš€
🌐 Universal Accessibility: Access your knowledge from anywhere, anytime, and empower your audience to dive into your insights effortlessly. 🌍
πŸ€– AI-Powered Conversations: Leverage cutting-edge AI for interactive conversations based on your knowledge repository! It's like having a brilliant virtual assistant at your fingertips! πŸ€–πŸ’‘
πŸ“Š Data-Driven Decisions: Turn raw data into actionable intelligence. Make informed decisions backed by the power of your knowledge repository. πŸ“ˆ
Embrace the future of knowledge sharing with KnowledgeHub – Where ideas come to life, and intelligence knows no bounds! πŸš€πŸ”₯πŸ”''')
    typed_area = st.empty()
    # Re-render an ever longer prefix of the text to get a typing effect.
    for shown in range(1, len(welcome_text) + 1):
        time.sleep(0.005)
        typed_area.markdown(welcome_text[:shown])
def upload():
    """Render the upload page and ingest the data the user supplies.

    The sidebar radio selects the source type (text file, PDF, URL or YouTube
    video); the matching widget is drawn in the ``data`` placeholder and, once
    it has a value, the corresponding ingestion function is called. Status
    messages are shown through the ``process`` placeholder.
    """
    placeholder.title("Let's create the Knowledge Base")
    process.error('Here you will be notified regarding the status of the upload')
    page = ['', 'TEXT', 'PDF', 'URL', 'VIDEO']
    choice = st.sidebar.radio("Choose your mode", page)

    # Tracks whether an ingestion actually ran during this pass.
    uploaded = False
    if choice == '':
        data.subheader('Choose what type of data you wanna upload')
    elif choice == 'TEXT':
        text = data.file_uploader("Upload your txt file", type="txt")
        if text:
            # txtread decodes bytes, so hand it the upload's raw buffer
            # rather than the file-like upload object itself.
            txtread(text.getvalue())
            uploaded = True
    elif choice == 'PDF':
        pdf = data.file_uploader("Upload your PDF file", type="pdf")
        if pdf:
            pdfread(pdf)
            uploaded = True
    elif choice == 'URL':
        url_path = data.text_input('Enter the url')
        if url_path:
            urlread(url_path)
            uploaded = True
    elif choice == 'VIDEO':
        link = data.text_input('Enter link to the youtube video')
        if link:
            scrape(link)
            uploaded = True

    if uploaded:
        # Leave the final upload status visible for a while before replacing
        # it; skip the pause entirely when nothing was ingested.
        time.sleep(10)
        process.success('You can go to the chat section or upload more data')
def chat():
    """Render the chat page and answer a question from the chosen data source.

    The sidebar radio selects which namespace to query. Once a question is
    entered, a retrieval chain is built for that namespace, the question is
    run through it, and the answer is revealed one character at a time.
    """
    placeholder.title("Let's go!!")
    process.error('Here you will be notified regarding the retrival of your answers')
    modes = ['', 'TEXT', 'PDF', 'URL', 'VIDEO']
    selected = st.sidebar.radio("Choose your mode", modes)

    if selected == '':
        data.subheader('Choose from which data you want answers from')
        return

    # Per mode: (Pinecone namespace, label of the question box).
    sources = {
        'TEXT': ('txt', "Ask a question based on the txt file"),
        'PDF': ('pdf', "Ask a question based on the PDF"),
        'URL': ('url', "Ask a question based on the data from the url"),
        'VIDEO': ('vid', "Ask a question from based on the YouTube video"),
    }
    if selected not in sources:
        return

    namespace, label = sources[selected]
    question = st.text_input(label, value="")
    if not question:
        return

    retrieval_chain = chain(namespace)
    outcome = ai(retrieval_chain, question)
    answer_area = st.empty()
    typed = ''
    # Re-render a growing prefix of the answer to get a typing effect.
    for piece in outcome["answer"]:
        typed = typed + piece
        time.sleep(0.01)
        answer_area.markdown(typed)
def main():
    """Create the shared page placeholders and show the page picked in the sidebar."""
    global placeholder, process, data
    # These three slots are module globals that the page functions write into.
    placeholder = st.empty()
    data = st.empty()
    process = st.empty()

    # Sidebar label -> function that renders that page.
    views = {'HOME': intro, 'Upload': upload, 'Chat': chat}
    selection = st.sidebar.radio("Choose upload or chat", list(views))
    render = views.get(selection)
    if render is not None:
        render()
# Build the page only when this file is executed as the main script.
if __name__ == "__main__":
    main()