Spaces:
Sleeping
Sleeping
File size: 5,998 Bytes
2b89dc1 3d48fe6 2b89dc1 8fd5a3e 2b89dc1 f097af1 2b89dc1 f097af1 2b89dc1 56e215e 2b89dc1 167a25a 2b89dc1 a03e0aa 8fd5a3e a03e0aa 8fd5a3e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 |
import os
import gradio as gr
import logging
from langchain.document_loaders import PDFMinerLoader,CSVLoader ,UnstructuredWordDocumentLoader,TextLoader,OnlinePDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings
from langchain.vectorstores import FAISS
from langchain import HuggingFaceHub
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.docstore.document import Document
from youtube_transcript_api import YouTubeTranscriptApi
import chatops
# Module-level logger; used to report transcript-loading failures.
logger = logging.getLogger(__name__)
# Device string passed to the sentence-transformer embedding model in youtube_chat.
DEVICE = 'cpu'
# Upper bound of the "Max new tokens" slider.
MAX_NEW_TOKENS = 4096
# Initial value of the "Temperature" slider.
DEFAULT_TEMPERATURE = 0.1
# Initial value of the "Max new tokens" slider.
DEFAULT_MAX_NEW_TOKENS = 2048
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
MAX_INPUT_TOKEN_LENGTH = 4000
# Lower bound of the "Max Character" slider; the slider's upper bound is five times this value.
DEFAULT_CHAR_LENGTH = 1000
def loading_file() -> str:
    """Return the fixed status text ``"Loading..."``.

    NOTE(review): no event in the visible code calls this function; it is
    presumably meant as an interim status callback - confirm before removing.
    """
    return "Loading..."
def clear_chat() -> list:
    """Return a new, empty chat history.

    Bound to the "Delete Chat" button, whose output is the chatbot component,
    so the returned empty list replaces the displayed conversation.
    """
    return []
def _extract_youtube_video_id(video_link):
    """Return the video id contained in a YouTube link, or raise ``ValueError``."""
    # Function-scope import so the block does not depend on the file header.
    from urllib.parse import parse_qs, urlparse

    link = (video_link or "").strip()
    if "watch?v=" in link:
        # Original extraction, kept verbatim for every link that already worked.
        video_id = link.split("watch?v=")[1].split("&")[0]
    else:
        parsed = urlparse(link if "://" in link else f"https://{link}")
        host = (parsed.hostname or "").lower()
        segments = [segment for segment in parsed.path.split("/") if segment]
        video_id = ""
        if host == "youtu.be" or host.endswith(".youtu.be"):
            # Short links carry the id as the first path segment.
            video_id = segments[0] if segments else ""
        elif "youtube" in host:
            query_ids = parse_qs(parsed.query).get("v")
            if query_ids:
                # e.g. /watch?feature=share&v=<id>, where "v" is not the first parameter.
                video_id = query_ids[0]
            elif len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
                video_id = segments[1]
    if not video_id:
        raise ValueError(f"Could not find a YouTube video id in link: {video_link!r}")
    return video_id


def get_text_from_youtube_link(video_link, max_video_length=800):
    """Fetch a video's transcript and return it as one space-separated string.

    Args:
        video_link: YouTube URL. ``watch?v=`` links are parsed exactly as
            before; ``youtu.be/<id>``, ``/shorts/<id>``, ``/embed/<id>``,
            ``/live/<id>`` and links where ``v`` is not the first query
            parameter are accepted as well.
        max_video_length: Maximum number of characters to return. ``None``
            disables truncation.

    Returns:
        The transcript text, truncated to ``max_video_length`` characters.

    Raises:
        ValueError: If no video id can be found in ``video_link``.
        Exception: Whatever ``YouTubeTranscriptApi.get_transcript`` raises
            (it is not caught here).
    """
    video_id = _extract_youtube_video_id(video_link)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    # Each transcript entry is read through ``.get("text")``; entries without
    # text are skipped instead of breaking the concatenation.
    fragments = (entry.get("text") for entry in transcript)
    # join() avoids the quadratic ``+=`` loop and the stray leading space that
    # used to consume one character of the length budget.
    video_text = " ".join(fragment for fragment in fragments if fragment)
    return video_text[:max_video_length]
def process_documents(documents, data_chunk=1500, chunk_overlap=100, separator='\n'):
    """Split documents into chunks with ``CharacterTextSplitter``.

    Args:
        documents: Sequence of LangChain documents to split. ``None`` or an
            empty sequence yields an empty list instead of raising.
        data_chunk: Value passed as the splitter's ``chunk_size``.
        chunk_overlap: Value passed as the splitter's ``chunk_overlap``.
        separator: String the splitter splits on. Defaults to the previously
            hard-coded newline.

    Returns:
        The list of chunks produced by ``split_documents``.
    """
    # The upstream loader returns None when it fails; do not crash on that.
    if not documents:
        return []
    text_splitter = CharacterTextSplitter(
        chunk_size=data_chunk,
        chunk_overlap=chunk_overlap,
        separator=separator,
    )
    return text_splitter.split_documents(documents)
def process_youtube_link(link, document_name="youtube-content"):
    """Wrap a video's transcript text in a single-element document list.

    Args:
        link: YouTube URL handed to ``get_text_from_youtube_link``.
        document_name: Base name used for the document's ``source`` metadata
            (``"<document_name>.txt"``).

    Returns:
        A list containing one ``Document`` whose ``page_content`` is the
        transcript text, or ``None`` if anything failed. Failures are logged,
        not raised.
    """
    try:
        metadata = {"source": f"{document_name}.txt"}
        video_text = get_text_from_youtube_link(video_link=link)
        return [Document(page_content=video_text, metadata=metadata)]
    except Exception as err:  # best-effort boundary: report and signal failure
        # logger.exception keeps the traceback; the message text is unchanged.
        logger.exception('Error in reading document. %s', err)
        return None
def youtube_chat(youtube_link, API_key, llm='HuggingFace', temperature=0.1, max_tokens=1096, char_length=1500):
    """Index a video's transcript and build the question-answering chain.

    On success the module-level globals ``vector_db`` (FAISS index) and ``qa``
    (RetrievalQA chain) are replaced. On failure they are left untouched.

    Args:
        youtube_link: YouTube URL whose transcript is loaded.
        API_key: Key forwarded to ``chatops.chat_application``.
        llm: LLM service name forwarded as ``llm_service``.
        temperature: Forwarded to ``chatops.chat_application``.
        max_tokens: Forwarded to ``chatops.chat_application``.
        char_length: Chunk size used when splitting the transcript.
            NOTE(review): this argument used to be accepted but ignored; it is
            wired to the chunk size because its default (1500) matches
            ``process_documents``' default - confirm that this is the intent.

    Returns:
        A status string for the UI.
    """
    global vector_db, qa

    document = process_youtube_link(link=youtube_link)
    if not document:
        # The loader logs the cause and returns None; report it instead of
        # crashing further down.
        return "Could not read captions from the Youtube link. Please check the link and try again."

    texts = process_documents(documents=document, data_chunk=int(char_length))
    if not texts:
        return "No caption text found for this Youtube link."

    embedding_model = SentenceTransformerEmbeddings(
        model_name='thenlper/gte-base',
        model_kwargs={"device": DEVICE},
    )
    vector_db = FAISS.from_documents(documents=texts, embedding=embedding_model)
    qa = RetrievalQA.from_chain_type(
        llm=chatops.chat_application(
            llm_service=llm,
            key=API_key,
            temperature=temperature,
            max_tokens=max_tokens,
        ),
        chain_type='stuff',
        retriever=vector_db.as_retriever(),
        return_source_documents=True,
    )
    return "Youtube link Processing completed ..."
##################################################
##################################################
################### GRADIO #######################
##################################################
##################################################
# Stylesheet rule for the element with elem_id "col-container": caps its width
# and centres it horizontally.
css = """
#col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
"""

# HTML header rendered at the top of the page through gr.HTML(title).
title = """
<div style="text-align: center;max-width: 700px;">
<h1>Chat with YouTube videos • OpenAI/HuggingFace</h1>
<p style="text-align: center;">Add a YouTube link to fetch its captions and load them as embeddings.<br />
Once the status is ready, you can start asking questions about the content you uploaded.<br />
The repo lets you use HuggingFace or OpenAI as the LLM; make sure to add your API key before proceeding.
</p>
</div>
"""
def _answer_question(user_question, chat_history):
    """Answer ``user_question`` with the ``qa`` chain and extend the chat history.

    Reads the module-level ``qa`` chain that ``youtube_chat`` creates. Returns
    a pair: an empty string (clears the question box) and the updated history
    as a list of ``[question, answer]`` pairs.
    """
    history = list(chat_history or [])
    if not user_question or not user_question.strip():
        return "", history
    chain = globals().get("qa")  # not defined until a link has been processed
    if chain is None:
        answer = "Please process a Youtube link before asking questions."
    else:
        try:
            answer = chain({"query": user_question})["result"]
        except Exception as err:  # surface LLM/API failures in the chat window
            logger.exception("Error while answering question. %s", err)
            answer = f"Could not answer the question: {err}"
    history.append([user_question, answer])
    return "", history


# NOTE(review): the container nesting below was reconstructed from a source
# whose indentation was lost - confirm it against the intended layout.
# NOTE(review): the module-level ``css`` string is not passed to gr.Blocks, so
# its "#col-container" rule is not applied - confirm whether it should be.
with gr.Blocks(css="""#chatbot {font-size: 14px;min-height: 300px;}""") as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(title)

    with gr.Group():
        # elem_id added so the "#chatbot" rule above actually matches a component.
        chatbot = gr.Chatbot(height=300, elem_id="chatbot")
    with gr.Row():
        question = gr.Textbox(label="Type your question !", lines=1).style(full_width=True)
        submit_btn = gr.Button(value="Send message", variant="primary", scale=1)
        clean_chat_btn = gr.Button("Delete Chat")

    with gr.Column():
        with gr.Box():
            LLM_option = gr.Dropdown(
                ['HuggingFace', 'OpenAI'],
                label='Large Language Model Selection',
                info='LLM Service',
            )
            API_key = gr.Textbox(label="Add API key", type="password", autofocus=True)
            with gr.Accordion(label='Advanced options', open=False):
                max_new_tokens = gr.Slider(
                    label='Max new tokens',
                    minimum=2048,
                    maximum=MAX_NEW_TOKENS,
                    step=1,
                    value=DEFAULT_MAX_NEW_TOKENS,
                )
                temperature = gr.Slider(
                    label='Temperature',
                    minimum=0.1,
                    maximum=4.0,
                    step=0.1,
                    value=DEFAULT_TEMPERATURE,
                )
                char_length = gr.Slider(
                    label='Max Character',
                    minimum=DEFAULT_CHAR_LENGTH,
                    maximum=5 * DEFAULT_CHAR_LENGTH,
                    step=500,
                    value=1500,
                )

    with gr.Column():
        with gr.Box():
            youtube_link = gr.Textbox(label="Add your you tube Link", text_align='left', autofocus=True)

    with gr.Column():
        with gr.Box():
            load_youtube_bt = gr.Button("Process Youtube Link",).style(full_width=False)
            langchain_status = gr.Textbox(label="Status", placeholder="", interactive=False)

    # Input order must match youtube_chat's positional parameters.
    load_youtube_bt.click(
        youtube_chat,
        inputs=[youtube_link, API_key, LLM_option, temperature, max_new_tokens, char_length],
        outputs=[langchain_status],
        queue=False,
    )
    clean_chat_btn.click(clear_chat, [], chatbot)
    # Previously neither the Send button nor the question box had a handler,
    # so the chain built by youtube_chat was never queried.
    submit_btn.click(_answer_question, inputs=[question, chatbot], outputs=[question, chatbot])
    question.submit(_answer_question, inputs=[question, chatbot], outputs=[question, chatbot])

demo.launch()