from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    pipeline,
    GenerationConfig,
)
from textwrap import dedent
class lamini:
    def __init__(self):
        pass

    def load_model(self, task="text2text-generation", **kwargs) -> HuggingFacePipeline:
        """Build a LangChain pipeline around the MBZUAI/LaMini-Flan-T5-248M model.

        Keyword Args:
            max_length (int): maximum generation length (default 512)
            temperature (float): sampling temperature (default 0)
            top_p (float): nucleus sampling threshold (default 0.95)
            repetition_penalty (float): penalty for repeated tokens (default 1.15)

        Returns:
            HuggingFacePipeline: LangChain wrapper around the transformers pipeline
        """
        model_id = "MBZUAI/LaMini-Flan-T5-248M"
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        gen_config = GenerationConfig.from_pretrained(model_id)

        max_length = kwargs.get("max_length", 512)
        temperature = kwargs.get("temperature", 0)
        top_p = kwargs.get("top_p", 0.95)
        repetition_penalty = kwargs.get("repetition_penalty", 1.15)

        pipe = pipeline(
            task,  # defaults to "text2text-generation"
            model=model,
            tokenizer=tokenizer,
            generation_config=gen_config,
            max_length=max_length,
            top_p=top_p,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
        )
        llm = HuggingFacePipeline(pipeline=pipe)
        return llm
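
# Usage sketch (illustrative values; assumes the LaMini checkpoint can be
# downloaded from the Hugging Face Hub at runtime):
#
#   llm = lamini().load_model(max_length=512, temperature=0)
#   print(llm("summarize for better understanding: <some long text>"))
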
class templates:
    def __init__(self, llm: HuggingFacePipeline):
        self.llm = llm

    def summarize(self, text, **kwargs):
        """Summarize text.

        Args:
            text (str): text to summarize

        Returns:
            str: summarized text
        """
        instruction = "summarize for better understanding: "
        return self.llm(instruction + text, **kwargs)
    def generate_title(self, text, **kwargs):
        """Generate a title for text.

        Args:
            text (str): text to generate a title for

        Returns:
            str: generated title
        """
        instruction = "generate a title for this text: "
        return self.llm(instruction + text, **kwargs)
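
# Usage sketch (assumes an `llm` built with lamini.load_model above):
#
#   t = templates(llm)
#   print(t.summarize("<transcript text>"))
#   print(t.generate_title("<transcript text>"))
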
class qa_template:
    def __init__(self, llm):
        from langchain.chains.retrieval_qa.base import BaseRetrievalQA

        self.llm = llm
        # Set by load(); declared here for type checkers.
        self.qa_inf: BaseRetrievalQA

    def load(self, knowledge_base):
        """Index a knowledge base and build the retrieval QA chain.

        Args:
            knowledge_base (str): raw text to chunk, embed, and index

        Returns:
            BaseRetrievalQA: the QA chain (also stored on self.qa_inf)
        """
        from utils import LangChainChunker
        from langchain.vectorstores import Chroma
        from langchain.chains import RetrievalQA

        embeds = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
        chunker = LangChainChunker(knowledge_base)
        chunks = chunker.chunker(size=512)
        db = Chroma.from_texts(chunks, embeds)
        retriever = db.as_retriever()
        qa_inf = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff", retriever=retriever
        )
        self.qa_inf = qa_inf
        return qa_inf
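
    # Usage sketch (hypothetical names; `subs` would be transcript text and
    # `llm` an LLM from lamini.load_model):
    #
    #   qa = qa_template(llm)
    #   chain = qa.load(subs)
    #   print(chain.run("What is the video about?"))
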
    def start_gradio(self, title: str):
        """Build and launch the Gradio chat interface.

        Args:
            title (str): title for the interface (currently unused)
        """
        import gradio as gr

        load = self.load

        def interface(msg, history):
            res = self.qa_inf.run(msg)
            history.append((msg, res))
            return "", history

        def reload(video_id):
            from utils import getSubsText

            print(f"Setting up {video_id}")
            subs = getSubsText(video_id)
            _ = load(subs)

        with gr.Blocks() as demo:
            with gr.Column():
                gr.Markdown(
                    dedent(
                        """
                        # video to QA
                        A test implementation that uses vector stores and mini LLMs to create
                        a question-answering chatbot interface for _YouTube videos_.
                        """
                    )
                )
                chatbot = gr.Chatbot()
                with gr.Row():
                    with gr.Column():
                        videoId = gr.Textbox(label="Video ID", placeholder="Enter video ID here")
                        msg = gr.Textbox(label="Question Box", placeholder="Enter your question here")
                        clear = gr.ClearButton([msg, videoId, chatbot])
                gr.Markdown(
                    dedent(
                        """
                        ## Getting started
                        To start, you first need to enter the video ID of a YouTube video.
                        Pick a YouTube video that has English dialog.
                        > e.g. https://www.youtube.com/watch?v=BsnCpESUEqM
                        Here `BsnCpESUEqM` is the video ID:
                        ```
                        https://www.youtube.com/watch?v=BsnCpESUEqM
                                                        ^^^^^^^^^^^
                                                        video_id
                        ```
                        > In the URL, the query string starts after `?`; the video ID is the value of the `v` parameter.
                        Copy-paste the video ID into the textbox, press return/enter, and wait ~5 seconds while the video information is fetched.
                        ---
                        Now start typing your questions in the Question Box field and press return/enter to send them to the LLM.
                        """
                    )
                )
                msg.submit(interface, [msg, chatbot], [msg, chatbot])
                videoId.submit(reload, [videoId])

        # ui = gr.ChatInterface(
        #     fn=interface,
        #     examples=["What is the video about?", "key points of the video"],
        #     title=f"Question Mode - {title}",
        # )
        # ui.launch()

        demo.launch()
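

# Minimal end-to-end wiring sketch. This module does not define its own entry
# point, so the guarded block below is an assumed launcher, not the Space's
# actual one; the knowledge base itself is loaded from the UI via the Video ID box.
if __name__ == "__main__":
    llm = lamini().load_model()
    qa = qa_template(llm)
    qa.start_gradio(title="video to QA")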