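# NOTE: the next three lines are web-page residue from where this file was
# copied (avatar caption, commit message, commit hash); kept as comments.
# deepaksarika01's picture
# fix typo
# 0a0c342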
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.chains import RetrievalQA
from transformers import (
AutoTokenizer,
AutoModelForSeq2SeqLM,
pipeline,
GenerationConfig
)
from textwrap import dedent
class lamini:
    """Factory for a LangChain LLM backed by the LaMini-Flan-T5 model."""

    def __init__(self):
        pass

    def load_model(self, task="text2text-generation", **kwargs) -> HuggingFacePipeline:
        """Build a LangChain ``HuggingFacePipeline`` for LaMini-Flan-T5.

        The tokenizer, model and generation config are all loaded from
        ``MBZUAI/LaMini-Flan-T5-248M``.

        Args:
            task (str): transformers pipeline task name, forwarded to
                ``pipeline(...)``. Defaults to ``"text2text-generation"``.
            **kwargs: optional generation settings. Recognised keys and
                their defaults: ``max_length`` (512), ``temperature`` (0),
                ``top_p`` (0.95), ``repetition_penalty`` (1.15). Any other
                key is ignored.

        Returns:
            HuggingFacePipeline: LangChain wrapper around the pipeline.
        """
        model_id = "MBZUAI/LaMini-Flan-T5-248M"

        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
        gen_config = GenerationConfig.from_pretrained(model_id)

        max_length = kwargs.get("max_length", 512)
        temperature = kwargs.get("temperature", 0)
        top_p = kwargs.get("top_p", 0.95)
        repetition_penalty = kwargs.get("repetition_penalty", 1.15)

        pipe = pipeline(
            # Honour the caller's task instead of a hard-coded literal.
            task,
            model=model,
            tokenizer=tokenizer,
            generation_config=gen_config,
            max_length=max_length,
            top_p=top_p,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
        )

        llm = HuggingFacePipeline(pipeline=pipe)
        return llm
class templates:
    """Prompt helpers that prepend a fixed instruction and call the LLM."""

    def __init__(self, llm: "HuggingFacePipeline"):
        # `llm` only needs to be callable as llm(prompt, **kwargs).
        self.llm = llm

    def _run(self, instruction, text, **kwargs):
        """Prefix ``text`` with ``instruction`` and return the LLM's output."""
        return self.llm(instruction + text, **kwargs)

    def summarize(self, text, **kwargs):
        """Summarize text

        Args:
            text (str): text to summarize
            **kwargs: forwarded unchanged to the LLM call

        Returns:
            str: summarized text
        """
        return self._run("summarize for better understanding: ", text, **kwargs)

    def generate_title(self, text, **kwargs):
        """Generate a title for text

        Args:
            text (str): text to generate title for
            **kwargs: forwarded unchanged to the LLM call

        Returns:
            str: title
        """
        return self._run("generate a title for this text: ", text, **kwargs)

    def generate_tile(self, text, **kwargs):
        """Misspelled alias of :meth:`generate_title`, kept for existing callers."""
        return self.generate_title(text, **kwargs)
class qa_template:
    """Retrieval question answering over a text knowledge base, with a Gradio chat UI."""

    # Reply used when a question arrives before any knowledge base is loaded.
    _NOT_READY_MSG = (
        "No video is loaded yet. Enter a video ID first and wait for it to "
        "finish loading."
    )

    def __init__(self, llm):
        from typing import Optional

        from langchain.chains.retrieval_qa.base import BaseRetrievalQA

        self.llm = llm
        # Start as None so "nothing loaded yet" is an explicit, checkable
        # state rather than a missing attribute.
        self.qa_inf: Optional[BaseRetrievalQA] = None

    def load(self, knowledge_base):
        """Load knowledge base

        Chunks the text with ``LangChainChunker`` (size 512), embeds the
        chunks with ``hkunlp/instructor-large`` into a Chroma store, and
        builds a "stuff" RetrievalQA chain on top of it. The chain is also
        stored on ``self.qa_inf``.

        Args:
            knowledge_base (str): knowledge base to load

        Returns:
            BaseRetrievalQA: (optional to use) returns QA interface
        """
        from utils import LangChainChunker
        from langchain.vectorstores import Chroma
        from langchain.chains import RetrievalQA

        embeds = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
        chunker = LangChainChunker(knowledge_base)
        chunks = chunker.chunker(size=512)
        db = Chroma.from_texts(chunks, embeds)
        retriever = db.as_retriever()

        qa_inf = RetrievalQA.from_chain_type(
            llm=self.llm, chain_type="stuff", retriever=retriever
        )
        self.qa_inf = qa_inf
        return qa_inf

    def respond(self, msg, history):
        """Answer one chat message and append the exchange to ``history``.

        Args:
            msg (str): the user's question
            history (list): chat history; mutated in place

        Returns:
            tuple: ``("", history)`` -- the empty string clears the input box.
        """
        qa = getattr(self, "qa_inf", None)
        if qa is None:
            # Nothing loaded yet: tell the user instead of raising.
            reply = self._NOT_READY_MSG
        else:
            reply = qa.run(msg)
        history.append((msg, reply))
        return "", history

    def start_gradio(self, title: str):
        """Start gradio interface

        Builds a Blocks app with a chatbot, a video-ID box that reloads the
        knowledge base from the video's subtitles, and a question box, then
        launches it.

        Args:
            title (str): page title passed to ``gr.Blocks``

        Returns:
            None
        """
        import gradio as gr

        def reload(video_id):
            from utils import getSubsText

            print(f"Setting up {video_id}")
            subs = getSubsText(video_id)
            self.load(subs)

        with gr.Blocks(title=title) as demo:
            with gr.Column():
                gr.Markdown(dedent("""
                    # video to QA
                    A test implementation to use vectorstores and mini llms to create
                    a question answer chatbot interface for _youtube videos_
                    """))
                chatbot = gr.Chatbot()
                with gr.Row():
                    with gr.Column():
                        videoId = gr.Textbox(label="Video ID", placeholder="Enter video ID here")
                        msg = gr.Textbox(label="Question Box" , placeholder="Enter your question here")
                        gr.ClearButton([msg, videoId, chatbot])
                    gr.Markdown(
                        dedent("""
                            ## Getting started
                            to start up you need to enter the video ID of youtube video first
                            Get a youtube video which has English dialog
                            > ex: https://www.youtube.com/watch?v=BsnCpESUEqM
                            in this `BsnCpESUEqM` is the video ID
                            ```
                            https://www.youtube.com/watch?v=BsnCpESUEqM
                            ^^^^^^^^^^^
                            video_id
                            ```
                            > in url paramets are seperated by `?` and for video id its `?v`
                            copy-paste the video id to the textbox and press return/enter and wait ~5 seconds to fetch video information
                            ---
                            Now in the Question Box _box_/feild start typing the quesions and press return/enter to send to llm
                            """)
                    )
            # Event listeners must be registered inside the Blocks context.
            msg.submit(self.respond, [msg, chatbot], [msg, chatbot])
            videoId.submit(reload, [videoId])

        demo.launch()