Spaces:
Runtime error
Runtime error
File size: 4,073 Bytes
d1430bc f5b4f3a d1430bc f5b4f3a d1430bc f5b4f3a d1430bc f5b4f3a ab86fa9 d1430bc f5b4f3a d1430bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import re
from ast import literal_eval
import wandb
import gradio as gr
import pandas as pd
from langchain.callbacks import get_openai_callback
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Chroma
from src.config import config
# download and read data
api = wandb.Api()
artifact_df = api.artifact(config.summarized_que_data_artifact)
artifact_df.download(config.root_data_dir)
artifact_embeddings = api.artifact(config.transcript_embeddings_artifact)
chromadb_dir = artifact_embeddings.download(config.root_data_dir / "chromadb")
df_path = config.root_data_dir / "summarized_que_podcasts.csv"
df = pd.read_csv(df_path)
def embed_video(title: str):
video_url = df[df["title"] == title]["url"].values[0]
match = re.search(r"v=([-\w]+)", video_url)
video_id = match.group(1)
# embed video
video_embed = f"<iframe width='560' height='315' src=https://www.youtube.com/embed/{video_id} frameborder='0' allowfullscreen></iframe>"
return video_embed
def get_podcast_info(title: str):
# get questions
questions = df[df["title"] == title]["questions"].values[0]
questions = literal_eval(questions)
que_str = ""
for que in questions:
que_str += f"π {que}\n"
# get summary
summary = df[df["title"] == title]["summary"].values[0]
return summary, que_str
def get_answer(podcast: str, question: str):
index = df[df["title"] == podcast].index[0]
db_dir = os.path.join(chromadb_dir, str(index))
embeddings = OpenAIEmbeddings()
db = Chroma(persist_directory=db_dir, embedding_function=embeddings)
prompt_template = """Use the following pieces of context to answer the question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Don't add your opinions or interpretations. Ensure that you complete the answer.
If the question is not relevant to the context, just say that it is not relevant.
CONTEXT:
{context}
QUESTION: {question}
ANSWER:"""
prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
retriever = db.as_retriever()
retriever.search_kwargs["k"] = 2
qa = RetrievalQA.from_chain_type(
llm=ChatOpenAI(temperature=0),
chain_type="stuff",
retriever=retriever,
chain_type_kwargs={"prompt": prompt},
return_source_documents=True,
)
with get_openai_callback() as cb:
result = qa({"query": question})
print(cb)
answer = result["result"]
return answer
with gr.Blocks() as demo:
gr.Markdown("# Welcome to Gradient Dissent QA Bot!")
with gr.Row():
with gr.Column(scale=0.5):
dropdown = gr.Dropdown(
df["title"].to_list(), label="Select a Podcast Episode", value=df.iloc[0]["title"]
)
podcast_info_btn = gr.Button("Get Podcast Info")
podcast_info_btn.click(
fn=embed_video,
inputs=dropdown,
outputs=gr.HTML(label="Podcast Video"),
)
question_box = gr.Textbox(label="Ask a question about the podcast episode")
with gr.Row():
ques_clear_btn = gr.Button("Clear")
ques_btn = gr.Button("Get Answer")
ques_btn.click(
fn=get_answer,
inputs=[dropdown, question_box],
outputs=gr.Textbox(label="Answer"),
)
ques_clear_btn.click(lambda: None, None, question_box, queue=False)
with gr.Column(scale=0.5):
podcast_info_btn.click(
fn=get_podcast_info,
inputs=dropdown,
outputs=[
gr.Text(label="Summary of the podcast"),
gr.Text(label="Some of the questions you can ask"),
],
)
demo.launch()
|