Gladiator committed on
Commit
f5b4f3a
·
1 Parent(s): 66a35b4

Add the ability to download the latest artifacts from the W&B server

Browse files
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -2,6 +2,7 @@ import os
2
  import re
3
  from ast import literal_eval
4
 
 
5
  import gradio as gr
6
  import pandas as pd
7
  from langchain.callbacks import get_openai_callback
@@ -11,10 +12,17 @@ from langchain.embeddings.openai import OpenAIEmbeddings
11
  from langchain.prompts import PromptTemplate
12
  from langchain.vectorstores import Chroma
13
 
14
- df_path = "data/summary_que_data.csv"
15
- chromadb_dir = "data/chromadb"
16
 
 
 
 
 
17
 
 
 
 
 
18
  df = pd.read_csv(df_path)
19
 
20
 
@@ -43,7 +51,8 @@ def get_podcast_info(title: str):
43
 
44
 
45
  def get_answer(podcast: str, question: str):
46
- db_dir = os.path.join(chromadb_dir, podcast.replace(" ", "_"))
 
47
  embeddings = OpenAIEmbeddings()
48
  db = Chroma(persist_directory=db_dir, embedding_function=embeddings)
49
 
 
2
  import re
3
  from ast import literal_eval
4
 
5
+ import wandb
6
  import gradio as gr
7
  import pandas as pd
8
  from langchain.callbacks import get_openai_callback
 
12
  from langchain.prompts import PromptTemplate
13
  from langchain.vectorstores import Chroma
14
 
15
+ from src.config import config
 
16
 
17
+ # download and read data
18
+ api = wandb.Api()
19
+ artifact_df = api.artifact(config.summarized_que_data_artifact)
20
+ artifact_df.download(config.root_data_dir)
21
 
22
+ artifact_embeddings = api.artifact(config.transcript_embeddings_artifact)
23
+ chromadb_dir = artifact_embeddings.download(config.root_data_dir / "chromadb")
24
+
25
+ df_path = artifact_df / "summarized_que_podcasts.csv"
26
  df = pd.read_csv(df_path)
27
 
28
 
 
51
 
52
 
53
  def get_answer(podcast: str, question: str):
54
+ index = df[df["title"] == podcast].index[0]
55
+ db_dir = os.path.join(chromadb_dir, str(index))
56
  embeddings = OpenAIEmbeddings()
57
  db = Chroma(persist_directory=db_dir, embedding_function=embeddings)
58