PierreBrunelle committed
Commit 7f1444b · verified · 1 Parent(s): 4e337e8

Update app.py

Files changed (1):
  app.py (+18, -22)

app.py CHANGED
@@ -26,13 +26,6 @@ if 'FIREWORKS_API_KEY' not in os.environ:
 if 'MISTRAL_API_KEY' not in os.environ:
     os.environ['MISTRAL_API_KEY'] = getpass.getpass('Mistral AI API Key:')
 
-"""## Creating UDFs: Embedding and Prompt Functions"""
-
-# Set up embedding function
-@pxt.expr_udf
-def e5_embed(text: str) -> np.ndarray:
-    return sentence_transformer(text, model_id='intfloat/e5-large-v2')
-
 # Create prompt function
 @pxt.udf
 def create_prompt(top_k_list: list[dict], question: str) -> str:
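The dedicated `e5_embed` wrapper becomes redundant because the embedding function is now bound inline at index-creation time (see the next hunk). A minimal sketch of the two patterns, assuming `sentence_transformer` is imported from `pixeltable.functions.huggingface` as in this app; `embed_fn` is a hypothetical name:

```python
import numpy as np
import pixeltable as pxt
from pixeltable.functions.huggingface import sentence_transformer

# Old pattern (removed above): a dedicated expr-UDF wrapping the model call.
@pxt.expr_udf
def e5_embed(text: str) -> np.ndarray:
    return sentence_transformer(text, model_id='intfloat/e5-large-v2')

# New pattern: bind the model id with .using(); the result can be passed
# anywhere a string-embedding function is expected (e.g. string_embed=...).
embed_fn = sentence_transformer.using(model_id='intfloat/e5-large-v2')
```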
@@ -87,8 +80,11 @@ def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, sh
 
     progress(0.4, desc="Generating embeddings...")
 
-    # Add an embedding index to the chunks for similarity search
-    chunks_t.add_embedding_index('text', string_embed=e5_embed)
+    chunks_t.add_embedding_index(
+        'text',
+        idx_name='minilm_idx',
+        string_embed=sentence_transformer.using(model_id='sentence-transformers/all-MiniLM-L12-v2')
+    )
 
     # Define a query function to retrieve the top-k most similar chunks for a given question
     @chunks_t.query
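The embedding index is what backs the top-k retrieval defined just below it. A self-contained sketch of the same mechanism, with an illustrative table (`demo.docs`) standing in for this app's PDF-derived `chunks_t`:

```python
import pixeltable as pxt
from pixeltable.functions.huggingface import sentence_transformer

# Illustrative table; the app builds chunks_t from parsed PDF chunks instead.
docs = pxt.create_table('demo.docs', {'text': pxt.String})
docs.add_embedding_index(
    'text',
    idx_name='minilm_idx',
    string_embed=sentence_transformer.using(model_id='sentence-transformers/all-MiniLM-L12-v2'),
)

# similarity() scores rows against a query string via the index;
# ordering by it descending yields the most similar chunks first.
sim = docs.text.similarity('What does the warranty cover?')
top3 = docs.order_by(sim, asc=False).limit(3).select(docs.text, score=sim).collect()
```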
@@ -101,10 +97,10 @@ def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, sh
     )
 
     # Add computed columns to the queries table for context retrieval and prompt creation
-    queries_t['question_context'] = chunks_t.top_k(queries_t.question)
-    queries_t['prompt'] = create_prompt(
+    queries_t.add_computed_column(question_context=chunks_t.queries.top_k(queries_t.question))
+    queries_t.add_computed_column(prompt=create_prompt(
         queries_t.question_context, queries_t.question
-    )
+    ))
 
     # Prepare messages for the OpenAI API, including system instructions and user prompt
     msgs = [
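This hunk migrates from the older `table['col'] = expr` assignment syntax to the explicit `add_computed_column()` API; both register an expression that Pixeltable evaluates for existing rows and for every future insert. A hedged sketch under that assumption, with a hypothetical table (`demo.queries_demo`) and column (`question_upper`), and assuming Pixeltable's string functions are callable in method form:

```python
import pixeltable as pxt

t = pxt.create_table('demo.queries_demo', {'question': pxt.String})

# A computed column stores an expression, not a value; Pixeltable fills it
# for existing rows immediately and recomputes it for each new row.
t.add_computed_column(question_upper=t.question.upper())

t.insert([{'question': 'what changed in this commit?'}])
print(t.select(t.question, t.question_upper).collect())
```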
@@ -121,37 +117,37 @@ def process_files(ground_truth_file, pdf_files, chunk_limit, chunk_separator, sh
     progress(0.6, desc="Querying models...")
 
     # Add OpenAI response column
-    queries_t['response'] = openai.chat_completions(
+    queries_t.add_computed_column(response=openai.chat_completions(
         model='gpt-4o-mini-2024-07-18',
         messages=msgs,
         max_tokens=300,
         top_p=0.9,
         temperature=0.7
-    )
+    ))
 
     # Add a response column from a model hosted on Fireworks AI, with some parameters
-    queries_t['response_2'] = f_chat_completions(
+    queries_t.add_computed_column(response_2=f_chat_completions(
         messages=msgs,
         model='accounts/fireworks/models/llama-v3p2-3b-instruct',
         # These parameters are optional and can be used to tune model behavior:
         max_tokens=300,
         top_p=0.9,
         temperature=0.7
-    )
+    ))
 
-    queries_t['response_3'] = chat_completions(
+    queries_t.add_computed_column(response_3=chat_completions(
         messages=msgs,
         model='mistral-small-latest',
         # These parameters are optional and can be used to tune model behavior:
         max_tokens=300,
         top_p=0.9,
         temperature=0.7
-    )
+    ))
 
     # Extract the answer text from the API response
-    queries_t['gpt4omini'] = queries_t.response.choices[0].message.content
-    queries_t['llamav3p23b'] = queries_t.response_2.choices[0].message.content
-    queries_t['mistralsmall'] = queries_t.response_3.choices[0].message.content
+    queries_t.add_computed_column(gpt4omini=queries_t.response.choices[0].message.content)
+    queries_t.add_computed_column(llamav3p23b=queries_t.response_2.choices[0].message.content)
+    queries_t.add_computed_column(mistralsmall=queries_t.response_3.choices[0].message.content)
 
     # Prepare the output dataframe with selected columns
     columns_to_show = []
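Each `response*` column stores the full chat-completions JSON, so `choices[0].message.content` is a per-row path expression rather than Python-side parsing. A short usage sketch, assumed to run where `queries_t` is in scope after the columns above exist:

```python
# Compare the three model answers side by side for every question.
results = queries_t.select(
    queries_t.question,
    queries_t.gpt4omini,
    queries_t.llamav3p23b,
    queries_t.mistralsmall,
).collect()
print(results)
```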
@@ -291,4 +287,4 @@ with gr.Blocks(theme=Monochrome) as demo:
     )
 
 if __name__ == "__main__":
-    demo.launch(show_api=False)
+    demo.launch()
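Note that Gradio's `launch()` defaults to `show_api=True`, so dropping the flag re-enables the auto-generated API docs link in the app footer. To restore the pre-commit behavior:

```python
# Keep the auto-generated API page hidden (as before this commit).
demo.launch(show_api=False)
```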
 