ak3ra commited on
Commit
96a1efe
·
1 Parent(s): f694495

Add import statement for prompts in utils/prompts.py

Browse files
Files changed (1) hide show
  1. app.py +126 -161
app.py CHANGED
@@ -1,152 +1,60 @@
1
  # app.py
2
 
 
 
 
3
  import json
4
- from typing import List, Tuple
5
- import os
6
  import logging
 
 
7
 
8
  import gradio as gr
 
9
  from dotenv import load_dotenv
10
  from slugify import slugify
11
 
 
12
  from rag.rag_pipeline import RAGPipeline
13
  from utils.helpers import (
14
- generate_follow_up_questions,
15
  append_to_study_files,
16
  add_study_files_to_chromadb,
17
  chromadb_client,
18
  )
19
- from utils.prompts import (
20
- highlight_prompt,
21
- evidence_based_prompt,
22
- sample_questions,
23
- )
24
- import openai
25
-
26
- from config import STUDY_FILES, OPENAI_API_KEY
27
  from utils.zotero_manager import ZoteroManager
28
 
29
- import csv
30
- import io
31
-
32
- import datetime
33
-
34
- load_dotenv()
35
  logging.basicConfig(level=logging.INFO)
 
 
36
 
37
  openai.api_key = OPENAI_API_KEY
38
 
39
- # After loop, add all collected data to ChromaDB
40
  add_study_files_to_chromadb("study_files.json", "study_files_collection")
41
 
42
  # Cache for RAG pipelines
43
  rag_cache = {}
44
 
45
 
46
- def process_zotero_library_items(
47
- zotero_library_id: str, zotero_api_access_key: str
48
- ) -> str:
49
- if not zotero_library_id or not zotero_api_access_key:
50
- return "Please enter your zotero library Id and API Access Key"
51
-
52
- zotero_library_id = zotero_library_id
53
- zotero_library_type = "user" # or "group"
54
- zotero_api_access_key = zotero_api_access_key
55
-
56
- message = ""
57
-
58
- try:
59
- zotero_manager = ZoteroManager(
60
- zotero_library_id, zotero_library_type, zotero_api_access_key
61
- )
62
-
63
- zotero_collections = zotero_manager.get_collections()
64
- zotero_collection_lists = zotero_manager.list_zotero_collections(
65
- zotero_collections
66
- )
67
- filtered_zotero_collection_lists = (
68
- zotero_manager.filter_and_return_collections_with_items(
69
- zotero_collection_lists
70
- )
71
- )
72
-
73
- study_files_data = {} # Dictionary to collect items for ChromaDB
74
-
75
- for collection in filtered_zotero_collection_lists:
76
- collection_name = collection.get("name")
77
- if collection_name not in STUDY_FILES:
78
- collection_key = collection.get("key")
79
- collection_items = zotero_manager.get_collection_items(collection_key)
80
- zotero_collection_items = (
81
- zotero_manager.get_collection_zotero_items_by_key(collection_key)
82
- )
83
- #### Export zotero collection items to json ####
84
- zotero_items_json = zotero_manager.zotero_items_to_json(
85
- zotero_collection_items
86
- )
87
- export_file = f"{slugify(collection_name)}_zotero_items.json"
88
- zotero_manager.write_zotero_items_to_json_file(
89
- zotero_items_json, f"data/{export_file}"
90
- )
91
- append_to_study_files(
92
- "study_files.json", collection_name, f"data/{export_file}"
93
- )
94
-
95
- # Collect for ChromaDB
96
- study_files_data[collection_name] = f"data/{export_file}"
97
-
98
- # Update in-memory STUDY_FILES for reference in current session
99
- STUDY_FILES.update({collection_name: f"data/{export_file}"})
100
- logging.info(f"STUDY_FILES: {STUDY_FILES}")
101
-
102
- # After loop, add all collected data to ChromaDB
103
- add_study_files_to_chromadb("study_files.json", "study_files_collection")
104
- message = "Successfully processed items in your zotero library"
105
- except Exception as e:
106
- message = f"Error process your zotero library: {str(e)}"
107
-
108
- return message
109
-
110
-
111
  def get_rag_pipeline(study_name: str) -> RAGPipeline:
112
  """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
113
  if study_name not in rag_cache:
114
- # Query ChromaDB for the study file path by ID
115
  collection = chromadb_client.get_or_create_collection("study_files_collection")
116
  result = collection.get(ids=[study_name]) # Retrieve document by ID
117
 
118
- # Check if the result contains the requested document
119
  if not result or len(result["metadatas"]) == 0:
120
  raise ValueError(f"Invalid study name: {study_name}")
121
 
122
- # Extract the file path from the document metadata
123
  study_file = result["metadatas"][0].get("file_path")
124
  if not study_file:
125
  raise ValueError(f"File path not found for study name: {study_name}")
126
 
127
- # Create and cache the RAGPipeline instance
128
  rag_cache[study_name] = RAGPipeline(study_file)
129
 
130
  return rag_cache[study_name]
131
 
132
 
133
- def chat_function(message: str, study_name: str, prompt_type: str) -> str:
134
- """Process a chat message and generate a response using the RAG pipeline."""
135
-
136
- if not message.strip():
137
- return "Please enter a valid query."
138
-
139
- rag = get_rag_pipeline(study_name)
140
- logging.info(f"rag: ==> {rag}")
141
- prompt = {
142
- "Highlight": highlight_prompt,
143
- "Evidence-based": evidence_based_prompt,
144
- }.get(prompt_type)
145
-
146
- response = rag.query(message, prompt_template=prompt)
147
- return response.response
148
-
149
-
150
  def get_study_info(study_name: str) -> str:
151
  """Retrieve information about the specified study."""
152
 
@@ -154,11 +62,9 @@ def get_study_info(study_name: str) -> str:
154
  result = collection.get(ids=[study_name]) # Query by study name (as a list)
155
  logging.info(f"Result: ======> {result}")
156
 
157
- # Check if the document exists in the result
158
  if not result or len(result["metadatas"]) == 0:
159
  raise ValueError(f"Invalid study name: {study_name}")
160
 
161
- # Extract the file path from the document metadata
162
  study_file = result["metadatas"][0].get("file_path")
163
  logging.info(f"study_file: =======> {study_file}")
164
  if not study_file:
@@ -171,46 +77,124 @@ def get_study_info(study_name: str) -> str:
171
 
172
  def markdown_table_to_csv(markdown_text: str) -> str:
173
  """Convert a markdown table to CSV format."""
174
- # Split the text into lines and remove empty lines
175
  lines = [line.strip() for line in markdown_text.split("\n") if line.strip()]
176
-
177
- # Find the table content (lines starting with |)
178
  table_lines = [line for line in lines if line.startswith("|")]
179
 
180
  if not table_lines:
181
  return ""
182
 
183
- # Process each line to extract cell values
184
  csv_data = []
185
  for line in table_lines:
186
- # Skip separator lines (containing only dashes)
187
  if "---" in line:
188
  continue
189
  # Split by |, remove empty strings, and strip whitespace
190
  cells = [cell.strip() for cell in line.split("|") if cell.strip()]
191
  csv_data.append(cells)
192
 
193
- # Create CSV string
194
  output = io.StringIO()
195
  writer = csv.writer(output)
196
  writer.writerows(csv_data)
197
  return output.getvalue()
198
 
199
 
200
- def update_interface(study_name: str) -> Tuple[str, gr.update, gr.update, gr.update]:
201
- """Update the interface based on the selected study."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- study_info = get_study_info(study_name)
204
- questions = sample_questions.get(study_name, [])[:3]
205
- if not questions:
206
- questions = sample_questions.get("General", [])[:3]
207
- visible_questions = [gr.update(visible=True, value=q) for q in questions]
208
- hidden_questions = [gr.update(visible=False) for _ in range(3 - len(questions))]
209
- return (study_info, *visible_questions, *hidden_questions)
210
 
 
 
 
 
 
 
211
 
212
- def set_question(question: str) -> str:
213
- return question.lstrip("✨ ")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
 
215
 
216
  def process_multi_input(text, study_name, prompt_type):
@@ -222,6 +206,25 @@ def process_multi_input(text, study_name, prompt_type):
222
  return [response, gr.update(visible=True)]
223
 
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  def create_gr_interface() -> gr.Blocks:
226
  """
227
  Create and configure the Gradio interface for the RAG platform.
@@ -312,44 +315,6 @@ def create_gr_interface() -> gr.Blocks:
312
  visible=False,
313
  )
314
 
315
- def download_as_csv(markdown_content):
316
- """Convert markdown table to CSV and provide for download."""
317
- if not markdown_content:
318
- return None
319
-
320
- csv_content = markdown_table_to_csv(markdown_content)
321
- if not csv_content:
322
- return None
323
-
324
- # Create temporary file with actual content
325
- timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
326
- temp_path = f"study_export_{timestamp}.csv"
327
-
328
- with open(temp_path, "w", newline="", encoding="utf-8") as f:
329
- f.write(csv_content)
330
-
331
- return temp_path
332
-
333
- def cleanup_temp_files():
334
- """Clean up old temporary files."""
335
- try:
336
- # Delete files older than 5 minutes
337
- current_time = datetime.datetime.now()
338
- for file in os.listdir():
339
- if file.startswith("study_export_") and file.endswith(".csv"):
340
- file_time = datetime.datetime.fromtimestamp(
341
- os.path.getmtime(file)
342
- )
343
- if (current_time - file_time).seconds > 30: # 5 minutes
344
- try:
345
- os.remove(file)
346
- except Exception as e:
347
- logging.warning(
348
- f"Failed to remove temp file {file}: {e}"
349
- )
350
- except Exception as e:
351
- logging.warning(f"Error during cleanup: {e}")
352
-
353
  study_dropdown.change(
354
  fn=get_study_info,
355
  inputs=study_dropdown,
 
1
  # app.py
2
 
3
+ import csv
4
+ import datetime
5
+ import io
6
  import json
 
 
7
  import logging
8
+ import os
9
+ from typing import Tuple
10
 
11
  import gradio as gr
12
+ import openai
13
  from dotenv import load_dotenv
14
  from slugify import slugify
15
 
16
+ from config import STUDY_FILES, OPENAI_API_KEY
17
  from rag.rag_pipeline import RAGPipeline
18
  from utils.helpers import (
 
19
  append_to_study_files,
20
  add_study_files_to_chromadb,
21
  chromadb_client,
22
  )
23
+ from utils.prompts import highlight_prompt, evidence_based_prompt
 
 
 
 
 
 
 
24
  from utils.zotero_manager import ZoteroManager
25
 
26
+ # Configure logging
 
 
 
 
 
27
  logging.basicConfig(level=logging.INFO)
28
+ logger = logging.getLogger(__name__)
29
+ load_dotenv()
30
 
31
  openai.api_key = OPENAI_API_KEY
32
 
33
+ # Initialize ChromaDB with study files
34
  add_study_files_to_chromadb("study_files.json", "study_files_collection")
35
 
36
  # Cache for RAG pipelines
37
  rag_cache = {}
38
 
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
def get_rag_pipeline(study_name: str) -> RAGPipeline:
    """Get or create a RAGPipeline instance for the given study by querying ChromaDB."""
    # Fast path: pipeline already built for this study.
    if study_name in rag_cache:
        return rag_cache[study_name]

    store = chromadb_client.get_or_create_collection("study_files_collection")
    lookup = store.get(ids=[study_name])  # documents are keyed by study name

    if not lookup or len(lookup["metadatas"]) == 0:
        raise ValueError(f"Invalid study name: {study_name}")

    file_path = lookup["metadatas"][0].get("file_path")
    if not file_path:
        raise ValueError(f"File path not found for study name: {study_name}")

    pipeline = RAGPipeline(file_path)
    rag_cache[study_name] = pipeline
    return pipeline
56
 
57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  def get_study_info(study_name: str) -> str:
59
  """Retrieve information about the specified study."""
60
 
 
62
  result = collection.get(ids=[study_name]) # Query by study name (as a list)
63
  logging.info(f"Result: ======> {result}")
64
 
 
65
  if not result or len(result["metadatas"]) == 0:
66
  raise ValueError(f"Invalid study name: {study_name}")
67
 
 
68
  study_file = result["metadatas"][0].get("file_path")
69
  logging.info(f"study_file: =======> {study_file}")
70
  if not study_file:
 
77
 
78
def markdown_table_to_csv(markdown_text: str) -> str:
    """Convert the first markdown table in *markdown_text* to CSV.

    Lines not starting with ``|`` are ignored; alignment separator rows
    (e.g. ``|---|:---:|``) are skipped.  Cell text is stripped of
    surrounding whitespace and empty cells are preserved so columns stay
    aligned across rows.

    Args:
        markdown_text: Text that may contain a pipe-delimited markdown table.

    Returns:
        The table serialized as CSV (``csv.writer`` default ``\\r\\n`` line
        endings), or an empty string when no table rows are present.
    """
    lines = [line.strip() for line in markdown_text.split("\n") if line.strip()]
    table_lines = [line for line in lines if line.startswith("|")]

    if not table_lines:
        return ""

    csv_data = []
    for line in table_lines:
        # A separator row consists solely of pipes, dashes, colons and
        # spaces.  Checking the whole line instead of `"---" in line`
        # avoids dropping data rows whose cell text merely contains "---".
        if set(line) <= set("|-: "):
            continue
        # Trim the outer pipe delimiters, then split on the interior pipes.
        # Unlike filtering out falsy cells, this keeps empty cells so later
        # columns are not shifted left.
        cells = [cell.strip() for cell in line.strip("|").split("|")]
        csv_data.append(cells)

    output = io.StringIO()
    writer = csv.writer(output)
    writer.writerows(csv_data)
    return output.getvalue()
98
 
99
 
100
def cleanup_temp_files(max_age_seconds: float = 20) -> None:
    """Delete stale ``study_export_*.csv`` files from the working directory.

    CSV exports are written next to the app (by ``download_as_csv``) and are
    only needed long enough for the user to download them, so files modified
    longer ago than *max_age_seconds* are removed.

    Args:
        max_age_seconds: Age threshold in seconds.  Defaults to 20, matching
            the previous hard-coded value (NOTE(review): earlier comments
            claimed "5 minutes" — confirm the intended retention window).

    Best-effort: per-file and directory-scan failures are logged as
    warnings, never raised.
    """
    try:
        current_time = datetime.datetime.now()
        for file in os.listdir():
            if file.startswith("study_export_") and file.endswith(".csv"):
                file_time = datetime.datetime.fromtimestamp(os.path.getmtime(file))
                # total_seconds() rather than .seconds, so ages beyond one
                # day are not truncated to the sub-day remainder.
                age = (current_time - file_time).total_seconds()
                if age > max_age_seconds:
                    try:
                        os.remove(file)
                    except Exception as e:
                        logging.warning(f"Failed to remove temp file {file}: {e}")
    except Exception as e:
        logging.warning(f"Error during cleanup: {e}")
116
+
117
+
118
def chat_function(message: str, study_name: str, prompt_type: str) -> str:
    """Answer a chat message about a study via its RAG pipeline.

    Returns a placeholder string when the message is blank; otherwise
    queries the study's pipeline with the prompt template selected by
    *prompt_type* ("Highlight" or "Evidence-based"; any other value means
    no template).
    """
    if not message.strip():
        return "Please enter a valid query."

    pipeline = get_rag_pipeline(study_name)
    logging.info(f"rag: ==> {pipeline}")

    if prompt_type == "Highlight":
        template = highlight_prompt
    elif prompt_type == "Evidence-based":
        template = evidence_based_prompt
    else:
        template = None

    result = pipeline.query(message, prompt_template=template)
    return result.response
133
+
134
+
135
def process_zotero_library_items(
    zotero_library_id: str, zotero_api_access_key: str
) -> str:
    """Import every non-empty Zotero collection into local study files.

    Each collection (library type "user") not already registered in
    STUDY_FILES has its items exported to ``data/<slug>_zotero_items.json``,
    recorded in ``study_files.json``, and — after the loop — synced into
    ChromaDB.

    Args:
        zotero_library_id: Zotero library identifier.
        zotero_api_access_key: Zotero API key with read access.

    Returns:
        A human-readable success or error message; exceptions are caught,
        not raised.
    """
    if not zotero_library_id or not zotero_api_access_key:
        return "Please enter your zotero library Id and API Access Key"

    zotero_library_type = "user"  # or "group"
    message = ""

    try:
        zotero_manager = ZoteroManager(
            zotero_library_id, zotero_library_type, zotero_api_access_key
        )

        zotero_collections = zotero_manager.get_collections()
        zotero_collection_lists = zotero_manager.list_zotero_collections(
            zotero_collections
        )
        filtered_zotero_collection_lists = (
            zotero_manager.filter_and_return_collections_with_items(
                zotero_collection_lists
            )
        )

        study_files_data = {}  # Dictionary to collect items for ChromaDB

        for collection in filtered_zotero_collection_lists:
            collection_name = collection.get("name")
            if collection_name not in STUDY_FILES:
                collection_key = collection.get("key")
                # NOTE(review): the return value of this call was never used
                # by the original code; the call is kept in case it has side
                # effects — confirm whether it can be dropped.
                zotero_manager.get_collection_items(collection_key)
                zotero_collection_items = (
                    zotero_manager.get_collection_zotero_items_by_key(collection_key)
                )
                # Export zotero collection items to json
                zotero_items_json = zotero_manager.zotero_items_to_json(
                    zotero_collection_items
                )
                export_file = f"{slugify(collection_name)}_zotero_items.json"
                zotero_manager.write_zotero_items_to_json_file(
                    zotero_items_json, f"data/{export_file}"
                )
                append_to_study_files(
                    "study_files.json", collection_name, f"data/{export_file}"
                )

                # Collect for ChromaDB
                study_files_data[collection_name] = f"data/{export_file}"

                # Update in-memory STUDY_FILES for reference in current session
                STUDY_FILES.update({collection_name: f"data/{export_file}"})
                logging.info(f"STUDY_FILES: {STUDY_FILES}")

        # After loop, add all collected data to ChromaDB
        add_study_files_to_chromadb("study_files.json", "study_files_collection")
        message = "Successfully processed items in your zotero library"
    except Exception as e:
        message = f"Error processing your zotero library: {str(e)}"

    return message
198
 
199
 
200
  def process_multi_input(text, study_name, prompt_type):
 
206
  return [response, gr.update(visible=True)]
207
 
208
 
209
def download_as_csv(markdown_content):
    """Render a markdown table as a timestamped CSV file for download.

    Returns the path of the written ``study_export_<timestamp>.csv`` file,
    or None when the input is empty or contains no table.
    """
    if not markdown_content:
        return None

    csv_text = markdown_table_to_csv(markdown_content)
    if not csv_text:
        return None

    # Timestamped name keeps concurrent exports from clobbering each other.
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    out_path = f"study_export_{stamp}.csv"

    with open(out_path, "w", newline="", encoding="utf-8") as out_file:
        out_file.write(csv_text)

    return out_path
226
+
227
+
228
  def create_gr_interface() -> gr.Blocks:
229
  """
230
  Create and configure the Gradio interface for the RAG platform.
 
315
  visible=False,
316
  )
317
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
  study_dropdown.change(
319
  fn=get_study_info,
320
  inputs=study_dropdown,