juancho72h committed on
Commit 1a7916d · verified · 1 Parent(s): 30cb161

Upload app.py

Files changed (1)
  1. app.py +45 -19
app.py CHANGED
@@ -5,7 +5,7 @@ import gradio as gr
 import torch
 from dotenv import load_dotenv
 from pinecone import Pinecone
-from langchain_huggingface import HuggingFaceEmbeddings
+from langchain.embeddings.huggingface import HuggingFaceEmbeddings
 
 # Detect GPU availability and set device
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
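
The only change in this hunk is the HuggingFaceEmbeddings import path. If it is unclear which langchain package is installed in the Space, a small fallback like the sketch below (illustrative only, not part of this commit) keeps both paths working:

# Illustrative fallback: prefer the standalone langchain-huggingface package,
# then fall back to the legacy path used by this commit.
try:
    from langchain_huggingface import HuggingFaceEmbeddings
except ImportError:
    from langchain.embeddings.huggingface import HuggingFaceEmbeddings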
@@ -39,48 +39,74 @@ index = initialize_pinecone_index(index_name)
 # Initialize HuggingFace embedding model
 embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/msmarco-distilbert-base-v4")
 
+# Initialize chat history manually
+chat_history = []
+
+# Helper function to recursively flatten any list to a string
+def flatten_to_string(data):
+    if isinstance(data, list):
+        return " ".join([flatten_to_string(item) for item in data])
+    if data is None:
+        return ""
+    return str(data)
+
 # Function to interact with Pinecone and OpenAI GPT-4
-def get_model_response(human_input, chat_history=None):
+def get_model_response(human_input):
     try:
-        # Fetching and embedding query (move tensors to GPU)
+        # Embed the query
         query_embedding = torch.tensor(embedding_model.embed_query(human_input)).to(device)
-
-        # Convert NumPy array to list before passing it to Pinecone or any API that requires JSON-serializable data
         query_embedding = query_embedding.cpu().numpy().tolist()
 
-        # Query Pinecone index using CPU or GPU for fast vector search (if supported)
+        # Query Pinecone index
         search_results = index.query(vector=query_embedding, top_k=2, include_metadata=True)
 
         context_list, images = [], []
         for ind, result in enumerate(search_results['matches']):
-            document_content = result.get('metadata', {}).get('content', 'No content found')
-            image_url = result.get('metadata', {}).get('image_path', None)
-            figure_desc = result.get('metadata', {}).get('figure_description', '')
-
+            document_content = flatten_to_string(result.get('metadata', {}).get('content', 'No content found'))
+            image_url = flatten_to_string(result.get('metadata', {}).get('image_path', None))
+            figure_desc = flatten_to_string(result.get('metadata', {}).get('figure_description', ''))
+
             context_list.append(f"Document {ind+1}: {document_content}")
-
             if image_url and figure_desc:
                 images.append((figure_desc, image_url))
-
+
         context_string = '\n\n'.join(context_list)
-        messages = [{"role": "system", "content": "You are a helpful assistant."},
-                    {"role": "user", "content": f"Here is some context:\n{context_string}\n\nUser's question: {human_input}"}]
-
-        # Generate response using OpenAI GPT-3.5 Turbo for faster responses
+
+        # Add user message to chat history
+        chat_history.append({"role": "user", "content": human_input})
+
+        # Create messages for OpenAI's API
+        messages = [{"role": "system", "content": "You are a helpful assistant."}] + chat_history + [
+            {"role": "system", "content": f"Here is some context:\n{context_string}"},
+            {"role": "user", "content": human_input}
+        ]
+
+        # Validate messages before sending to OpenAI
+        for message in messages:
+            if not isinstance(message, dict) or "role" not in message or "content" not in message:
+                raise ValueError(f"Invalid message format: {message}")
+
+        # Send the conversation to OpenAI's API
         response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=500,
            temperature=0.5
         )
+
         output_text = response['choices'][0]['message']['content'].strip()
+
+        # Add assistant message to chat history
+        chat_history.append({"role": "assistant", "content": output_text})
+
         return output_text, images
+
     except Exception as e:
         return f"Error invoking model: {str(e)}", []
 
-# Function to format text and images for display
-def get_model_response_with_images(human_input, chat_history=None):
-    output_text, images = get_model_response(human_input, chat_history)
+# Function to format text and images for display and track conversation
+def get_model_response_with_images(human_input, history=None):
+    output_text, images = get_model_response(human_input)
     if images:
         image_output = "".join([f"\n\n**{figure_desc}**\n![{figure_desc}]({image_path})" for figure_desc, image_path in images])
         return output_text + image_output
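
The hunk header above references index = initialize_pinecone_index(index_name), which is defined outside the changed lines. A minimal sketch of what such a helper might look like with the pinecone client imported in this file (an assumption; the real definition is not shown in this diff):

import os
from pinecone import Pinecone

def initialize_pinecone_index(index_name):
    # Assumed helper: read the API key from the environment and return a
    # handle to an existing index; the actual app.py may differ.
    pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
    return pc.Index(index_name)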
 
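The new flatten_to_string helper guards against Pinecone metadata values that arrive as nested lists or None instead of plain strings. A quick usage sketch, assuming the definition added above:

print(flatten_to_string(["Figure 3:", ["pump", "schematic"]]))  # "Figure 3: pump schematic"
print(flatten_to_string(None))                                  # ""
print(flatten_to_string("already a string"))                    # "already a string"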
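With manual chat history in play, the messages list sent to OpenAI is the system prompt, then the accumulated history (which already ends with the current question), then a context message and the question again. An illustrative shape after one prior exchange (made-up values):

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "How do I prime the pump?"},          # earlier turn from chat_history
    {"role": "assistant", "content": "Open the bleed valve first."},  # earlier turn from chat_history
    {"role": "user", "content": "What pressure should I expect?"},    # current turn, already appended to chat_history
    {"role": "system", "content": "Here is some context:\nDocument 1: ..."},
    {"role": "user", "content": "What pressure should I expect?"},    # current question repeated by the explicit construction
]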
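The updated get_model_response_with_images(human_input, history=None) matches the (message, history) signature that Gradio chat components pass to their callback. Wiring it up could look like the sketch below; this is an assumption, since the actual Gradio launch code lies outside the lines changed in this commit:

import gradio as gr

# Hypothetical wiring, not shown in this diff: ChatInterface calls the
# function with (message, history), matching the new signature.
demo = gr.ChatInterface(fn=get_model_response_with_images,
                        title="Pinecone + GPT-3.5 assistant")

if __name__ == "__main__":
    demo.launch()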