aquibmoin committed on
Commit
31952a4
·
verified ·
1 Parent(s): 4626ef7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -55
app.py CHANGED
@@ -12,14 +12,14 @@ from docx.oxml.ns import nsdecls
12
  from docx.oxml import parse_xml
13
  import io
14
  import tempfile
15
- #from astroquery.nasa_ads import ADS
16
- import pyvo as vo
17
  import pandas as pd
18
  from pinecone import Pinecone
19
  import logging
20
  import re
21
 
22
  from utils.ads_references import extract_keywords_with_gpt, fetch_nasa_ads_references
 
23
 
24
 
25
  from langchain_openai import ChatOpenAI
@@ -41,9 +41,6 @@ bi_model = AutoModel.from_pretrained(bi_encoder_model_name)
41
  api_key = os.getenv('OPENAI_API_KEY')
42
  client = OpenAI(api_key=api_key)
43
 
44
- # Set up NASA ADS token
45
- #ADS.TOKEN = os.getenv('ADS_API_KEY') # Ensure your ADS API key is stored in environment variables
46
-
47
  # Pinecone setup
48
  pinecone_api_key = os.getenv('PINECONE_API_KEY')
49
  pc = Pinecone(api_key=pinecone_api_key)
@@ -132,24 +129,6 @@ def clean_retrieved_context(raw_context):
132
  # Return explicitly cleaned context
133
  return cleaned.strip()
134
 
135
- def fetch_exoplanet_data():
136
- # Connect to NASA Exoplanet Archive TAP Service
137
- tap_service = vo.dal.TAPService("https://exoplanetarchive.ipac.caltech.edu/TAP")
138
-
139
- # Query to fetch all columns from the pscomppars table
140
- ex_query = """
141
- SELECT TOP 10 pl_name, hostname, sy_snum, sy_pnum, discoverymethod, disc_year, disc_facility, pl_controv_flag, pl_orbper, pl_orbsmax, pl_rade, pl_bmasse, pl_orbeccen, pl_eqt, st_spectype, st_teff, st_rad, st_mass, ra, dec, sy_vmag
142
- FROM pscomppars
143
- """
144
- # Execute the query
145
- qresult = tap_service.search(ex_query)
146
-
147
- # Convert to a Pandas DataFrame
148
- ptable = qresult.to_table()
149
- exoplanet_data = ptable.to_pandas()
150
-
151
- return exoplanet_data
152
-
153
  def generate_response(user_input, science_objectives="", relevant_context="", references=[], max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
154
  # Case 1: Both relevant context and science objectives are provided
155
  if relevant_context and science_objectives.strip():
@@ -195,37 +174,6 @@ def generate_response(user_input, science_objectives="", relevant_context="", re
195
  # Return two clearly separated responses
196
  return full_response, response_only
197
 
198
- def generate_data_insights(user_input, exoplanet_data, max_tokens=500, temperature=0.3):
199
- """
200
- Generate insights by passing the user's input along with the exoplanet data to GPT-4.
201
- """
202
- # Convert the dataframe to a readable format for GPT (e.g., CSV-style text)
203
- data_as_text = exoplanet_data.to_csv(index=False) # CSV-style for better readability
204
-
205
- # Create a prompt with the user query and the data sample
206
- insights_prompt = (
207
- f"Analyze the following user query and provide relevant insights based on the provided exoplanet data.\n\n"
208
- f"User Query: {user_input}\n\n"
209
- f"Exoplanet Data:\n{data_as_text}\n\n"
210
- f"Please provide insights that are relevant to the user's query."
211
- )
212
-
213
- # Call GPT-4 to generate insights based on the data and user input
214
- response = client.chat.completions.create(
215
- model="gpt-4",
216
- messages=[
217
- {"role": "system", "content": "You are an expert in analyzing astronomical data and generating insights."},
218
- {"role": "user", "content": insights_prompt}
219
- ],
220
- max_tokens=max_tokens,
221
- temperature=temperature
222
- )
223
-
224
- # Extract and return GPT-4's insights
225
- data_insights = response.choices[0].message.content.strip()
226
- return data_insights
227
-
228
-
229
  def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
230
  doc = Document()
231
 
@@ -430,7 +378,7 @@ def chatbot(user_input, science_objectives="", context="", subdomain="", max_tok
430
 
431
  # Fetch exoplanet data and generate insights
432
  exoplanet_data = fetch_exoplanet_data()
433
- data_insights = generate_data_insights(user_input, exoplanet_data)
434
 
435
  # Extract GPT-generated table into DataFrame
436
  extracted_table_df = gpt_response_to_dataframe(full_response)
 
12
  from docx.oxml import parse_xml
13
  import io
14
  import tempfile
15
+ #import pyvo as vo
 
16
  import pandas as pd
17
  from pinecone import Pinecone
18
  import logging
19
  import re
20
 
21
  from utils.ads_references import extract_keywords_with_gpt, fetch_nasa_ads_references
22
+ from utils.data_insights import fetch_exoplanet_data, generate_data_insights
23
 
24
 
25
  from langchain_openai import ChatOpenAI
 
41
  api_key = os.getenv('OPENAI_API_KEY')
42
  client = OpenAI(api_key=api_key)
43
 
 
 
 
44
  # Pinecone setup
45
  pinecone_api_key = os.getenv('PINECONE_API_KEY')
46
  pc = Pinecone(api_key=pinecone_api_key)
 
129
  # Return explicitly cleaned context
130
  return cleaned.strip()
131
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  def generate_response(user_input, science_objectives="", relevant_context="", references=[], max_tokens=150, temperature=0.7, top_p=0.9, frequency_penalty=0.5, presence_penalty=0.0):
133
  # Case 1: Both relevant context and science objectives are provided
134
  if relevant_context and science_objectives.strip():
 
174
  # Return two clearly separated responses
175
  return full_response, response_only
176
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
  def export_to_word(response_content, subdomain_definition, science_goal, context, max_tokens, temperature, top_p, frequency_penalty, presence_penalty):
178
  doc = Document()
179
 
 
378
 
379
  # Fetch exoplanet data and generate insights
380
  exoplanet_data = fetch_exoplanet_data()
381
+ data_insights_uq = generate_data_insights(user_input, client, exoplanet_data)
382
 
383
  # Extract GPT-generated table into DataFrame
384
  extracted_table_df = gpt_response_to_dataframe(full_response)