Spaces:

Ekimetrics
/

climate-question-answering

Running

App Files Files Community

climate-question-answering / climateqa /engine /talk_to_data /utils.py

timeki

Add Talk to Drias

abafbcc 13 days ago

raw

history blame

3.31 kB

	import re
	import openai
	import pandas as pd
	from geopy.geocoders import Nominatim
	import sqlite3
	import ast


	def detect_location_with_openai(api_key, sentence):
	    """
	    Detects locations in a sentence using OpenAI's API.

	    Sets ``openai.api_key`` to ``api_key``, asks ``gpt-4o-mini`` to list the
	    locations mentioned in ``sentence`` as a Python list, and returns the
	    text of the list line with its first two and last two characters
	    removed (i.e. the brackets and outer quotes of a reply such as
	    ``["Paris"]`` are dropped, giving ``Paris``).

	    Args:
	        api_key: OpenAI API key to use for the request.
	        sentence: Text to scan for locations.

	    Returns:
	        str: The inner text of the list line of the model reply. The value
	        is a raw string, not a parsed list.
	    """
	    openai.api_key = api_key

	    prompt = f"""
	Extract all locations (cities, countries, states, or geographical areas) mentioned in the following sentence.
	Return the result as a Python list. If no locations are mentioned, return an empty list.

	Sentence: "{sentence}"
	"""

	    response = openai.chat.completions.create(
	        model="gpt-4o-mini",
	        messages=[
	            {"role": "system", "content": "You are a helpful assistant skilled in identifying locations in text."},
	            {"role": "user", "content": prompt},
	        ],
	        max_tokens=100,
	        temperature=0,
	    )

	    lines = response.choices[0].message.content.split("\n")

	    # The reply is not guaranteed to be wrapped in a code fence, so do not
	    # assume the list sits on the second line: take the first line that
	    # actually starts a list.
	    for line in lines:
	        candidate = line.strip()
	        if candidate.startswith("["):
	            return candidate[2:-2]

	    # No list-looking line: keep the historical slicing, but do not raise
	    # IndexError when the reply is a single line.
	    fallback = lines[1] if len(lines) > 1 else lines[0]
	    return fallback[2:-2]


	def detectTable(sql_query):
	    """
	    Return the table references that follow each ``FROM`` in a SQL query.

	    A reference is one or more dot-separated identifiers. Each identifier is
	    either a bare word or a name quoted with backticks, double quotes or
	    single quotes. Quoting characters are kept in the returned strings. The
	    ``FROM`` keyword is matched case-insensitively.

	    Args:
	        sql_query: SQL text to scan.

	    Returns:
	        list[str]: One entry per ``FROM`` clause found, in order of
	        appearance; an empty list when there is none.
	    """
	    # The alternatives must be separated by a bare `|`; an escaped `\|`
	    # would match a literal pipe character and no table name would be found.
	    identifier = r'(?:`[^`]+`|"[^"]+"|\'[^\']+\'|\w+)'
	    pattern = rf'(?i)\bFROM\s+({identifier}(?:\.{identifier})*)'
	    return re.findall(pattern, sql_query)



	def loc2coords(location : str):
	    """
	    Geocode a place name with Nominatim and return its coordinates.

	    Args:
	        location: Free-text place name passed to ``Nominatim.geocode``.

	    Returns:
	        tuple: ``(latitude, longitude)`` read from the geocoding result.
	    """
	    # NOTE(review): the result is used without a None check; presumably
	    # geocode() returns None for an unknown place, which would surface here
	    # as an AttributeError - confirm against the geopy documentation.
	    match = Nominatim(user_agent="city_to_latlong").geocode(location)
	    return (match.latitude, match.longitude)


	def coords2loc(coords : tuple):
	    """
	    Reverse-geocode coordinates with Nominatim and return the address.

	    Args:
	        coords: Coordinates passed unchanged to ``Nominatim.reverse``.

	    Returns:
	        str: The ``address`` of the reverse-geocoding result, or the string
	        ``"Unknown Location"`` when the lookup raises any exception (the
	        exception is printed as ``Error: ...``).
	    """
	    reverse_geocoder = Nominatim(user_agent="coords_to_city")
	    try:
	        # Attribute access stays inside the try so a failed lookup falls
	        # through to the placeholder below instead of propagating.
	        return reverse_geocoder.reverse(coords).address
	    except Exception as err:
	        print(f"Error: {err}")
	    return "Unknown Location"


	def nearestNeighbourSQL(db: str, location: tuple, table : str):
	    """
	    Find the grid point of ``table`` closest to ``location``.

	    The target latitude and longitude are rounded to 3 decimals. Candidate
	    rows are those whose ``lat`` and ``lon`` both lie within 0.3 of the
	    rounded target; among them, the row with the smallest squared
	    difference in ``lat``/``lon`` is returned.

	    Args:
	        db: Path of the SQLite database file.
	        location: ``(latitude, longitude)`` of the point of interest.
	        table: Name of a table exposing ``lat`` and ``lon`` columns.

	    Returns:
	        tuple: ``(lat, lon)`` of the closest matching row.

	    Raises:
	        IndexError: If no row lies inside the search window.
	    """
	    lat = round(location[0], 3)
	    lon = round(location[1], 3)

	    # A table name cannot be bound as a SQL parameter, so it is still
	    # interpolated; it must come from a trusted source. All numeric values
	    # are bound instead of being formatted into the statement.
	    sql = (
	        f"SELECT lat, lon FROM {table} "
	        "WHERE lat BETWEEN ? AND ? AND lon BETWEEN ? AND ? "
	        "ORDER BY (lat - ?) * (lat - ?) + (lon - ?) * (lon - ?) "
	        "LIMIT 1"
	    )
	    params = (lat - 0.3, lat + 0.3, lon - 0.3, lon + 0.3, lat, lat, lon, lon)

	    conn = sqlite3.connect(db)
	    try:
	        row = conn.execute(sql, params).fetchone()
	    finally:
	        # Always release the connection, including when the query fails.
	        conn.close()

	    if row is None:
	        raise IndexError(
	            f"No grid point found in table {table!r} within 0.3 of ({lat}, {lon})"
	        )
	    return row

	def detect_relevant_tables(user_question, llm):
	    """
	    Ask an LLM which tables are relevant to a user question.

	    The prompt lists the known table names and asks for a Python list of the
	    relevant ones. The reply is parsed with ``ast.literal_eval``.

	    Args:
	        user_question: The question to answer with the data.
	        llm: Object exposing ``invoke(prompt)`` whose result has a
	            ``content`` string attribute.

	    Returns:
	        The value parsed from the reply (expected to be a list of table
	        names).

	    Raises:
	        ValueError, SyntaxError: If the reply does not contain a parsable
	            Python literal.
	    """
	    table_names_list = [
	        "Frequency_of_rainy_days_index",
	        "Winter_precipitation_total",
	        "Summer_precipitation_total",
	        "Annual_precipitation_total",
	        # "Remarkable_daily_precipitation_total_(Q99)",
	        "Frequency_of_remarkable_daily_precipitation",
	        "Extreme_precipitation_intensity",
	        "Mean_winter_temperature",
	        "Mean_summer_temperature",
	        "Number_of_tropical_nights",
	        "Maximum_summer_temperature",
	        "Number_of_days_with_Tx_above_30C",
	        "Number_of_days_with_Tx_above_35C",
	        "Drought_index"
	    ]
	    prompt = (
	        f"You are helping to build a sql query to retrieve relevant data for a user question."
	        f"The different tables are {table_names_list}."
	        f"The user question is {user_question}. Write the relevant tables to use. Answer only a python list of table name."
	    )
	    reply = llm.invoke(prompt).content

	    # Pull out the bracketed list wherever it sits in the reply, so any code
	    # fence label (```python, ```json, ...) or surrounding prose is ignored.
	    list_match = re.search(r"\[.*\]", reply, re.DOTALL)
	    if list_match:
	        list_text = list_match.group(0)
	    else:
	        # No list found: keep the historical clean-up. Note that str.strip
	        # removes a *set of characters* from both ends, not a prefix.
	        list_text = reply.strip("```python\n").strip()

	    table_names = ast.literal_eval(list_text)
	    return table_names

	def replace_coordonates(coords, query, coords_tables):
	    """
	    Swap the requested coordinates in a query for per-table grid coordinates.

	    ``n`` is the number of times ``str(coords[0])`` occurs in ``query``. The
	    i-th occurrence of ``str(coords[0])`` becomes ``coords_tables[i][0]``
	    and the i-th occurrence of ``str(coords[1])`` becomes
	    ``coords_tables[i][1]``, for ``i < n``. Further occurrences of the
	    longitude text are left untouched.

	    The query is rewritten in a single left-to-right pass, so text that has
	    already been substituted is never matched again (e.g. replacing ``48.8``
	    by ``48.85`` cannot corrupt the next replacement).

	    Args:
	        coords: ``(latitude, longitude)`` as they appear in ``query``.
	        query: SQL text containing the coordinates.
	        coords_tables: Sequence of ``(latitude, longitude)`` pairs, one per
	            occurrence of the coordinates in ``query``.

	    Returns:
	        str: The query with the coordinates replaced.

	    Raises:
	        IndexError: If ``coords_tables`` has fewer entries than needed.
	    """
	    lat_text, lon_text = str(coords[0]), str(coords[1])
	    n = query.count(lat_text)
	    if n == 0:
	        return query

	    same_text = lat_text == lon_text
	    used = [0, 0]  # occurrences consumed so far for latitude / longitude

	    def _substitute(match):
	        token = match.group(0)
	        if same_text:
	            # Identical texts cannot be told apart: alternate lat, lon.
	            column = 0 if used[0] == used[1] else 1
	        else:
	            column = 0 if token == lat_text else 1
	        index = used[column]
	        if index >= n:
	            return token
	        used[column] += 1
	        return str(coords_tables[index][column])

	    # Longest alternative first so that a text containing the other one
	    # (e.g. "12.35" vs "2.35") is matched as a whole.
	    tokens = sorted({lat_text, lon_text}, key=len, reverse=True)
	    pattern = "|".join(re.escape(token) for token in tokens)
	    return re.sub(pattern, _substitute, query)