Spaces:
Build error
Build error
import weaviate | |
import streamlit as st | |
from weaviate.embedded import EmbeddedOptions | |
from weaviate import Client | |
import pandas as pd # <-- Add this import | |
from io import StringIO # <-- Add this import | |
import pandas as pd | |
def hybrid_search_weaviate(client, selected_class, query):
    """
    Search a Weaviate class for objects matching ``query``.

    Despite the name, the lookup is a ``where`` filter using the ``Like``
    operator, not a hybrid (keyword + vector) search.

    Args:
        client: Weaviate client exposing ``query.get(...)``.
        selected_class: Name of the class to search.
        query: Pattern handed to the ``Like`` operator as ``valueString``.

    Returns:
        The list of objects found under ``data.Get.<selected_class>`` in the
        response, or an empty list when the response has no such entry.
    """
    # ``with_where`` takes the filter itself; wrapping it in an extra
    # {"where": ...} level hides the "path"/"operator" keys it looks for.
    where_filter = {
        # NOTE(review): confirm the server accepts a wildcard path and a
        # wildcard property selection; both are kept from the original code.
        "path": ["*"],
        "operator": "Like",
        "valueString": query,
    }

    # Execute the query and retrieve the results
    results = client.query.get(selected_class, "*").with_where(where_filter).do()

    # Results are keyed by the queried class name. Any level may be missing
    # or null (e.g. when the response only carries errors), so fall back to
    # empty containers instead of failing on ``None.get``.
    payload = (results or {}).get("data") or {}
    return (payload.get("Get") or {}).get(selected_class) or []
def convert_to_tapas_format(data):
    """
    Convert Weaviate objects into a header-plus-rows table (intended for TAPAS).

    Args:
        data: Either a list of object dictionaries (as returned by a search),
            or a raw GraphQL response dictionary whose objects sit in the
            lists found under ``data.Get``.

    Returns:
        A list of lists: the first entry holds the column names and every
        following entry holds one object's values. Empty input yields
        ``[[]]`` (an empty header and no rows).
    """
    if isinstance(data, dict):
        # Raw response: gather the objects of every class listed under data.Get
        per_class = ((data.get("data") or {}).get("Get") or {}).values()
        data_objects = [obj for objects in per_class for obj in objects or []]
    else:
        data_objects = list(data or [])

    # Objects may carry their properties under a "thing" entry (the shape this
    # function originally read); unwrap it when present, otherwise use the
    # object as-is.
    rows = [
        obj["thing"] if isinstance(obj.get("thing"), dict) else obj
        for obj in data_objects
    ]

    # Convert the data objects into a DataFrame so columns are aligned
    df = pd.DataFrame(rows)
    return [df.columns.tolist()] + df.values.tolist()
def initialize_weaviate_client():
    """Create and return a Weaviate client built from default ``EmbeddedOptions``."""
    embedded = EmbeddedOptions()
    return weaviate.Client(embedded_options=embedded)
def class_exists(client, class_name):
    """
    Report whether the Weaviate schema contains a class named ``class_name``.

    Args:
        client: Weaviate client exposing ``schema.get()``.
        class_name: Name to look for among the schema's classes.

    Returns:
        True when a class with exactly that name is listed; False when it is
        not listed or when the schema cannot be retrieved.
    """
    try:
        schema = client.schema.get()
    except Exception:
        # Best effort: a failing or unreachable server counts as "not found".
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        return False
    classes = (schema or {}).get("classes") or []
    return any(cls.get("class") == class_name for cls in classes)
def map_dtype_to_weaviate(dtype):
    """
    Translate a column dtype into a Weaviate data type name.

    The dtype's string form is checked for the substrings "int", "float" and
    "bool", in that order; the first match decides the result. Anything else
    maps to "string".
    """
    dtype_name = str(dtype)
    substring_to_type = (
        ("int", "int"),
        ("float", "number"),
        ("bool", "boolean"),
    )
    for marker, weaviate_type in substring_to_type:
        if marker in dtype_name:
            return weaviate_type
    return "string"
def create_new_class_schema(client, class_name, class_description):
    """
    Register a new class without properties in the Weaviate schema.

    The outcome is reported through Streamlit: a success message when the
    class is created, or an error message carrying the exception text when
    creation fails. Nothing is returned.
    """
    schema_payload = {
        "classes": [
            {
                "class": class_name,
                "description": class_description,
                "properties": [],
            }
        ]
    }
    try:
        client.schema.create(schema_payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as exc:
        st.error(f"Error creating class: {exc}")
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV into a Weaviate class and preview it in Streamlit.

    When the class has no properties yet, one property per CSV column is
    created (typed via ``map_dtype_to_weaviate``). Otherwise the CSV columns
    must match the existing property names exactly, or ingestion is aborted.

    Args:
        client: Weaviate client used for schema and data-object calls.
        csv_file: Binary file-like object whose ``read()`` returns the
            UTF-8 encoded CSV content.
        selected_class: Name of the target Weaviate class.

    Returns:
        The parsed DataFrame after ingestion, or None when the CSV columns do
        not match the schema of the selected class.
    """
    # "utf-8-sig" also reads plain UTF-8 and strips a leading byte-order mark,
    # which would otherwise end up inside the first column name.
    csv_text = csv_file.read().decode("utf-8-sig")
    dataframe = pd.read_csv(StringIO(csv_text))

    # Fetch the schema for the selected class; a missing class or a class
    # entry without a "properties" key is treated as having no properties.
    class_schema = get_class_schema(client, selected_class)
    existing_properties = (class_schema or {}).get("properties") or []

    if not existing_properties:
        # The schema is empty: create it based on the CSV columns
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)],
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        # The schema is not empty: compare it with the CSV columns
        schema_columns = {prop["name"] for prop in existing_properties}
        if set(dataframe.columns) != schema_columns:
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return None

    # Ingest the data into Weaviate, one object per CSV row
    failed_records = 0
    for record in dataframe.to_dict(orient="records"):
        # Empty cells are NaN in pandas, and NaN cannot be represented in
        # JSON; drop them so the property is simply left unset on the object.
        properties = {
            key: value for key, value in record.items() if not pd.isna(value)
        }
        try:
            client.data_object.create(properties, selected_class)
        except Exception as e:
            failed_records += 1
            st.error(f"Error ingesting record: {e}")

    # Only claim success when every record made it in
    if failed_records:
        st.warning(
            f"{failed_records} of {len(dataframe)} records could not be "
            f"ingested into the class '{selected_class}'"
        )
    else:
        st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")

    # Display the first few rows of the dataframe as a preview
    st.write(dataframe.head())
    return dataframe
def get_class_schema(client, class_name):
    """
    Look up one class definition in the full Weaviate schema.

    Returns the first entry of the schema's "classes" list whose "class"
    value equals ``class_name``. Returns None when no entry matches or when
    a ``SchemaValidationException`` is raised during the lookup.
    """
    try:
        class_definitions = client.schema.get()["classes"]
        return next(
            (
                definition
                for definition in class_definitions
                if definition["class"] == class_name
            ),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None