Spaces:
Build error
Build error
File size: 4,511 Bytes
736842d b1a798e 736842d b1a798e 13778dd 955af6a f9e10ad 69983c5 f9e10ad 69983c5 faa37d1 2fbdaee 13778dd faa37d1 13778dd c2c40bf 2fbdaee f8ac089 13778dd 475a4a4 13778dd 736842d 3ae17c8 340cc83 3ae17c8 340cc83 3ae17c8 1cb0871 3ae17c8 1cb0871 3ae17c8 41b5bdf 1cb0871 3ae17c8 1cb0871 3ae17c8 1cb0871 3ceb12a 1cb0871 093848b 41b5bdf 5e4315c 41b5bdf 5e4315c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import weaviate
import streamlit as st
from weaviate.embedded import EmbeddedOptions
from weaviate import Client
import pandas as pd # <-- Add this import
from io import StringIO # <-- Add this import
import pandas as pd
def hybrid_search_weaviate(client, selected_class, query):
    """
    Search a Weaviate class with a ``Like`` filter and return the matches.

    Despite the name, this issues a plain ``where`` filter rather than a
    hybrid (keyword + vector) query.

    Args:
        client: Weaviate client exposing the
            ``query.get(...).with_where(...).do()`` builder chain.
        selected_class: Name of the Weaviate class to query.
        query: Pattern handed to the ``Like`` operator.

    Returns:
        The list of objects found under ``data.Get.<selected_class>`` in the
        response, or an empty list when that section is missing or null.
    """
    # ``with_where`` receives the filter itself. Wrapping it in an extra
    # {"where": ...} layer hides the "path"/"operator" keys from the builder.
    # NOTE(review): the "*" path and "*" property list are kept from the
    # original; confirm the server accepts wildcards in both places.
    where_filter = {
        "path": ["*"],
        "operator": "Like",
        "valueString": query,
    }
    response = client.query.get(selected_class, "*").with_where(where_filter).do()

    # Results are keyed by the queried class name, not a fixed "Things" key.
    # Any level may be null when the query fails, hence the ``or`` fallbacks.
    get_section = ((response or {}).get("data") or {}).get("Get") or {}
    return get_section.get(selected_class) or []
def convert_to_tapas_format(data):
    """
    Convert Weaviate search results into a header-plus-rows table.

    Args:
        data: Either a list of property dictionaries (one per object), or a
            raw response dictionary shaped like ``{"data": {"Get": {...}}}``.
            In the raw form, objects wrapped as ``{"thing": {...}}`` are
            unwrapped; other objects are used as they are.

    Returns:
        A list of lists: the column names first, then one list per object.
        Empty input yields ``[[]]``.
    """
    if isinstance(data, dict):
        # Raw response: gather the objects of every class under "Get" instead
        # of relying on a fixed "Things" key.
        get_section = (data.get("data") or {}).get("Get") or {}
        records = [
            obj["thing"] if isinstance(obj, dict) and "thing" in obj else obj
            for objects in get_section.values()
            for obj in (objects or [])
        ]
    else:
        # Documented form: already a list of per-object dictionaries.
        records = list(data or [])

    df = pd.DataFrame(records)
    return [df.columns.tolist()] + df.values.tolist()
def initialize_weaviate_client():
    """Build and return a ``weaviate.Client`` configured with default ``EmbeddedOptions``."""
    embedded = EmbeddedOptions()
    client = weaviate.Client(embedded_options=embedded)
    return client
def class_exists(client, class_name):
    """
    Report whether ``class_name`` is present in the Weaviate schema.

    Args:
        client: Weaviate client exposing ``schema.get()``.
        class_name: Name of the class to look for.

    Returns:
        True when a class with that name is listed in the schema, False when
        it is not listed or the schema cannot be fetched.
    """
    # Use the full-schema lookup that get_class_schema also relies on.
    # NOTE(review): the original called ``client.schema.get_class`` inside a
    # bare ``except:``; if that method is absent from the client, the
    # AttributeError was swallowed and this always returned False.
    try:
        schema = client.schema.get()
    except Exception:
        # Best effort: an unreachable server is reported as "does not exist".
        return False
    known_classes = (schema or {}).get("classes") or []
    return any(entry.get("class") == class_name for entry in known_classes)
def map_dtype_to_weaviate(dtype):
    """
    Map a pandas/NumPy dtype (or its name) to a Weaviate data type name.

    Args:
        dtype: A dtype object or string; only its ``str()`` form is inspected.

    Returns:
        ``"int"``, ``"number"`` or ``"boolean"`` when the dtype name contains
        "int", "float" or "bool" respectively (checked in that order,
        ignoring case); ``"string"`` for everything else.
    """
    # Lower-case first so capitalised dtype names such as "Int64" and
    # "Float64" match instead of falling through to "string".
    dtype_name = str(dtype).lower()
    if "int" in dtype_name:
        return "int"
    if "float" in dtype_name:
        return "number"
    if "bool" in dtype_name:
        return "boolean"
    return "string"
def create_new_class_schema(client, class_name, class_description):
    """
    Register a new class without properties in the Weaviate schema.

    The outcome is reported through Streamlit: a success message when the
    schema call completes, otherwise an error message carrying the exception.
    """
    payload = {
        "classes": [
            {
                "class": class_name,
                "description": class_description,
                "properties": [],
            },
        ],
    }
    try:
        client.schema.create(payload)
        st.success(f"Class {class_name} created successfully!")
    except Exception as exc:
        st.error(f"Error creating class: {exc}")
def ingest_data_to_weaviate(client, csv_file, selected_class):
    """
    Load an uploaded CSV into a Weaviate class, creating properties if needed.

    When the class has no properties yet, one property per CSV column is
    created with a data type derived from the column dtype. Otherwise the CSV
    columns must match the existing property names exactly. Every row is then
    stored as one data object, and a preview of the data is shown in the
    Streamlit UI.

    Args:
        client: Weaviate client.
        csv_file: Binary file-like object whose ``read()`` returns UTF-8
            encoded CSV bytes.
        selected_class: Name of the target Weaviate class.

    Returns:
        The parsed DataFrame, or ``None`` when the CSV columns do not match
        the properties of a class that already has some.
    """
    # "utf-8-sig" decodes plain UTF-8 as well, but also strips a leading BOM
    # that would otherwise become part of the first column name.
    csv_text = csv_file.read().decode("utf-8-sig")
    dataframe = pd.read_csv(StringIO(csv_text))

    # A missing class, a missing "properties" key and an empty list are all
    # treated as "no properties yet".
    class_schema = get_class_schema(client, selected_class)
    existing_properties = (class_schema or {}).get("properties") or []

    if not existing_properties:
        for column_name, data_type in zip(dataframe.columns, dataframe.dtypes):
            property_schema = {
                "name": column_name,
                "description": f"Property for {column_name}",
                "dataType": [map_dtype_to_weaviate(data_type)]
            }
            try:
                client.schema.property.create(selected_class, property_schema)
            except weaviate.exceptions.SchemaValidationException:
                # Property might already exist, so we can continue
                pass
    else:
        schema_columns = {prop["name"] for prop in existing_properties}
        if set(dataframe.columns) != schema_columns:
            st.error("The columns in the uploaded CSV do not match the schema of the selected class. Please check and upload the correct CSV or create a new class.")
            return None

    records = dataframe.to_dict(orient="records")
    failed_count = 0
    for record in records:
        # Empty CSV cells are read as NaN, which is not valid JSON; leave
        # those fields out so the rest of the row can still be stored.
        payload = {key: value for key, value in record.items() if pd.notna(value)}
        try:
            client.data_object.create(payload, selected_class)
        except Exception as e:
            failed_count += 1
            st.error(f"Error ingesting record: {e}")

    # Only claim success when every record was actually stored.
    if failed_count:
        st.error(f"{failed_count} of {len(records)} records could not be ingested into the class '{selected_class}'")
    else:
        st.write(f"Your CSV was successfully integrated into the vector database under the class '{selected_class}'")
    st.write(dataframe.head())  # Preview of the first few rows
    return dataframe
def get_class_schema(client, class_name):
    """
    Look up one class definition in the Weaviate schema.

    Returns the first entry of the schema's "classes" list whose "class"
    field equals ``class_name``, or ``None`` when there is no such entry or
    the lookup raises a ``SchemaValidationException``.
    """
    try:
        all_classes = client.schema.get()["classes"]
        return next(
            (entry for entry in all_classes if entry["class"] == class_name),
            None,
        )
    except weaviate.exceptions.SchemaValidationException:
        return None