Spaces:
Build error
Build error
import uuid | |
import pandas as pd | |
import numpy as np | |
from .prompts import extractConcepts | |
from .prompts import graphPrompt | |
def documents2Dataframe(documents) -> pd.DataFrame:
    """Build a DataFrame with one row per document chunk.

    Each row holds the chunk's ``page_content`` under ``"text"``, every key
    of the chunk's ``metadata`` mapping as its own column, and a freshly
    generated ``"chunk_id"`` (a random UUID4 hex string).

    Args:
        documents: Iterable of objects exposing a ``page_content`` attribute
            and a ``metadata`` mapping (presumably LangChain ``Document``
            chunks -- confirm against the caller).

    Returns:
        A DataFrame with one row per chunk; an empty DataFrame when
        ``documents`` yields nothing.
    """
    # Key order matters: a "text" key inside the metadata overrides the page
    # content, while the generated "chunk_id" always wins over a metadata key
    # of the same name.
    rows = [
        {
            "text": chunk.page_content,
            **chunk.metadata,
            "chunk_id": uuid.uuid4().hex,
        }
        for chunk in documents
    ]
    return pd.DataFrame(rows)
def df2ConceptsList(dataframe: pd.DataFrame) -> list:
    """Extract concepts from every chunk and return them as one flat list.

    ``extractConcepts`` is called once per row with the row's ``text`` and a
    metadata dict carrying the row's ``chunk_id`` and ``"type": "concept"``.

    Args:
        dataframe: Frame with at least ``text`` and ``chunk_id`` columns.

    Returns:
        A single list containing the items of every per-row result. Returns
        an empty list when ``dataframe`` has no rows or no row produced a
        usable result.
    """
    # Row-wise apply on an empty frame hands back a DataFrame rather than a
    # Series, so short-circuit before calling it.
    if dataframe.empty:
        return []

    results = dataframe.apply(
        lambda row: extractConcepts(
            row.text, {"chunk_id": row.chunk_id, "type": "concept"}
        ),
        axis=1,
    )
    # Rows whose extraction came back missing (None/NaN) are discarded; per
    # the original note, that is what an invalid JSON response results in.
    results = results.dropna()

    # Flatten the list of lists into one list of entities. A plain
    # comprehension (unlike np.concatenate) also copes with zero results.
    return [concept for chunk_concepts in results for concept in chunk_concepts]
def concepts2Df(concepts_list) -> pd.DataFrame:
    """Turn a list of concept records into a cleaned DataFrame.

    Cells that are exactly a single space are treated as missing, rows
    without an ``entity`` are dropped, and the remaining entities are
    lower-cased. The surviving rows keep their original index labels.

    Args:
        concepts_list: List of dict-like records, each expected to carry an
            ``"entity"`` key.

    Returns:
        The cleaned DataFrame. For an empty ``concepts_list`` an empty frame
        with an ``entity`` column is returned.

    Raises:
        KeyError: If records are present but none of them has an
            ``"entity"`` key.
    """
    concepts_dataframe = pd.DataFrame(concepts_list).replace(" ", np.nan)

    # No records at all: there is no "entity" column to clean, so return an
    # empty frame with the expected column instead of raising KeyError.
    if concepts_dataframe.empty and "entity" not in concepts_dataframe.columns:
        return pd.DataFrame(columns=["entity"])

    ## Remove all NaN entities
    concepts_dataframe = concepts_dataframe.dropna(subset=["entity"])

    # astype(str) keeps non-string entities (e.g. numbers) from crashing the
    # lower-casing; missing values were already dropped above, so nothing
    # turns into the string "nan". assign() returns a new frame rather than
    # writing into the dropna() result.
    return concepts_dataframe.assign(
        entity=concepts_dataframe["entity"].astype(str).str.lower()
    )
def df2Graph(dataframe: pd.DataFrame, model=None) -> list:
    """Run the graph prompt on every chunk and return one flat list.

    ``graphPrompt`` is called once per row with the row's ``text``, a
    metadata dict carrying the row's ``chunk_id``, and ``model``.

    Args:
        dataframe: Frame with at least ``text`` and ``chunk_id`` columns.
        model: Passed through unchanged as the third argument of
            ``graphPrompt``.

    Returns:
        A single list containing the items of every per-row result. Returns
        an empty list when ``dataframe`` has no rows or no row produced a
        usable result.
    """
    # Row-wise apply on an empty frame hands back a DataFrame rather than a
    # Series, so short-circuit before calling it.
    if dataframe.empty:
        return []

    results = dataframe.apply(
        lambda row: graphPrompt(row.text, {"chunk_id": row.chunk_id}, model),
        axis=1,
    )
    # Rows whose prompt result came back missing (None/NaN) are discarded;
    # per the original note, that is what an invalid JSON response results in.
    results = results.dropna()

    # Flatten the list of lists into one list of entries. A plain
    # comprehension (unlike np.concatenate) also copes with zero results.
    return [entry for chunk_entries in results for entry in chunk_entries]
def graph2Df(nodes_list) -> pd.DataFrame:
    """Turn a list of edge records into a cleaned DataFrame.

    Cells that are exactly a single space are treated as missing, rows
    lacking either ``node_1`` or ``node_2`` are dropped, and both node
    columns are lower-cased. The surviving rows keep their original index
    labels.

    Args:
        nodes_list: List of dict-like records, each expected to carry
            ``"node_1"`` and ``"node_2"`` keys.

    Returns:
        The cleaned DataFrame. For an empty ``nodes_list`` an empty frame
        with ``node_1`` and ``node_2`` columns is returned.

    Raises:
        KeyError: If records are present but a node column is absent from
            all of them.
    """
    node_columns = ["node_1", "node_2"]
    graph_dataframe = pd.DataFrame(nodes_list).replace(" ", np.nan)

    # No records at all: there are no node columns to clean, so return an
    # empty frame with the expected columns instead of raising KeyError.
    if graph_dataframe.empty and not set(node_columns) <= set(
        graph_dataframe.columns
    ):
        return pd.DataFrame(columns=node_columns)

    ## Remove all NaN entities
    graph_dataframe = graph_dataframe.dropna(subset=node_columns)

    # astype(str) keeps non-string nodes (e.g. numbers) from crashing the
    # lower-casing; missing values were already dropped above, so nothing
    # turns into the string "nan". assign() returns a new frame rather than
    # writing into the dropna() result.
    return graph_dataframe.assign(
        node_1=graph_dataframe["node_1"].astype(str).str.lower(),
        node_2=graph_dataframe["node_2"].astype(str).str.lower(),
    )