Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -66,6 +66,8 @@ def get_chunks():
|
|
66 |
'language', 'start_year', 'end_year','poli_trager'], inplace=True)
|
67 |
giz_df.rename(columns = {'project_name':'title_main','countries':'country_name',
|
68 |
'client':'org','project_description':'description_main'}, inplace=True)
|
|
|
|
|
69 |
df = pd.concat([projects_df,giz_df],ignore_index=True)
|
70 |
print(df.columns)
|
71 |
|
@@ -75,12 +77,9 @@ def get_chunks():
|
|
75 |
placeholder= []
|
76 |
for i in range(len(giz_df)):
|
77 |
placeholder.append(Document(page_content= giz_df.loc[i,'chunks'],
|
78 |
-
metadata={"
|
79 |
-
"
|
80 |
"country_name":str(giz_df.loc[i,'country_name']),
|
81 |
-
"crs_5_name": giz_df.loc[i,'crs_5_name'],
|
82 |
-
"crs_3_name": giz_df.loc[i,'crs_3_name'],
|
83 |
-
"sgd_pred_str":giz_df.loc[i,'sgd_pred_str'],
|
84 |
"status":giz_df.loc[i,'status'],
|
85 |
"title_main":giz_df.loc[i,'title_main'],}))
|
86 |
return placeholder
|
@@ -112,17 +111,15 @@ def embed_chunks(chunks):
|
|
112 |
encode_kwargs = {'normalize_embeddings': True},
|
113 |
model_name='BAAI/bge-m3'
|
114 |
)
|
115 |
-
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
|
116 |
# placeholder for collection
|
117 |
print("starting embedding")
|
118 |
qdrant_collections = {}
|
119 |
-
qdrant_collections['
|
120 |
chunks,
|
121 |
embeddings,
|
122 |
-
sparse_embeddings = sparse_embeddings,
|
123 |
path="/data/local_qdrant",
|
124 |
-
collection_name='
|
125 |
-
retrieval_mode=RetrievalMode.HYBRID,
|
126 |
)
|
127 |
|
128 |
print(qdrant_collections)
|
|
|
66 |
'language', 'start_year', 'end_year','poli_trager'], inplace=True)
|
67 |
giz_df.rename(columns = {'project_name':'title_main','countries':'country_name',
|
68 |
'client':'org','project_description':'description_main'}, inplace=True)
|
69 |
+
giz_df['source'] = 'GIZ_WORLDWIDE'
|
70 |
+
giz_df['status'] = "None"
|
71 |
df = pd.concat([projects_df,giz_df],ignore_index=True)
|
72 |
print(df.columns)
|
73 |
|
|
|
77 |
placeholder= []
|
78 |
for i in range(len(giz_df)):
|
79 |
placeholder.append(Document(page_content= giz_df.loc[i,'chunks'],
|
80 |
+
metadata={"id": giz_df.loc[i,'id'],
|
81 |
+
"org":giz_df.loc[i,'org'],
|
82 |
"country_name":str(giz_df.loc[i,'country_name']),
|
|
|
|
|
|
|
83 |
"status":giz_df.loc[i,'status'],
|
84 |
"title_main":giz_df.loc[i,'title_main'],}))
|
85 |
return placeholder
|
|
|
111 |
encode_kwargs = {'normalize_embeddings': True},
|
112 |
model_name='BAAI/bge-m3'
|
113 |
)
|
114 |
+
#sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")
|
115 |
# placeholder for collection
|
116 |
print("starting embedding")
|
117 |
qdrant_collections = {}
|
118 |
+
qdrant_collections['all'] = Qdrant.from_documents(
|
119 |
chunks,
|
120 |
embeddings,
|
|
|
121 |
path="/data/local_qdrant",
|
122 |
+
collection_name='all',
|
|
|
123 |
)
|
124 |
|
125 |
print(qdrant_collections)
|