Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,9 @@ import streamlit as st
|
|
2 |
import pandas as pd
|
3 |
from langchain_text_splitters import TokenTextSplitter
|
4 |
from langchain.docstore.document import Document
|
|
|
|
|
|
|
5 |
|
6 |
|
7 |
st.set_page_config(page_title="SEARCH IATI",layout='wide')
|
@@ -49,8 +52,28 @@ def get_chunks():
|
|
49 |
"status":giz_df.loc[i,'status'],
|
50 |
"title_main":giz_df.loc[i,'title_main'],}))
|
51 |
return placeholder
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
chunks = get_chunks()
|
53 |
-
|
54 |
|
55 |
button=st.button("search")
|
56 |
|
|
|
2 |
import pandas as pd
|
3 |
from langchain_text_splitters import TokenTextSplitter
|
4 |
from langchain.docstore.document import Document
|
5 |
+
from torch import cuda
|
6 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings, HuggingFaceInferenceAPIEmbeddings
|
7 |
+
device = 'cuda' if cuda.is_available() else 'cpu'
|
8 |
|
9 |
|
10 |
st.set_page_config(page_title="SEARCH IATI",layout='wide')
|
|
|
52 |
"status":giz_df.loc[i,'status'],
|
53 |
"title_main":giz_df.loc[i,'title_main'],}))
|
54 |
return placeholder
|
55 |
+
|
56 |
+
def embed_chunks(chunks):
    """Embed the given documents and index them in a local Qdrant collection.

    Builds a ``HuggingFaceEmbeddings`` model ('BAAI/bge-m3', normalized
    embeddings, running on the module-level ``device``) and feeds ``chunks``
    to ``Qdrant.from_documents``, storing the collection named 'all' on disk
    under ``/data/local_qdrant``.

    Args:
        chunks: Documents to embed; passed unchanged to
            ``Qdrant.from_documents``.

    Returns:
        dict: Maps collection name to the created vector store. Only the
        key ``'all'`` is populated.
    """
    # Imported locally because the module-level imports visible in this file
    # only bring in the embedding classes; without this, ``Qdrant`` would be
    # an undefined name and the call below would raise NameError.
    from langchain_community.vectorstores import Qdrant

    embeddings = HuggingFaceEmbeddings(
        model_kwargs={'device': device},
        encode_kwargs={'normalize_embeddings': True},
        model_name='BAAI/bge-m3',
    )

    # placeholder for collection
    qdrant_collections = {}
    # NOTE(review): this function is called at module top level, and Streamlit
    # presumably re-executes the script on each interaction, so the on-disk
    # store would be rebuilt/re-opened every time -- consider caching; confirm.
    qdrant_collections['all'] = Qdrant.from_documents(
        chunks,
        embeddings,
        path="/data/local_qdrant",
        collection_name='all',
    )

    print(qdrant_collections)
    print("vector embeddings done")
    return qdrant_collections
|
74 |
+
|
75 |
chunks = get_chunks()
|
76 |
+
qdrant_col = embed_chunks(chunks)
|
77 |
|
78 |
button=st.button("search")
|
79 |
|