Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from langchain_core.runnables import RunnablePassthrough
|
|
10 |
from qdrant_client import QdrantClient
|
11 |
from qdrant_client.http.models import Distance, VectorParams
|
12 |
from operator import itemgetter
|
13 |
-
from sentence_transformers import SentenceTransformer
|
14 |
|
15 |
# Set up API keys
|
16 |
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
|
@@ -21,16 +21,6 @@ pdf_links = [
|
|
21 |
"https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
|
22 |
]
|
23 |
|
24 |
-
class CustomHuggingFaceEmbeddings:
|
25 |
-
def __init__(self, model_name):
|
26 |
-
self.model = SentenceTransformer(model_name)
|
27 |
-
|
28 |
-
def embed_documents(self, texts):
|
29 |
-
return self.model.encode(texts).tolist()
|
30 |
-
|
31 |
-
def embed_query(self, text):
|
32 |
-
return self.model.encode(text).tolist()
|
33 |
-
|
34 |
@st.cache_resource
|
35 |
def load_and_process_pdfs(pdf_links):
|
36 |
documents = []
|
@@ -51,22 +41,26 @@ def load_and_process_pdfs(pdf_links):
|
|
51 |
def setup_vectorstore():
|
52 |
LOCATION = ":memory:"
|
53 |
COLLECTION_NAME = "AI_Ethics_Framework"
|
54 |
-
|
55 |
-
|
56 |
qdrant_client = QdrantClient(location=LOCATION)
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
# Create the collection
|
59 |
qdrant_client.create_collection(
|
60 |
collection_name=COLLECTION_NAME,
|
61 |
vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
|
62 |
)
|
63 |
|
64 |
-
# Create the vector store
|
65 |
-
custom_embeddings = CustomHuggingFaceEmbeddings("Technocoloredgeek/midterm-finetuned-embedding")
|
66 |
qdrant_vector_store = QdrantVectorStore(
|
67 |
client=qdrant_client,
|
68 |
collection_name=COLLECTION_NAME,
|
69 |
-
|
70 |
)
|
71 |
|
72 |
# Load and add documents
|
|
|
10 |
from qdrant_client import QdrantClient
|
11 |
from qdrant_client.http.models import Distance, VectorParams
|
12 |
from operator import itemgetter
|
13 |
+
from langchain_community.embeddings import HuggingFaceEmbeddings
|
14 |
|
15 |
# Set up API keys
|
16 |
os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
|
|
|
21 |
"https://www.whitehouse.gov/wp-content/uploads/2022/10/Blueprint-for-an-AI-Bill-of-Rights.pdf"
|
22 |
]
|
23 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
@st.cache_resource
|
25 |
def load_and_process_pdfs(pdf_links):
|
26 |
documents = []
|
|
|
41 |
def setup_vectorstore():
|
42 |
LOCATION = ":memory:"
|
43 |
COLLECTION_NAME = "AI_Ethics_Framework"
|
44 |
+
|
|
|
45 |
qdrant_client = QdrantClient(location=LOCATION)
|
46 |
|
47 |
+
# Create the embeddings
|
48 |
+
embeddings = HuggingFaceEmbeddings(model_name="Technocoloredgeek/midterm-finetuned-embedding")
|
49 |
+
|
50 |
+
# Get the vector size from the embeddings
|
51 |
+
VECTOR_SIZE = len(embeddings.embed_query("test"))
|
52 |
+
|
53 |
# Create the collection
|
54 |
qdrant_client.create_collection(
|
55 |
collection_name=COLLECTION_NAME,
|
56 |
vectors_config=VectorParams(size=VECTOR_SIZE, distance=Distance.COSINE),
|
57 |
)
|
58 |
|
59 |
+
# Create the vector store
|
|
|
60 |
qdrant_vector_store = QdrantVectorStore(
|
61 |
client=qdrant_client,
|
62 |
collection_name=COLLECTION_NAME,
|
63 |
+
embeddings=embeddings
|
64 |
)
|
65 |
|
66 |
# Load and add documents
|