Runtime error
Update app.py
app.py CHANGED
@@ -41,7 +41,7 @@ recursive_framework_document = metadata_generator(ai_framework_document, "AI Framework")
 recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
 combined_documents = recursive_framework_document + recursive_blueprint_document
 
-
+"""
 #from transformers import AutoTokenizer, AutoModel
 #import torch
 #embedding = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
@@ -57,6 +57,43 @@ vectorstore = Qdrant.from_documents(
     location=":memory:",
     collection_name="ai_policy"
 )
+"""
+
+from transformers import AutoTokenizer, AutoModel
+import torch
+from qdrant_client import QdrantClient
+
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
+model = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
+
+# Define a wrapper function for embedding documents
+def embed(documents):
+    inputs = tokenizer(documents, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).numpy()  # Return embeddings
+
+# Initialize Qdrant client (in-memory for testing)
+qdrant_client = QdrantClient(":memory:")
+
+# Create the Qdrant collection
+qdrant_client.recreate_collection(
+    collection_name="ai_policy",
+    vectors_config={"size": 768, "distance": "Cosine"}  # Adjust size based on embedding dimensions
+)
+
+# Example documents
+combined_documents = ["This is document one.", "This is document two."]
+
+# Create vectorstore (with embedding function)
+vectorstore = qdrant_client.upsert(
+    collection_name="ai_policy",
+    points=[
+        {"id": i, "vector": embed([doc])[0], "payload": {"document": doc}}
+        for i, doc in enumerate(combined_documents)
+    ]
+)
 retriever = vectorstore.as_retriever()
 
 ## Generation LLM
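A note on the hard-coded vector size: `embed()` returns mean-pooled vectors whose length is the model's hidden size, so the `768` in `vectors_config` only works if the checkpoint is a 768-wide encoder. A minimal sketch of reading the dimension from the model config instead, assuming `Cheselle/finetuned-arctic-sentence` is a standard BERT-style encoder that exposes `hidden_size`; it also uses `models.VectorParams`, the documented qdrant-client form, since the raw dict in the diff may not validate on every client version:

from qdrant_client import models
from transformers import AutoConfig

# hidden_size is the width of last_hidden_state, i.e. the length of the
# mean-pooled vectors that embed() returns (assumes a BERT-style config).
config = AutoConfig.from_pretrained("Cheselle/finetuned-arctic-sentence")

qdrant_client.recreate_collection(
    collection_name="ai_policy",
    vectors_config=models.VectorParams(
        size=config.hidden_size,
        distance=models.Distance.COSINE,
    ),
)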
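The Space's "Runtime error" badge is most plausibly explained by the last two added statements: `QdrantClient.upsert()` returns an `UpdateResult` status object, not a vector store, so `vectorstore.as_retriever()` on new line 97 raises `AttributeError`. Separately, the placeholder `combined_documents = ["This is document one.", ...]` on new line 87 overwrites the real chunks built on line 42, so even a working store would index only the two toy strings. If the raw client is kept, retrieval has to go through the client itself. A sketch, reusing the commit's `embed()` and `qdrant_client`; the query string is illustrative:

# Query the collection directly instead of calling .as_retriever().
query = "What does the blueprint say about data privacy?"
hits = qdrant_client.search(
    collection_name="ai_policy",
    query_vector=embed([query])[0].tolist(),  # one vector, as plain floats
    limit=4,
)
for hit in hits:
    print(hit.score, hit.payload["document"])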
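If the rest of app.py expects a LangChain retriever (the commented-out `Qdrant.from_documents` path suggests it does), a smaller change is to keep the fine-tuned model but expose it through LangChain's `Embeddings` interface, so `.as_retriever()` keeps working. A sketch under the assumption that the langchain-community `Qdrant` integration used by the old code is still installed; `ArcticSentenceEmbeddings` is an illustrative name, and `tokenizer`, `model`, `torch`, and the real `combined_documents` are the ones already defined in app.py:

from langchain_core.embeddings import Embeddings
from langchain_community.vectorstores import Qdrant

class ArcticSentenceEmbeddings(Embeddings):
    """Mean-pooled sentence embeddings from the fine-tuned Arctic model."""

    def _encode(self, texts):
        inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True)
        with torch.no_grad():
            outputs = model(**inputs)
        return outputs.last_hidden_state.mean(dim=1).tolist()

    def embed_documents(self, texts):
        return self._encode(texts)

    def embed_query(self, text):
        return self._encode([text])[0]

# combined_documents is the chunk list from line 42, not the placeholder strings.
vectorstore = Qdrant.from_documents(
    combined_documents,
    ArcticSentenceEmbeddings(),
    location=":memory:",
    collection_name="ai_policy",
)
retriever = vectorstore.as_retriever()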