Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -41,15 +41,15 @@ recursive_framework_document = metadata_generator(ai_framework_document, "AI Fra
|
|
41 |
recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
|
42 |
combined_documents = recursive_framework_document + recursive_blueprint_document
|
43 |
|
44 |
-
"""
|
45 |
-
#from transformers import AutoTokenizer, AutoModel
|
46 |
-
#import torch
|
47 |
-
#embedding = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
48 |
-
#tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
49 |
|
50 |
-
from
|
51 |
-
|
52 |
-
embeddings =
|
|
|
|
|
|
|
|
|
|
|
53 |
|
54 |
vectorstore = Qdrant.from_documents(
|
55 |
documents=combined_documents,
|
@@ -57,41 +57,7 @@ vectorstore = Qdrant.from_documents(
|
|
57 |
location=":memory:",
|
58 |
collection_name="ai_policy"
|
59 |
)
|
60 |
-
"""
|
61 |
-
|
62 |
-
from transformers import AutoTokenizer, AutoModel
|
63 |
-
import torch
|
64 |
-
from qdrant_client import QdrantClient
|
65 |
-
|
66 |
-
# Load the tokenizer and model
|
67 |
-
tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
68 |
-
model = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
69 |
-
|
70 |
-
# Define a wrapper function for embedding documents
|
71 |
-
def embed(documents):
|
72 |
-
inputs = tokenizer(documents, return_tensors="pt", padding=True, truncation=True)
|
73 |
-
with torch.no_grad():
|
74 |
-
outputs = model(**inputs)
|
75 |
-
return outputs.last_hidden_state.mean(dim=1).numpy() # Return embeddings
|
76 |
-
|
77 |
-
# Initialize Qdrant client (in-memory for testing)
|
78 |
-
qdrant_client = QdrantClient(":memory:")
|
79 |
-
|
80 |
-
# Create the Qdrant collection
|
81 |
-
qdrant_client.recreate_collection(
|
82 |
-
collection_name="ai_policy",
|
83 |
-
vectors_config={"size": 768, "distance": "Cosine"} # Adjust size based on embedding dimensions
|
84 |
-
)
|
85 |
-
|
86 |
|
87 |
-
# Create vectorstore (with embedding function)
|
88 |
-
vectorstore = qdrant_client.upsert(
|
89 |
-
collection_name="ai_policy",
|
90 |
-
points=[
|
91 |
-
{"id": i, "vector": embed([doc])[0], "payload": {"document": doc}}
|
92 |
-
for i, doc in enumerate(combined_documents)
|
93 |
-
]
|
94 |
-
)
|
95 |
retriever = vectorstore.as_retriever()
|
96 |
|
97 |
## Generation LLM
|
|
|
41 |
recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
|
42 |
combined_documents = recursive_framework_document + recursive_blueprint_document
|
43 |
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
from transformers import AutoTokenizer, AutoModel
|
46 |
+
import torch
|
47 |
+
embeddings = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
48 |
+
tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
|
49 |
+
|
50 |
+
#from sentence_transformers import SentenceTransformer
|
51 |
+
#embedding_model = SentenceTransformer("Cheselle/finetuned-arctic-sentence")
|
52 |
+
#embeddings = embedding_model.encode(ai_framework_document + ai_blueprint_document)
|
53 |
|
54 |
vectorstore = Qdrant.from_documents(
|
55 |
documents=combined_documents,
|
|
|
57 |
location=":memory:",
|
58 |
collection_name="ai_policy"
|
59 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
retriever = vectorstore.as_retriever()
|
62 |
|
63 |
## Generation LLM
|