Cheselle committed on
Commit 38298ad · verified · 1 Parent(s): 192368b

Update app.py

Files changed (1): app.py +38 -1
app.py CHANGED
@@ -41,7 +41,7 @@ recursive_framework_document = metadata_generator(ai_framework_document, "AI Fra
 recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
 combined_documents = recursive_framework_document + recursive_blueprint_document
 
-
+"""
 #from transformers import AutoTokenizer, AutoModel
 #import torch
 #embedding = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
@@ -57,6 +57,43 @@ vectorstore = Qdrant.from_documents(
     location=":memory:",
     collection_name="ai_policy"
 )
+"""
+
+from transformers import AutoTokenizer, AutoModel
+import torch
+from qdrant_client import QdrantClient
+
+# Load the tokenizer and model
+tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
+model = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
+
+# Define a wrapper function for embedding documents
+def embed(documents):
+    inputs = tokenizer(documents, return_tensors="pt", padding=True, truncation=True)
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return outputs.last_hidden_state.mean(dim=1).numpy()  # Mean-pooled embeddings
+
+# Initialize Qdrant client (in-memory for testing)
+qdrant_client = QdrantClient(":memory:")
+
+# Create the Qdrant collection
+qdrant_client.recreate_collection(
+    collection_name="ai_policy",
+    vectors_config={"size": 768, "distance": "Cosine"}  # Size must match the embedding dimension
+)
+
+# Example documents
+combined_documents = ["This is document one.", "This is document two."]
+
+# Embed and upsert the documents into the collection
+vectorstore = qdrant_client.upsert(
+    collection_name="ai_policy",
+    points=[
+        {"id": i, "vector": embed([doc])[0], "payload": {"document": doc}}
+        for i, doc in enumerate(combined_documents)
+    ]
+)
 retriever = vectorstore.as_retriever()
 
 ## Generation LLM
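
A note on the new embed() wrapper: outputs.last_hidden_state.mean(dim=1) averages over every token position, padding included, so batching documents of different lengths skews the vectors toward the pad token. Below is a minimal mask-aware sketch, assuming the same tokenizer and model loaded in the commit (embed_masked is a hypothetical name, not part of the committed code):

# Sketch: mean pooling that ignores padding tokens, assuming the
# tokenizer/model pair loaded in the diff above.
def embed_masked(documents):
    inputs = tokenizer(documents, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    mask = inputs["attention_mask"].unsqueeze(-1)           # (batch, seq_len, 1)
    summed = (outputs.last_hidden_state * mask).sum(dim=1)  # sum real tokens only
    counts = mask.sum(dim=1).clamp(min=1)                   # tokens per document
    return (summed / counts).numpy()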
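Relatedly, the hard-coded "size": 768 in vectors_config has to equal the encoder's output width, or upserts will be rejected. If the fine-tuned checkpoint exposes a BERT-style config (an assumption, not verified against the model), the dimension can be read off instead of hard-coded:

# Sketch: derive the vector size from the loaded model's config.
dim = model.config.hidden_size  # 768 for BERT-base-sized encoders
qdrant_client.recreate_collection(
    collection_name="ai_policy",
    vectors_config={"size": dim, "distance": "Cosine"},
)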
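Finally, QdrantClient.upsert() returns an operation status object, not a vectorstore, and the LangChain Qdrant.from_documents(...) handle is now commented out, so the unchanged line retriever = vectorstore.as_retriever() will fail at runtime. One way to query the populated collection directly with the raw client, sketched under the assumption that the embed() wrapper and qdrant_client from the diff are in scope (retrieve is a hypothetical helper, not part of the commit):

# Sketch: nearest-neighbour lookup against the "ai_policy" collection.
def retrieve(query, k=4):
    hits = qdrant_client.search(
        collection_name="ai_policy",
        query_vector=embed([query])[0].tolist(),  # embed the query like the documents
        limit=k,
    )
    return [hit.payload["document"] for hit in hits]

# Example usage:
# retrieve("What does the AI Blueprint say about data privacy?")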