Cheselle committed on
Commit
e2651ac
·
verified ·
1 Parent(s): dca2082

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -42
app.py CHANGED
@@ -41,15 +41,15 @@ recursive_framework_document = metadata_generator(ai_framework_document, "AI Fra
41
  recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
42
  combined_documents = recursive_framework_document + recursive_blueprint_document
43
 
44
- """
45
- #from transformers import AutoTokenizer, AutoModel
46
- #import torch
47
- #embedding = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
48
- #tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
49
 
50
- from sentence_transformers import SentenceTransformer
51
- embedding_model = SentenceTransformer("Cheselle/finetuned-arctic-sentence")
52
- embeddings = embedding_model.encode(ai_framework_document + ai_blueprint_document)
 
 
 
 
 
53
 
54
  vectorstore = Qdrant.from_documents(
55
  documents=combined_documents,
@@ -57,41 +57,7 @@ vectorstore = Qdrant.from_documents(
57
  location=":memory:",
58
  collection_name="ai_policy"
59
  )
60
- """
61
-
62
- from transformers import AutoTokenizer, AutoModel
63
- import torch
64
- from qdrant_client import QdrantClient
65
-
66
- # Load the tokenizer and model
67
- tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
68
- model = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
69
-
70
- # Define a wrapper function for embedding documents
71
- def embed(documents):
72
- inputs = tokenizer(documents, return_tensors="pt", padding=True, truncation=True)
73
- with torch.no_grad():
74
- outputs = model(**inputs)
75
- return outputs.last_hidden_state.mean(dim=1).numpy() # Return embeddings
76
-
77
- # Initialize Qdrant client (in-memory for testing)
78
- qdrant_client = QdrantClient(":memory:")
79
-
80
- # Create the Qdrant collection
81
- qdrant_client.recreate_collection(
82
- collection_name="ai_policy",
83
- vectors_config={"size": 768, "distance": "Cosine"} # Adjust size based on embedding dimensions
84
- )
85
-
86
 
87
- # Create vectorstore (with embedding function)
88
- vectorstore = qdrant_client.upsert(
89
- collection_name="ai_policy",
90
- points=[
91
- {"id": i, "vector": embed([doc])[0], "payload": {"document": doc}}
92
- for i, doc in enumerate(combined_documents)
93
- ]
94
- )
95
  retriever = vectorstore.as_retriever()
96
 
97
  ## Generation LLM
 
41
  recursive_blueprint_document = metadata_generator(ai_blueprint_document, "AI Blueprint")
42
  combined_documents = recursive_framework_document + recursive_blueprint_document
43
 
 
 
 
 
 
44
 
45
+ from transformers import AutoTokenizer, AutoModel
46
+ import torch
47
+ embeddings = AutoModel.from_pretrained("Cheselle/finetuned-arctic-sentence")
48
+ tokenizer = AutoTokenizer.from_pretrained("Cheselle/finetuned-arctic-sentence")
49
+
50
+ #from sentence_transformers import SentenceTransformer
51
+ #embedding_model = SentenceTransformer("Cheselle/finetuned-arctic-sentence")
52
+ #embeddings = embedding_model.encode(ai_framework_document + ai_blueprint_document)
53
 
54
  vectorstore = Qdrant.from_documents(
55
  documents=combined_documents,
 
57
  location=":memory:",
58
  collection_name="ai_policy"
59
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
 
 
 
 
 
 
 
 
61
  retriever = vectorstore.as_retriever()
62
 
63
  ## Generation LLM