Spaces:
Sleeping
Sleeping
File size: 3,925 Bytes
d24bd61 332f4de 20469f9 332f4de 6915dbd 332f4de ea8be0e 53da814 0298fd9 53da814 332f4de 20469f9 d24bd61 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
from fastapi import FastAPI
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams
from uuid import uuid4
from langchain_core.documents import Document
from typing import Union, List, Dict, Any
from pydantic import BaseModel, Field
class Data(BaseModel):
    """Request body for ``/add_data``: one document payload or a list of them."""

    # Each dictionary is later unpacked as keyword arguments for ``Document``.
    items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(
        ...,
        description="Either a dictionary or a list of dictionaries.",
    )
# Sample corpus as (page_content, source) pairs; converted to Documents below.
_SAMPLE_POSTS = (
    (
        "I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
        "tweet",
    ),
    (
        "The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
        "news",
    ),
    (
        "Building an exciting new project with LangChain - come check it out!",
        "tweet",
    ),
    (
        "Robbers broke into the city bank and stole $1 million in cash.",
        "news",
    ),
    (
        "Wow! That was an amazing movie. I can't wait to see it again.",
        "tweet",
    ),
    (
        "Is the new iPhone worth the price? Read this review to find out.",
        "website",
    ),
    (
        "The top 10 soccer players in the world right now.",
        "website",
    ),
    (
        "LangGraph is the best framework for building stateful, agentic applications!",
        "tweet",
    ),
    (
        "The stock market is down 500 points today due to fears of a recession.",
        "news",
    ),
    (
        "I have a bad feeling I am going to get deleted :(",
        "tweet",
    ),
)

documents = [
    Document(page_content=text, metadata={"source": origin})
    for text, origin in _SAMPLE_POSTS
]

# Keep the individual module-level names bound to the same Document objects.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents

# One random UUID string per sample document.
uuids = [str(uuid4()) for _ in documents]
docs = documents
# Collection name shared by the collection setup and the vector store wrapper.
COLLECTION_NAME = "my_documents"

# Sparse (BM25) embedding model used for both indexing and querying.
sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

# Local Qdrant storage kept on disk under a relative path.
client = QdrantClient(path="tmp/langchain_qdrant")

# The on-disk storage can outlive the process, and creating a collection that
# already exists raises an error, so create it only when it is missing.
collection_is_new = not client.collection_exists(COLLECTION_NAME)
if collection_is_new:
    # Create a collection with both a dense and a sparse vector slot.
    client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
        sparse_vectors_config={
            "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
        },
    )

qdrant = QdrantVectorStore(
    client=client,
    collection_name=COLLECTION_NAME,
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.SPARSE,
    sparse_vector_name="sparse",
)

# Seed only a freshly created collection: the ids are regenerated on every
# start, so re-adding the samples to an existing collection would duplicate them.
if collection_is_new:
    qdrant.add_documents(documents=documents, ids=uuids)

app = FastAPI()
@app.get("/get_data")
def get_data(query: str):
    """Search the vector store and return the matching documents.

    Args:
        query: Free-text search string passed to ``qdrant.similarity_search``.

    Returns:
        ``{"data": [...]}`` where each entry is a dumped document with its
        ``id`` removed and every metadata key starting with ``_`` dropped.
    """
    found_docs = [hit.model_dump() for hit in qdrant.similarity_search(query)]
    for doc in found_docs:
        doc.pop("id", None)
        # ``startswith`` (rather than ``key[0]``) also copes with an empty key,
        # which would otherwise raise IndexError.
        doc["metadata"] = {
            key: value
            for key, value in doc["metadata"].items()
            if not key.startswith("_")
        }
    return {"data": found_docs}
@app.post("/add_data")
def add_data(data: Data):
    """Add one or more documents to the vector store.

    Args:
        data: Request body whose ``items`` field is either a single dictionary
            or a list of dictionaries. Each dictionary is unpacked as keyword
            arguments for ``Document``.

    Returns:
        A confirmation payload. NOTE: the ``201`` is only a value inside the
        JSON body; the route decorator does not set an HTTP status code.
    """
    # Normalise to a list so the single-item and multi-item cases share one
    # path. Iterate ``data.items`` rather than ``data``: iterating the model
    # yields (field_name, value) tuples, not the payload dictionaries.
    payloads = [data.items] if isinstance(data.items, dict) else data.items
    qdrant.add_documents(documents=[Document(**payload) for payload in payloads])
    return {"message": "Create data successfully!", "status_code": 201}
@app.get("/")
def greet_json():
    """Return a fixed greeting payload for the root route."""
    greeting = {"Hello": "World!"}
    return greeting
|