File size: 3,925 Bytes
d24bd61
332f4de
 
 
 
 
 
 
 
 
 
 
 
20469f9
 
 
 
 
 
332f4de
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6915dbd
 
 
332f4de
 
 
ea8be0e
53da814
 
 
0298fd9
 
53da814
 
332f4de
 
 
 
 
 
20469f9
 
 
 
 
 
 
 
 
 
 
 
d24bd61
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
from fastapi import FastAPI
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

from langchain_qdrant import FastEmbedSparse, QdrantVectorStore, RetrievalMode
from qdrant_client import QdrantClient, models
from qdrant_client.http.models import Distance, SparseVectorParams, VectorParams

from uuid import uuid4

from langchain_core.documents import Document

from typing import Union, List, Dict, Any
from pydantic import BaseModel, Field

class Data(BaseModel):
    items: Union[Dict[str, Any], List[Dict[str, Any]]] = Field(..., description="Either a dictionary or a list of dictionaries.")

document_1 = Document(
    page_content="I had chocolate chip pancakes and scrambled eggs for breakfast this morning.",
    metadata={"source": "tweet"},
)

document_2 = Document(
    page_content="The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees Fahrenheit.",
    metadata={"source": "news"},
)

document_3 = Document(
    page_content="Building an exciting new project with LangChain - come check it out!",
    metadata={"source": "tweet"},
)

document_4 = Document(
    page_content="Robbers broke into the city bank and stole $1 million in cash.",
    metadata={"source": "news"},
)

document_5 = Document(
    page_content="Wow! That was an amazing movie. I can't wait to see it again.",
    metadata={"source": "tweet"},
)

document_6 = Document(
    page_content="Is the new iPhone worth the price? Read this review to find out.",
    metadata={"source": "website"},
)

document_7 = Document(
    page_content="The top 10 soccer players in the world right now.",
    metadata={"source": "website"},
)

document_8 = Document(
    page_content="LangGraph is the best framework for building stateful, agentic applications!",
    metadata={"source": "tweet"},
)

document_9 = Document(
    page_content="The stock market is down 500 points today due to fears of a recession.",
    metadata={"source": "news"},
)

document_10 = Document(
    page_content="I have a bad feeling I am going to get deleted :(",
    metadata={"source": "tweet"},
)

documents = [
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
]
uuids = [str(uuid4()) for _ in range(len(documents))]

docs = documents

sparse_embeddings = FastEmbedSparse(model_name="Qdrant/bm25")

client = QdrantClient(path="tmp/langchain_qdrant")

# Create a collection with sparse vectors
client.create_collection(
    collection_name="my_documents",
    vectors_config={"dense": VectorParams(size=3072, distance=Distance.COSINE)},
    sparse_vectors_config={
        "sparse": SparseVectorParams(index=models.SparseIndexParams(on_disk=False))
    },
)

qdrant = QdrantVectorStore(
    client=client,
    collection_name="my_documents",
    sparse_embedding=sparse_embeddings,
    retrieval_mode=RetrievalMode.SPARSE,
    sparse_vector_name="sparse",
    
)

qdrant.add_documents(documents=documents, ids=uuids)

app = FastAPI()

@app.get("/get_data")
def get_data(query: str):
    # query = "How much money did the robbers steal?"
    found_docs = [x.model_dump() for x in qdrant.similarity_search(query)]

    for doc in found_docs:
        doc.pop("id", None)
        # key = 
        for k in list(doc["metadata"].keys()):
            if k[0] == "_":
                doc["metadata"].pop(k)

    return {
        "data": found_docs
    }


@app.post("/add_data")
def add_data(data: Data):
    global qdrant
    if isinstance(data.items, dict):
        qdrant.add_documents(documents=[Document(**data.items)])
    else:
        qdrant.add_documents(documents=[Document(**x.items) for x in data])

    return {"message":"Create data successfully!", "status_code":201}



@app.get("/")
def greet_json():
    return {"Hello": "World!"}