File size: 5,823 Bytes
212b42a
 
 
5a2b2d3
 
 
 
 
 
 
 
 
78c941e
5a2b2d3
 
 
 
 
 
 
 
 
27306dd
 
 
 
 
 
c2c40ec
9a34296
78c941e
5a2b2d3
78c941e
 
 
 
 
 
5a2b2d3
 
78c941e
 
 
 
5a2b2d3
 
78c941e
 
5a2b2d3
 
78c941e
 
 
 
 
 
 
 
5a2b2d3
 
78c941e
 
5a2b2d3
 
 
78c941e
 
 
 
 
 
5a2b2d3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212b42a
 
 
 
 
 
 
 
5a2b2d3
 
 
b1cee42
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import os
import sys
from typing import List, Optional

from fastapi import FastAPI, Request, Depends
from langchain_core.callbacks import BaseCallbackHandler
from langchain_core.runnables import Runnable
from langserve.serialization import WellKnownLCSerializer
from sqlalchemy.orm import Session
from sse_starlette.sse import EventSourceResponse

import schemas
from chains import simple_chain, formatted_chain
import crud, models, schemas
from database import SessionLocal, engine
from callbacks import LogResponseCallback


# Create the database tables for the models registered on models.Base, using
# the shared engine. This runs as an import-time side effect of this module.
models.Base.metadata.create_all(bind=engine)

# The FastAPI application object on which the route handlers below are registered.
app = FastAPI()

def get_db():
    """Yield a database session and close it once the consumer is done.

    A session is created from ``SessionLocal`` and handed to the caller via
    ``yield``. The ``finally`` clause closes it, so the session is released
    even if the consumer raises while using it.
    """
    session = SessionLocal()
    try:
        yield session
    finally:
        session.close()

# ..
# "async" marks the function as asynchronous, allowing it to pause and resume during operations like streaming or I/O.
async def generate_stream(
    input_data: schemas.BaseModel,
    runnable: Runnable,
    callbacks: Optional[List[BaseCallbackHandler]] = None,
):
    """Stream a runnable's output as server-sent-event (SSE) payloads.

    Args:
        input_data: Model whose ``.dict()`` form is passed to the runnable
            as its input.
        runnable: Runnable whose output chunks are streamed.
        callbacks: Optional handlers forwarded to the runnable through
            ``config["callbacks"]``. ``None`` (the default) means no handlers.

    Yields:
        One ``{"data": <serialized chunk>, "event": "data"}`` dict per output
        chunk, then a final ``{"event": "end"}`` dict once the runnable is
        exhausted, so the client knows no more data will be sent.
    """
    # A None default avoids sharing one mutable list object across all calls.
    config = {"callbacks": callbacks if callbacks is not None else []}
    # One serializer is enough for the whole stream; no need to rebuild it per chunk.
    serializer = WellKnownLCSerializer()
    # Use the runnable's async streaming API: iterating the synchronous
    # ``stream()`` inside this coroutine would block the event loop while
    # waiting for each chunk, stalling every other in-flight request.
    async for output in runnable.astream(input_data.dict(), config=config):
        data = serializer.dumps(output).decode("utf-8")
        yield {"data": data, "event": "data"}
    yield {"event": "end"}

# This registers the function simple_stream as a handler for HTTP POST requests at the URL endpoint /simple/stream. 
# It means that when a client sends a POST request to this endpoint, this function will be triggered.
@app.post("/simple/stream")
async def simple_stream(request: Request):
    """Handle POST /simple/stream by streaming the simple chain's output.

    The JSON body is read from the request and its "input" entry is unpacked
    into a ``schemas.UserQuestion``. That question is fed to ``simple_chain``
    through ``generate_stream``, and the resulting event generator is wrapped
    in an ``EventSourceResponse`` so the client receives it as an SSE stream.
    """
    # Reading the request body is a coroutine, hence the await.
    payload = await request.json()
    question = schemas.UserQuestion(**payload["input"])
    # The generator is not consumed here; the SSE response iterates it.
    event_source = generate_stream(question, simple_chain)
    return EventSourceResponse(event_source)


@app.post("/formatted/stream")
async def formatted_stream(request: Request):
    """Handle POST /formatted/stream by streaming the formatted chain's output.

    The JSON body is read from the request and its "input" entry is unpacked
    into a ``schemas.UserQuestion``. That question is fed to
    ``formatted_chain`` through ``generate_stream``, and the event generator
    is returned wrapped in an ``EventSourceResponse``.
    """
    payload = await request.json()
    question = schemas.UserQuestion(**payload["input"])
    event_source = generate_stream(question, formatted_chain)
    return EventSourceResponse(event_source)


# @app.post("/history/stream")
# async def history_stream(request: Request, db: Session = Depends(get_db)):  
#     # TODO: Let's implement the "/history/stream" endpoint. The endpoint should follow those steps:
#     # - The endpoint receives the request
#     # - The request is parsed into a user request
#     # - The user request is used to pull the chat history of the user
#     # - We add as part of the user history the current question by using add_message.
#     # - We create an instance of HistoryInput by using format_chat_history.
#     # - We use the history input within the history chain.
#     raise NotImplementedError


# @app.post("/rag/stream")
# async def rag_stream(request: Request, db: Session = Depends(get_db)):  
#     # TODO: Let's implement the "/rag/stream" endpoint. The endpoint should follow those steps:
#     # - The endpoint receives the request
#     # - The request is parsed into a user request
#     # - The user request is used to pull the chat history of the user
#     # - We add as part of the user history the current question by using add_message.
#     # - We create an instance of HistoryInput by using format_chat_history.
#     # - We use the history input within the rag chain.
#     raise NotImplementedError


# @app.post("/filtered_rag/stream")
# async def filtered_rag_stream(request: Request, db: Session = Depends(get_db)):  
#     # TODO: Let's implement the "/filtered_rag/stream" endpoint. The endpoint should follow those steps:
#     # - The endpoint receives the request
#     # - The request is parsed into a user request
#     # - The user request is used to pull the chat history of the user
#     # - We add as part of the user history the current question by using add_message.
#     # - We create an instance of HistoryInput by using format_chat_history.
#     # - We use the history input within the filtered rag chain.
#     raise NotImplementedError
    

# Running from the parent directory with a script:
# To call uvicorn.run with "app.main:app" from within a script, the parent
# directory must be on the import path. That way, whether the code runs
# locally or on the Hugging Face Space, "app.main:app" can always be used as
# the argument to uvicorn.run.

# Add the parent of this file's directory to sys.path. This executes whenever
# the module is imported or run, and is what lets the "app.main:app" import
# string below be resolved (assumes this file is app/main.py — TODO confirm).
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Start a server only when this file is executed directly as a script.
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when running as a script.
    import uvicorn
    # The app is passed as an import string rather than an object; uvicorn
    # requires that form when reload is enabled.
    uvicorn.run("app.main:app", host="localhost", reload=True,  port=8000)