Sam committed
Commit 4726801 · 1 Parent(s): 1d7b86f

renamed files

Files changed (2)
  1. midterm-app +0 -1
  2. midterm_app.py +0 -124
midterm-app DELETED
@@ -1 +0,0 @@
- Subproject commit a6492b2481dbd143f30a0d5ebf707b3b070e7f54
 
 
midterm_app.py DELETED
@@ -1,124 +0,0 @@
- # Import Required Libraries
- import os
- from dotenv import load_dotenv
-
- import openai
- import fitz  # PyMuPDF
- import pandas as pd
- from transformers import pipeline
- from qdrant_client import QdrantClient
- from qdrant_client.http import models as qdrant_models
- import chainlit as cl
- import tiktoken
-
- # Specific imports from the libraries
- from langchain.document_loaders import PyMuPDFLoader
- from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.embeddings import OpenAIEmbeddings
- # old import: from langchain_openai import OpenAIEmbeddings
- from langchain_community.vectorstores import Qdrant
- from langchain.prompts import ChatPromptTemplate
- from langchain.chat_models import ChatOpenAI
- # old import: from langchain_openai import ChatOpenAI
- from operator import itemgetter
- from langchain.schema.output_parser import StrOutputParser
- from langchain.schema.runnable import RunnablePassthrough
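- # NOTE: fitz, pandas, pipeline, QdrantClient, qdrant_models, and
- # StrOutputParser are imported but never used below.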
-
- # Load environment variables from .env
- load_dotenv()
-
- # Read the OpenAI API key
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-
- # Initialize the OpenAI client with the loaded key
- openai.api_key = OPENAI_API_KEY
-
- # Load and split documents
- loader = PyMuPDFLoader("/home/user/app/data/airbnb_q1_2024.pdf")
- # old file path: loader = PyMuPDFLoader("/Users/sampazar/AIE3-Midterm/data/airbnb_q1_2024.pdf")
- documents = loader.load()
-
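- # Token counter for the splitter: chunk sizes below are measured in
- # gpt-4o tokens, not characters.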
- def tiktoken_len(text):
-     tokens = tiktoken.encoding_for_model("gpt-4o").encode(text)
-     return len(tokens)
-
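- # Small chunks (150 tokens) with heavy overlap (100 tokens) keep related
- # passages together across chunk boundaries.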
- text_splitter = RecursiveCharacterTextSplitter(
-     chunk_size=150,
-     chunk_overlap=100,
-     length_function=tiktoken_len,
- )
-
- split_chunks = text_splitter.split_documents(documents)
-
-
- # Load OpenAI Embeddings Model
- embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
-
- # Creating a Qdrant Vector Store
- qdrant_vector_store = Qdrant.from_documents(
-     split_chunks,
-     embeddings,
-     location=":memory:",
-     collection_name="Airbnb_Q1_2024",
- )
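- # ":memory:" keeps the index in process memory; it is rebuilt on every restart.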
-
- # Create a Retriever
- retriever = qdrant_vector_store.as_retriever()
-
- # Create a prompt template
- template = """Answer the question based only on the following context. If you cannot answer the question with the context, please respond with 'I don't know':
-
- Context:
- {context}
-
- Question:
- {question}
- """
-
- prompt = ChatPromptTemplate.from_template(template)
-
- # Define the primary LLM
- primary_llm = ChatOpenAI(model_name="gpt-4o", temperature=0)
-
- # Creating a Retrieval Augmented Generation (RAG) Chain
- retrieval_augmented_qa_chain = (
-     # INVOKE CHAIN WITH: {"question": "<>"}
-     # "question" : populated by getting the value of the "question" key
-     # "context" : populated by piping the "question" value into the retriever
-     {"context": itemgetter("question") | retriever, "question": itemgetter("question")}
-     # "context" : reassigned via RunnablePassthrough.assign so the retrieved
-     # documents remain available to the following step
-     | RunnablePassthrough.assign(context=itemgetter("context"))
-     # "response" : the "context" and "question" values format the prompt, which
-     # is piped into the LLM; "context" carries the documents forward
-     | {"response": prompt | primary_llm, "context": itemgetter("context")}
- )
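- # The chain returns {"response": <LLM message>, "context": <retrieved documents>}.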
-
- # Chainlit integration for deployment
- @cl.on_chat_start  # marks a function that will be executed at the start of a user session
- async def start_chat():
-     settings = {
-         "model": "gpt-4o",
-         "temperature": 0,
-         "max_tokens": 500,
-         "top_p": 1,
-         "frequency_penalty": 0,
-         "presence_penalty": 0,
-     }
-     cl.user_session.set("settings", settings)
-
- @cl.on_message  # marks a function that should be run each time the chatbot receives a message from a user
- async def handle_message(message: cl.Message):
-     settings = cl.user_session.get("settings")
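-     # NOTE: settings are read but not applied here; the chain above uses
-     # its own model configuration.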
-
-     response = retrieval_augmented_qa_chain.invoke({"question": message.content})
-
-     # msg = cl.Message(content=response["response"])
-     # await msg.send()
-
-     # Extracting and sending just the content
-     content = response["response"].content
-     pretty_content = content.strip()  # Remove any leading/trailing whitespace
-
-     await cl.Message(content=pretty_content).send()