pgurazada1 committed on
Commit
d935ee0
·
verified ·
1 Parent(s): 76bb6df

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -7
app.py CHANGED
@@ -4,9 +4,10 @@ import gradio as gr
4
  from dotenv import load_dotenv
5
  from openai import AzureOpenAI
6
 
7
- from langchain_openai import AzureOpenAIEmbeddings
8
-
9
- from langchain_chroma import Chroma
 
10
  from langchain.retrievers import ContextualCompressionRetriever
11
  from langchain.retrievers.document_compressors import CrossEncoderReranker
12
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
@@ -16,7 +17,15 @@ load_dotenv()
16
  client = AzureOpenAI(
17
  api_key=os.environ['AZURE_OPENAI_KEY'],
18
  azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
19
- api_version='2024-02-01'
 
 
 
 
 
 
 
 
20
  )
21
 
22
  model_name = 'gpt-4o-mini'
@@ -24,7 +33,7 @@ model_name = 'gpt-4o-mini'
24
  embedding_model = AzureOpenAIEmbeddings(
25
  api_key=os.environ['AZURE_OPENAI_KEY'],
26
  azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
27
- api_version='2024-02-01',
28
  azure_deployment="text-embedding-ada-002"
29
  )
30
 
@@ -36,8 +45,38 @@ vectorstore_persisted = Chroma(
36
  embedding_function=embedding_model
37
  )
38
 
39
- retriever = vectorstore_persisted.as_retriever(
40
- search_type='similarity',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  search_kwargs={'k': 10}
42
  )
43
 
 
4
  from dotenv import load_dotenv
5
  from openai import AzureOpenAI
6
 
7
+ from langchain_openai import AzureOpenAIEmbeddings, AzureChatOpenAI
8
+ from langchain.chains.query_constructor.base import AttributeInfo
9
+ from langchain.retrievers.self_query.base import SelfQueryRetriever
10
+ from langchain_community.vectorstores import Chroma
11
  from langchain.retrievers import ContextualCompressionRetriever
12
  from langchain.retrievers.document_compressors import CrossEncoderReranker
13
  from langchain_community.cross_encoders import HuggingFaceCrossEncoder
 
17
  client = AzureOpenAI(
18
  api_key=os.environ['AZURE_OPENAI_KEY'],
19
  azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
20
+ api_version=os.environ["AZURE_OPENAI_APIVERSION"]
21
+ )
22
+
23
+ llm = AzureChatOpenAI(
24
+ api_key=os.environ['AZURE_OPENAI_KEY'],
25
+ azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
26
+ api_version=os.environ["AZURE_OPENAI_APIVERSION"],
27
+ model="gpt-4o-mini",
28
+ temperature=0
29
  )
30
 
31
  model_name = 'gpt-4o-mini'
 
33
  embedding_model = AzureOpenAIEmbeddings(
34
  api_key=os.environ['AZURE_OPENAI_KEY'],
35
  azure_endpoint=os.environ['AZURE_OPENAI_ENDPOINT'],
36
+ api_version=os.environ["AZURE_OPENAI_APIVERSION"],
37
  azure_deployment="text-embedding-ada-002"
38
  )
39
 
 
45
  embedding_function=embedding_model
46
  )
47
 
48
+ metadata_field_info = [
49
+ AttributeInfo(
50
+ name="year",
51
+ description="The year of the Tesla 10-K annual report",
52
+ type="string",
53
+ ),
54
+ AttributeInfo(
55
+ name="file",
56
+ description="The filename of the source document",
57
+ type="string",
58
+ ),
59
+ AttributeInfo(
60
+ name="page_number",
61
+ description="The page number of the document in the original file",
62
+ type="string",
63
+ ),
64
+ AttributeInfo(
65
+ name="source",
66
+ description="The source of the document content: text or image",
67
+ type="string"
68
+ )
69
+ ]
70
+
71
+ document_content_description = "10-k Statements from Tesla"
72
+
73
+ retriever = SelfQueryRetriever.from_llm(
74
+ llm,
75
+ vectorstore_persisted,
76
+ document_content_description,
77
+ metadata_field_info,
78
+ enable_limit=True,
79
+ verbose=True,
80
  search_kwargs={'k': 10}
81
  )
82