Lrosado committed
Commit 49be2e7 · verified · 1 Parent(s): 649bf6a

Update app.py

Files changed (1):
  app.py +146 -146
app.py CHANGED
@@ -1,147 +1,147 @@
-import os
-import uuid
-import json
-import gradio as gr
-
-from openai import OpenAI
-
-from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
-from langchain_community.vectorstores import Chroma
-
-from huggingface_hub import CommitScheduler
-from pathlib import Path
-
-
-os.environ['OPEN_API_KEY'] = "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";
-client = OpenAI()
-#client = OpenAI(
-#base_url="https://api.openai.com/v1",
-#OpenAI.api_key = "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"
-#api_key=os.environ["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"]
-#)
-
-embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
-
-streamlit_collection = 'streamlit'
-
-vectorstore_persisted = Chroma(
-    collection_name=streamlit_collection,
-    persist_directory='./streamlitdb',
-    embedding_function=embedding_model
-)
-
-retriever = vectorstore_persisted.as_retriever(
-    search_type='similarity',
-    search_kwargs={'k': 5}
-)
-
-# Prepare the logging functionality
-
-log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
-log_folder = log_file.parent
-
-scheduler = CommitScheduler(
-    repo_id="document-qna-chroma-anyscale-logs",
-    repo_type="dataset",
-    folder_path=log_folder,
-    path_in_repo="data",
-    every=2
-)
-
-qna_system_message = """
-You are an assistant to a coder. Your task is to provide relevant information about the Python package Streamlit.
-
-User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
-The context contains references to specific portions of documents relevant to the user's query, along with source links.
-The source for a context will begin with the token ###Source
-
-When crafting your response:
-1. Select the most relevant context or contexts to answer the question.
-2. Include the source links in your response.
-3. User questions will begin with the token: ###Question.
-4. If the question is irrelevant to streamlit respond with - "I am an assistant for streamlit Docs. I can only help you with questions related to streamlit"
-
-Please adhere to the following guidelines:
-- Answer only using the context provided.
-- Do not mention anything about the context in your final answer.
-- If the answer is not found in the context, it is very very important for you to respond with "I don't know. Please check the docs @ 'https://docs.streamlit.io/'"
-- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Sources:
-- Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.
-
-Here is an example of how to structure your response:
-
-Answer:
-[Answer]
-
-Source
-[Source]
-"""
-
-qna_user_message_template = """
-###Context
-Here are some documents that are relevant to the question.
-{context}
-```
-{question}
-```
-"""
-
-# Define the predict function that runs when 'Submit' is clicked or when a API request is made
-def predict(user_input):
-
-    relevant_document_chunks = retriever.invoke(user_input)
-    context_list = [d.page_content for d in relevant_document_chunks]
-    context_for_query = ".".join(context_list)
-
-    prompt = [
-        {'role':'system', 'content': qna_system_message},
-        {'role': 'user', 'content': qna_user_message_template.format(
-            context=context_for_query,
-            question=user_input
-        )
-        }
-    ]
-
-    try:
-        response = client.chat.completions.create(
-            model='gpt-4o-mini',
-            messages=prompt,
-            temperature=0
-        )
-
-        prediction = response.choices[0].message.content
-
-    except Exception as e:
-        prediction = e
-
-    # While the prediction is made, log both the inputs and outputs to a local log file
-    # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
-    # access
-
-    with scheduler.lock:
-        with log_file.open("a") as f:
-            f.write(json.dumps(
-                {
-                    'user_input': user_input,
-                    'retrieved_context': context_for_query,
-                    'model_response': prediction
-                }
-            ))
-            f.write("\n")
-
-    return prediction
-
-
-textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
-
-# Create the interface
-demo = gr.Interface(
-    inputs=textbox, fn=predict, outputs="text",
-    title="Streamlit Q&A System",
-    description="This web API presents an interface to ask questions on streamlit documentation",
-    article="Note that questions that are not relevant to streamlit or not within the sample documents will be answered with 'I don't know. Please check the docs @ 'https://docs.streamlit.io/''",
-    concurrency_limit=20
-)
-
-demo.queue()
+import os
+import uuid
+import json
+import gradio as gr
+
+from openai import OpenAI
+
+from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
+from langchain_community.vectorstores import Chroma
+
+from huggingface_hub import CommitScheduler
+from pathlib import Path
+
+
+#os.environ['OPEN_API_KEY'] = "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";
+client = OpenAI(api_key="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")
+#client = OpenAI(
+#base_url="https://api.openai.com/v1",
+#OpenAI.api_key = "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"
+#api_key=os.environ["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"]
+#)
+
+embedding_model = SentenceTransformerEmbeddings(model_name='thenlper/gte-large')
+
+streamlit_collection = 'streamlit'
+
+vectorstore_persisted = Chroma(
+    collection_name=streamlit_collection,
+    persist_directory='./streamlitdb',
+    embedding_function=embedding_model
+)
+
+retriever = vectorstore_persisted.as_retriever(
+    search_type='similarity',
+    search_kwargs={'k': 5}
+)
+
+# Prepare the logging functionality
+
+log_file = Path("logs/") / f"data_{uuid.uuid4()}.json"
+log_folder = log_file.parent
+
+scheduler = CommitScheduler(
+    repo_id="document-qna-chroma-anyscale-logs",
+    repo_type="dataset",
+    folder_path=log_folder,
+    path_in_repo="data",
+    every=2
+)
+
+qna_system_message = """
+You are an assistant to a coder. Your task is to provide relevant information about the Python package Streamlit.
+
+User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
+The context contains references to specific portions of documents relevant to the user's query, along with source links.
+The source for a context will begin with the token ###Source
+
+When crafting your response:
+1. Select the most relevant context or contexts to answer the question.
+2. Include the source links in your response.
+3. User questions will begin with the token: ###Question.
+4. If the question is irrelevant to streamlit respond with - "I am an assistant for streamlit Docs. I can only help you with questions related to streamlit"
+
+Please adhere to the following guidelines:
+- Answer only using the context provided.
+- Do not mention anything about the context in your final answer.
+- If the answer is not found in the context, it is very very important for you to respond with "I don't know. Please check the docs @ 'https://docs.streamlit.io/'"
+- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Sources:
+- Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.
+
+Here is an example of how to structure your response:
+
+Answer:
+[Answer]
+
+Source
+[Source]
+"""
+
+qna_user_message_template = """
+###Context
+Here are some documents that are relevant to the question.
+{context}
+```
+{question}
+```
+"""
+
+# Define the predict function that runs when 'Submit' is clicked or when a API request is made
+def predict(user_input):
+
+    relevant_document_chunks = retriever.invoke(user_input)
+    context_list = [d.page_content for d in relevant_document_chunks]
+    context_for_query = ".".join(context_list)
+
+    prompt = [
+        {'role':'system', 'content': qna_system_message},
+        {'role': 'user', 'content': qna_user_message_template.format(
+            context=context_for_query,
+            question=user_input
+        )
+        }
+    ]
+
+    try:
+        response = client.chat.completions.create(
+            model='gpt-4o-mini',
+            messages=prompt,
+            temperature=0
+        )
+
+        prediction = response.choices[0].message.content
+
+    except Exception as e:
+        prediction = e
+
+    # While the prediction is made, log both the inputs and outputs to a local log file
+    # While writing to the log file, ensure that the commit scheduler is locked to avoid parallel
+    # access
+
+    with scheduler.lock:
+        with log_file.open("a") as f:
+            f.write(json.dumps(
+                {
+                    'user_input': user_input,
+                    'retrieved_context': context_for_query,
+                    'model_response': prediction
+                }
+            ))
+            f.write("\n")
+
+    return prediction
+
+
+textbox = gr.Textbox(placeholder="Enter your query here", lines=6)
+
+# Create the interface
+demo = gr.Interface(
+    inputs=textbox, fn=predict, outputs="text",
+    title="Streamlit Q&A System",
+    description="This web API presents an interface to ask questions on streamlit documentation",
+    article="Note that questions that are not relevant to streamlit or not within the sample documents will be answered with 'I don't know. Please check the docs @ 'https://docs.streamlit.io/''",
+    concurrency_limit=20
+)
+
+demo.queue()
 demo.launch()
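
The only substantive change in this commit is at lines 15-16: the old code set an environment variable named OPEN_API_KEY and then called OpenAI() with no arguments. The OpenAI SDK reads the key from OPENAI_API_KEY, so the misspelled variable was silently ignored and the bare constructor could not find a key; the commit works around that by passing the key directly to the constructor. Either way, the secret is now committed to the repository. A minimal sketch of the conventional alternative, assuming the key is supplied via the environment rather than in source:

    import os
    from openai import OpenAI

    # The SDK looks up OPENAI_API_KEY; OPEN_API_KEY (as in the old code) is
    # silently ignored, which is why the bare OpenAI() call failed.
    client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

On Hugging Face Spaces the key would be stored as a repository secret, which the runtime exposes to the app as an environment variable, keeping it out of the commit history.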
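
Unrelated to the change but present in both versions: on failure, predict() assigns the raw Exception to prediction, and the json.dumps() call in the logging block raises TypeError when handed an Exception object, so a failed OpenAI call would crash the request instead of being logged. A sketch of a log-safe variant (the message text is illustrative, not from the source):

    except Exception as e:
        # Convert to str so the record stays JSON-serializable and the
        # Gradio output receives text rather than an Exception object.
        prediction = f"Sorry, something went wrong: {e}"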
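
Also carried over unchanged: the system prompt announces that user questions begin with the token ###Question, but qna_user_message_template wraps {question} in bare triple backticks and never emits that token; likewise, the retrieved chunks are joined with "." (which can fuse sentences), and the ###Source links the prompt expects would have to come from chunk metadata, which retrieval here does not include. A sketch of a template aligned with the announced tokens (one plausible fix, not the author's):

    # Join chunks with blank lines instead of "." so chunk boundaries stay visible.
    context_for_query = "\n\n".join(context_list)

    qna_user_message_template = """
    ###Context
    Here are some documents that are relevant to the question.
    {context}

    ###Question
    {question}
    """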