Lrosado committed on
Commit
ce821d5
·
verified ·
1 Parent(s): 0d8fb96

Upload 4 files

Browse files
Files changed (4) hide show
  1. .gitattributes +36 -35
  2. README.md +12 -12
  3. app.py +147 -0
  4. requirements.txt +5 -0
.gitattributes CHANGED
@@ -1,35 +1,36 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tesla_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,12 @@
1
- ---
2
- title: Streamlit Qna Documents2
3
- emoji: 🐨
4
- colorFrom: purple
5
- colorTo: pink
6
- sdk: streamlit
7
- sdk_version: 1.42.0
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
+ ---
2
+ title: Streamlit Documentation Qna Chroma Anyscale
3
+ emoji:
4
+ colorFrom: yellow
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.29.0
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ import json
4
+ import gradio as gr
5
+
6
+ from openai import OpenAI
7
+
8
+ from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
9
+ from langchain_community.vectorstores import Chroma
10
+
11
+ from huggingface_hub import CommitScheduler
12
+ from pathlib import Path
13
+
14
+
# SECURITY: API keys must never be hard-coded in source (not even in
# commented-out code). The key that was previously committed here is exposed
# in the repository history and must be revoked and rotated.
#
# The OpenAI client reads its credential from the OPENAI_API_KEY environment
# variable (the earlier code set 'OPEN_API_KEY', which the client ignores).
# Configure OPENAI_API_KEY as a secret of the deployment environment.
if not os.environ.get("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Configure it as a secret/environment "
        "variable before starting the app."
    )

# Client used by predict() to call the chat completions endpoint.
client = OpenAI()
22
+
# Embedding model used to vectorise queries for the similarity search.
embedding_model = SentenceTransformerEmbeddings(model_name="thenlper/gte-large")

# Name of the Chroma collection this app queries.
streamlit_collection = "streamlit"

# Open the Chroma vector store persisted on disk under ./streamlitdb.
vectorstore_persisted = Chroma(
    embedding_function=embedding_model,
    persist_directory="./streamlitdb",
    collection_name=streamlit_collection,
)

# Retriever that returns the 5 most similar document chunks for a query.
retriever = vectorstore_persisted.as_retriever(
    search_kwargs={"k": 5},
    search_type="similarity",
)
37
+
# Logging setup.
# Each run of the app appends its records to its own uniquely named file
# inside the local "logs" folder.
log_folder = Path("logs/")
log_file = log_folder / f"data_{uuid.uuid4()}.json"

# Background scheduler that periodically commits the contents of the log
# folder to the "data" directory of a Hugging Face dataset repository.
# `every=2` is the commit interval (minutes, per the huggingface_hub docs).
scheduler = CommitScheduler(
    repo_id="document-qna-chroma-anyscale-logs",
    repo_type="dataset",
    folder_path=log_folder,
    path_in_repo="data",
    every=2,
)
50
+
# System prompt: defines the assistant's role, the tokens used to mark up the
# user message (###Context, ###Source, ###Question) and the answer format.
qna_system_message = """
You are an assistant to a coder. Your task is to provide relevant information about the Python package Streamlit.

User input will include the necessary context for you to answer their questions. This context will begin with the token: ###Context.
The context contains references to specific portions of documents relevant to the user's query, along with source links.
The source for a context will begin with the token ###Source

When crafting your response:
1. Select the most relevant context or contexts to answer the question.
2. Include the source links in your response.
3. User questions will begin with the token: ###Question.
4. If the question is irrelevant to streamlit respond with - "I am an assistant for streamlit Docs. I can only help you with questions related to streamlit"

Please adhere to the following guidelines:
- Answer only using the context provided.
- Do not mention anything about the context in your final answer.
- If the answer is not found in the context, it is very very important for you to respond with "I don't know. Please check the docs @ 'https://docs.streamlit.io/'"
- Always quote the source when you use the context. Cite the relevant source at the end of your response under the section - Sources:
- Do not make up sources. Use the links provided in the sources section of the context and nothing else. You are prohibited from providing other links/sources.

Here is an example of how to structure your response:

Answer:
[Answer]

Source
[Source]
"""

# User message template. `{context}` receives the retrieved document text and
# `{question}` the raw user query. The question is introduced with the
# ###Question token because the system prompt tells the model that user
# questions begin with that token; the template previously omitted it.
qna_user_message_template = """
###Context
Here are some documents that are relevant to the question.
{context}

###Question
```
{question}
```
"""
88
+
# Define the predict function that runs when 'Submit' is clicked or when an API request is made
def predict(user_input):
    """Answer a question using retrieved context and log the exchange.

    The query is sent to the module-level ``retriever``; the text of the
    returned chunks is joined into one context string, inserted into the
    user prompt template and sent to the chat model together with the
    system prompt. The input, the context and the response are appended as
    one JSON line to ``log_file``.

    Args:
        user_input: The question typed by the user.

    Returns:
        The model's answer as a string, or the error message as a string if
        the chat completion call failed.
    """
    relevant_document_chunks = retriever.invoke(user_input)
    context_for_query = ".".join(
        chunk.page_content for chunk in relevant_document_chunks
    )

    prompt = [
        {'role': 'system', 'content': qna_system_message},
        {
            'role': 'user',
            'content': qna_user_message_template.format(
                context=context_for_query,
                question=user_input,
            ),
        },
    ]

    try:
        response = client.chat.completions.create(
            model='gpt-4o-mini',
            messages=prompt,
            temperature=0,
        )
        # Message content may be None; always hand a string to the UI.
        prediction = response.choices[0].message.content or ""
    except Exception as e:
        # Deliberate best-effort boundary: show the error to the user rather
        # than crash. It must be converted to text, because an exception
        # object is not JSON serializable and would make the logging below
        # raise TypeError.
        prediction = str(e)

    # Log both the inputs and outputs to a local log file. Hold the commit
    # scheduler's lock while writing so the file is not committed while it is
    # being modified.
    with scheduler.lock:
        with log_file.open("a", encoding="utf-8") as f:
            f.write(json.dumps(
                {
                    'user_input': user_input,
                    'retrieved_context': context_for_query,
                    'model_response': prediction,
                }
            ))
            f.write("\n")

    return prediction
133
+
134
+
# Multi-line text box in which the user types the question.
textbox = gr.Textbox(lines=6, placeholder="Enter your query here")

# Build the web interface: one text input, answered by predict(), shown as text.
demo = gr.Interface(
    fn=predict,
    inputs=textbox,
    outputs="text",
    title="Streamlit Q&A System",
    description="This web API presents an interface to ask questions on streamlit documentation",
    article="Note that questions that are not relevant to streamlit or not within the sample documents will be answered with 'I don't know. Please check the docs @ 'https://docs.streamlit.io/''",
    concurrency_limit=20,
)

# Enable request queuing, then start the server.
demo.queue()
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ openai==1.23.2
2
+ chromadb==0.4.22
3
+ langchain==0.1.9
4
+ langchain-community==0.0.32
5
+ sentence-transformers==2.3.1