Spaces:
Runtime error
ishaan-mital
committed on
Commit · d1289ae
1 Parent(s): e665513
dsf
app.py CHANGED
@@ -12,63 +12,65 @@ API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-b
 headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}
 retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")
 
-embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
-# device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
+# embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
+# # device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'
 
-embed_model = HuggingFaceEmbeddings(
-    model_name=embed_model_id,
-    # model_kwargs={'device': device},
-    # encode_kwargs={'device': device, 'batch_size': 32}
-)
+# embed_model = HuggingFaceEmbeddings(
+#     model_name=embed_model_id,
+#     # model_kwargs={'device': device},
+#     # encode_kwargs={'device': device, 'batch_size': 32}
+# )
 
 
-pinecone.init(
-    api_key=os.environ.get('PINECONE_API_KEY'),
-    environment=os.environ.get('PINECONE_ENVIRONMENT')
-)
+# pinecone.init(
+#     api_key=os.environ.get('PINECONE_API_KEY'),
+#     environment=os.environ.get('PINECONE_ENVIRONMENT')
+# )
 
-index_name = 'llama-rag'
-index = pinecone.Index(index_name)
-text_field = 'text'  # field in metadata that contains text content
-docs = [
-    "this is one document",
-    "and another document"
-]
+# index_name = 'llama-rag'
+# index = pinecone.Index(index_name)
+# text_field = 'text'  # field in metadata that contains text content
+# docs = [
+#     "this is one document",
+#     "and another document"
+# ]
 
-embeddings = embed_model.embed_documents(docs)
-if index_name not in pinecone.list_indexes():
-    pinecone.create_index(
-        index_name,
-        dimension=len(embeddings[0]),
-        metric='cosine'
-    )
-    # wait for index to finish initialization
-    while not pinecone.describe_index(index_name).status['ready']:
-        time.sleep(1)
-vectorstore = Pinecone(
-    index, embed_model.embed_query, text_field
-)
+# embeddings = embed_model.embed_documents(docs)
+# if index_name not in pinecone.list_indexes():
+#     pinecone.create_index(
+#         index_name,
+#         dimension=len(embeddings[0]),
+#         metric='cosine'
+#     )
+#     # wait for index to finish initialization
+#     while not pinecone.describe_index(index_name).status['ready']:
+#         time.sleep(1)
+# vectorstore = Pinecone(
+#     index, embed_model.embed_query, text_field
+# )
 
 def call_llm_api(input_text,context):
+    init_prompt ="## Instruction: You are an AI language model and must return truthful responses as per the information. Do not answer with any information which isn't completely verified and correct. Do not lie. Do not present information where you don't know the answer. Do not include incorrect extra information. Your name is NCERT Helper. You are a helpful and truthful chatbot. \n"
+    info="Information: \n"
     payload = {
-        "inputs":
+        "inputs": init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:"
     }
     response = requests.post(API_URL, headers=headers, json=payload)
     return response.json()  # Adjust as needed based on your API response format
 
-rag_pipeline = RetrievalQA.from_chain_type(
-    llm=call_llm_api, chain_type='stuff',
-    retriever=vectorstore.as_retriever()
-)
+# rag_pipeline = RetrievalQA.from_chain_type(
+#     llm=call_llm_api, chain_type='stuff',
+#     retriever=vectorstore.as_retriever()
+# )
 
 
 def main(question):
     # return rag_pipeline(question)
-
-
-
-
-
+    global chatbot
+    context = retrieval.predict(question, api_name = "/predict")
+    answer=call_llm_api(question,context)
+    chatbot = answer
+    return answer
 
 demo = gr.Interface(main, inputs = "text", outputs = "text")
 
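The net effect of this commit: the Space no longer builds embeddings or a Pinecone index in-process. Retrieval is delegated to the separate vector-DB Space via `gradio_client`, and the answer is generated by prompting the Inference API directly instead of going through LangChain's `RetrievalQA`. Below is a minimal sketch of the resulting request flow, not the committed code itself. It assumes the full model id is `HuggingFaceH4/zephyr-7b-beta` (the hunk header truncates it at `zephyr-7b-b`) and that the Inference API returns the usual `[{"generated_text": ...}]` list for text generation; the commit leaves parsing at `response.json()`, so adjust to the actual response shape.

```python
import os
import requests
from gradio_client import Client

# Assumed full model id; the diff's hunk header truncates it at "zephyr-7b-b".
API_URL = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

# Retrieval is served by a separate Space instead of a local Pinecone index.
retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")

def ask(question: str) -> str:
    # 1. Fetch supporting context from the remote vector-DB Space.
    context = retrieval.predict(question, api_name="/predict")
    # 2. Build the prompt by hand, mirroring call_llm_api in the commit
    #    (instruction text abbreviated here).
    prompt = (
        "## Instruction: ... Your name is NCERT Helper. "
        "You are a helpful and truthful chatbot. \n"
        "Information: \n" + context
        + "\nQuestion: " + question + "\nAnswer:"
    )
    # 3. Call the hosted model directly over the Inference API.
    response = requests.post(API_URL, headers=headers, json={"inputs": prompt})
    data = response.json()
    # Assumed text-generation response shape; adjust if the API differs.
    return data[0]["generated_text"] if isinstance(data, list) else str(data)
```

Keeping the index in a dedicated Space means this app only needs `requests` and `gradio_client` at runtime; the commented-out blocks preserve the old in-process embedding and Pinecone pipeline in case it is restored later.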