import os
import time  # used only by the commented-out index-creation block below

import gradio as gr
import pinecone
import requests
from gradio_client import Client
from langchain.chains import RetrievalQA  # used only by the commented-out pipeline below
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone

# Endpoint of the hosted LLM (left blank here; set it to your inference endpoint).
API_URL = " "
headers = {"Authorization": f"Bearer {os.environ.get('API_KEY')}"}

# Gradio Space that serves the vector-database retrieval step.
retrieval = Client("https://ishaan-mital-ncert-helper-vector-db.hf.space/--replicas/149bl5mjn/")

# Embedding model used to encode incoming questions for vector search.
embed_model_id = 'sentence-transformers/all-MiniLM-L6-v2'
# device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

embed_model = HuggingFaceEmbeddings(
    model_name=embed_model_id,
    # model_kwargs={'device': device},
    # encode_kwargs={'device': device, 'batch_size': 32}
)
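# A minimal sketch (not part of the app flow): HuggingFaceEmbeddings.embed_query
# returns a plain list of floats; all-MiniLM-L6-v2 produces 384-dimensional vectors,
# which must match the dimension of the Pinecone index used below.
#
#   query_vec = embed_model.embed_query("What is photosynthesis?")
#   print(len(query_vec))  # 384 for all-MiniLM-L6-v2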


# Connect to the existing Pinecone index that stores the embedded passages.
pinecone.init(
    api_key=os.environ.get('PINECONE_API_KEY'),
    environment=os.environ.get('PINECONE_ENVIRONMENT')
)

index_name = 'llama-rag'
index = pinecone.Index(index_name)
text_field = 'text'  # field in metadata that contains text content
# docs = [
#     "this is one document",
#     "and another document"
# ]

# embeddings = embed_model.embed_documents(docs)
# if index_name not in pinecone.list_indexes():
#     pinecone.create_index(
#         index_name,
#         dimension=len(embeddings[0]),
#         metric='cosine'
#     )
#     # wait for index to finish initialization
#     while not pinecone.describe_index(index_name).status['ready']:
#         time.sleep(1)
# LangChain wrapper around the Pinecone index: embed_model.embed_query encodes
# queries, and text_field names the metadata key holding the passage text.
vectorstore = Pinecone(
    index, embed_model.embed_query, text_field
)
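# A minimal sketch (assumes the 'llama-rag' index is already populated): the
# vectorstore can be queried directly with similarity_search, which embeds the
# question and returns the top-k matching Documents.
#
#   docs = vectorstore.similarity_search("Explain Newton's first law", k=3)
#   context = "\n".join(d.page_content for d in docs)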

def call_llm_api(input_text, context):
    """Send the retrieved context plus the user question to the LLM endpoint."""
    init_prompt = (
        "## Instruction: You are an AI language model and must return truthful responses "
        "based on the information provided. Do not answer with any information that isn't "
        "completely verified and correct. Do not lie. Do not present information where you "
        "don't know the answer. Do not include incorrect extra information. Your name is "
        "NCERT Helper. You are a helpful and truthful chatbot. \n"
    )
    info = "Information: \n"
    prompt = init_prompt + info + context + "\nQuestion: " + input_text + "\nAnswer:"
    payload = {"inputs": prompt}
    prompt_len = len(prompt)
    response = requests.post(API_URL, headers=headers, json=payload)
    # Return the raw JSON plus the prompt length so the caller can strip the echoed prompt.
    return response.json(), prompt_len  # Adjust as needed based on your API response format
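# A minimal usage sketch (hypothetical values; the endpoint behind API_URL is assumed
# to behave like a Hugging Face text-generation API and return
# [{"generated_text": "<prompt + completion>"}], which is how main() treats it):
#
#   raw, n = call_llm_api("What is osmosis?", "Osmosis is the movement of water ...")
#   answer = raw[0]["generated_text"][n:]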
# Commented-out experiment: wrapping the LLM HTTP endpoint as a LangChain LLM
# so it could be plugged into a RetrievalQA chain; left disabled.
# import requests
# from langchain.llms import Runnable

# class HuggingFaceApiWrapper(Runnable):
#     def __init__(self, api_endpoint):
#         self.api_endpoint = api_endpoint

#     def run(self, prompt):
#         payload = {
#             "prompt": prompt,
#             "temperature": 0.7,
#             "max_new_tokens": 512,
#             "repetition_penalty": 1.1,
#             "do_sample": True
#             # Add any other parameters needed by your API
#         }

#         # Make a POST request to the Hugging Face model API
#         response = requests.post(self.api_endpoint, json=payload)

#         # Check if the request was successful
#         if response.status_code == 200:
#             result = response.json()
#             # Extract and return the generated text from the API response
#             return result.get("generated_text", "")
#         else:
#             # Handle error cases
#             print(f"Error: {response.status_code}")
#             return None

# # Example usage
# API_ENDPOINT = "https://your-hugging-face-api-endpoint.com"
# hugging_face_api_wrapper = HuggingFaceApiWrapper(api_endpoint=API_ENDPOINT)

# # Now you can use hugging_face_api_wrapper as llm in RetrievalQA
# rag_pipeline = RetrievalQA(
#     llm=hugging_face_api_wrapper, chain_type = "stuff",
#     retriever=vectorstore.as_retriever(),
#     # llm_result_processor=your_result_processor_function
# )



def main(question):
    # return rag_pipeline(question)
    global chatbot
    # Retrieve supporting passages from the vector-DB Space, then ask the LLM.
    context = retrieval.predict(question, api_name="/predict")
    answer, prompt_len = call_llm_api(question, context)
    chatbot = answer
    # The API echoes the prompt, so slice it off and return only the completion.
    return answer[0]['generated_text'][prompt_len:]

demo = gr.Interface(main, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()