LalitMahale committed · Commit eb66dcb
data push

Files changed:
- Dockerfile +11 -0
- README.md +12 -0
- app.py +33 -0
- data/answer.pkl +0 -0
- data/question_data.pkl +0 -0
- data/question_embedding_latest.pkl +0 -0
- main.py +50 -0
- requirements.txt +4 -0
- utils/__init__.py +0 -0
- utils/convert_embedding.py +24 -0
- utils/vector_store.py +50 -0
Dockerfile
ADDED
@@ -0,0 +1,11 @@
+FROM python:3.10-slim
+
+WORKDIR /app
+
+COPY ./ /app
+
+RUN pip install -r requirements.txt
+
+
+CMD fastapi run app.py --reload --host=0.0.0.0 --port=7860
+
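For a quick smoke test of the container, a minimal sketch, assuming the image was built and started locally with `docker build -t test-api .` and `docker run -p 7860:7860 test-api` (httpx ships with fastapi[standard]; requests would work the same way):

import httpx

# Hit the root endpoint of the locally running container
resp = httpx.get("http://localhost:7860/")
print(resp.status_code, resp.json())  # expect 200 and the welcome message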
README.md
ADDED
@@ -0,0 +1,12 @@
+---
+
+title: test-api
+emoji: 🐳
+colorFrom: purple
+colorTo: gray
+sdk: docker
+app_port: 7860
+---
+
+# test_api
+
app.py
ADDED
@@ -0,0 +1,33 @@
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+from deep_translator import GoogleTranslator
+from fastapi.responses import JSONResponse
+from main import process
+# Create the FastAPI app instance
+app = FastAPI()
+
+# Root endpoint
+@app.get("/")
+async def home():
+    return {"message": "Welcome to my FastAPI API on Hugging Face Spaces!"}
+
+# Translate endpoint that accepts a query parameter 'text'
+@app.get("/translate")
+async def translate(text: str = ""):
+    if not text:
+        raise HTTPException(status_code=400, detail="No text provided")
+
+    # Perform translation using deep_translator
+    translator = GoogleTranslator(source="auto", target="mr")
+    result = translator.translate(text)
+
+    return {"result": result}
+
+@app.get("/chatbot")
+async def chatbot(text: str = ""):
+    if not text:
+        raise HTTPException(status_code=400, detail="No text provided")
+    # Answer via the retrieval pipeline in main.py (process takes a positional argument)
+    result = process(text)
+    return {"result": result}
+
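The three endpoints can be exercised without Docker via FastAPI's TestClient. A minimal sketch, assuming the repo's dependencies are installed and the data/*.pkl files are present (importing app also imports main.py):

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)

print(client.get("/").json())                                     # welcome message
print(client.get("/translate", params={"text": "hello"}).json())  # Marathi translation
print(client.get("/chatbot", params={"text": "hi"}).json())       # retrieved answer
print(client.get("/translate").status_code)                       # 400: no text provided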
data/answer.pkl
ADDED
Binary file (47.1 kB)

data/question_data.pkl
ADDED
Binary file (64 Bytes)

data/question_embedding_latest.pkl
ADDED
Binary file (627 kB)
main.py
ADDED
@@ -0,0 +1,50 @@
+import numpy as np
+from sklearn.metrics.pairwise import cosine_similarity
+from utils.convert_embedding import GetEmbedding
+import random
+import pickle
+import os
+
+
+# Use forward slashes so the paths also work inside the Linux container
+QUESTION_LOG = "data/question_data.pkl"
+def dump_user_question(query):
+    try:
+        if os.path.exists(QUESTION_LOG):
+            with open(QUESTION_LOG, "rb") as f:
+                que = pickle.load(f)
+        else:
+            que = []
+        que.append(query)
+        with open(QUESTION_LOG, "wb") as f:
+            pickle.dump(que, f)
+    except Exception:
+        # If the log is missing or corrupt, reset it to an empty list
+        with open(QUESTION_LOG, "wb") as f:
+            pickle.dump([], f)
+
+def process(user_query):
+    dump_user_question(user_query)
+    user_embedding = GetEmbedding([user_query]).user_query_emb()
+    with open("data/question_embedding_latest.pkl", "rb") as f:
+        load_embedding = pickle.load(f)
+
+    with open("data/answer.pkl", "rb") as f:
+        ans = pickle.load(f)
+    similarity_scores = cosine_similarity(user_embedding, load_embedding)
+    index = np.argmax(similarity_scores)
+    answer = ans[index]
+
+    return random.choice(answer)
+
+
+
+if __name__ == "__main__":
+    pass
+    # for _ in range(3):
+    #     user = input("How can I help you? \n")
+    #     result = process(user)
+    #     print(result)
+
+    # with open("data/question_data.pkl", "rb") as f:
+    #     que = pickle.load(f)
+    #     print(que)
requirements.txt
ADDED
@@ -0,0 +1,4 @@
+deep-translator
+fastapi[standard]
+gunicorn
+sentence_transformers
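Note that main.py imports numpy and scikit-learn directly and utils/vector_store.py imports faiss, yet none of the three appear here; numpy and scikit-learn typically arrive as transitive dependencies of sentence_transformers, but faiss-cpu would have to be installed separately. A quick environment sanity check (a sketch):

import importlib.util

# Everything app.py needs at import time, plus the extras used elsewhere in the repo
for mod in ["fastapi", "deep_translator", "sentence_transformers", "numpy", "sklearn", "faiss"]:
    print(mod, "OK" if importlib.util.find_spec(mod) else "MISSING")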
utils/__init__.py
ADDED
File without changes
utils/convert_embedding.py
ADDED
@@ -0,0 +1,24 @@
+from sentence_transformers import SentenceTransformer
+
+class GetEmbedding:
+    def __init__(self, data: list):
+        self.data = data
+    def user_query_emb(self, model_name: str = 'paraphrase-MiniLM-L6-v2'):
+        try:
+            model = SentenceTransformer(model_name_or_path=model_name)
+            embedding = model.encode(self.data)
+            return embedding
+        except Exception as e:
+            print(e)
+
+    def convert_data(self, model_name: str = 'paraphrase-MiniLM-L6-v2'):
+        try:
+            model = SentenceTransformer(model_name)
+            embeddings = model.encode(self.data)
+            return embeddings
+        except Exception as e:
+            print(e)
+
+if __name__ == "__main__":
+    emb = GetEmbedding(["lalit"]).user_query_emb()
+    print(emb)
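Example usage of GetEmbedding (a sketch; the paraphrase-MiniLM-L6-v2 model is downloaded on first run and produces 384-dimensional vectors):

from utils.convert_embedding import GetEmbedding

emb = GetEmbedding(["hi", "what is photosynthesis?"]).convert_data()
print(emb.shape)  # (2, 384): one vector per input sentence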
utils/vector_store.py
ADDED
@@ -0,0 +1,50 @@
+from faiss import IndexFlatL2, write_index, read_index
+import numpy as np
+from utils.convert_embedding import GetEmbedding
+
+
+class VectorStore:
+    def __init__(self):
+        pass
+
+    def store_vectors(self, data: list, embedding_space_name: str = 'faiss_index.index', numpy_emb_space: str = 'embeddings.npy'):
+        try:
+            # convert_data is the batch-encoding method on GetEmbedding
+            embeddings = GetEmbedding(data=data).convert_data()
+            dimension = embeddings.shape[1]
+            print("Dimension", dimension)
+            # Create L2 distance index
+            index = IndexFlatL2(dimension)
+
+            index.add(embeddings)
+
+            write_index(index, embedding_space_name)
+
+            # Save embeddings to file
+            np.save(numpy_emb_space, embeddings)
+            return True
+        except Exception as e:
+            print(e)
+            return False
+
+    def get_similarity_search(self, query, embedding_space_name: str = 'faiss_index.index', numpy_emb_space: str = 'embeddings.npy', K: int = 1):
+        # Load the FAISS index from the given path
+        index = read_index(embedding_space_name)
+
+        # Load the stored embeddings; the search itself only needs the index
+        embeddings_np = np.load(numpy_emb_space)
+
+        # Embed the incoming query with the same model used at store time
+        query_embedding = GetEmbedding([query]).convert_data()
+        # encode() already returns a 2-D numpy array of shape (1, dim);
+        # faiss just expects float32 input
+        query_embedding = np.asarray(query_embedding, dtype="float32")
+
+        # Perform search
+        distances, indices = index.search(query_embedding, k=K)
+
+        return indices
+
+
+
+
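A round-trip sketch for VectorStore; note that faiss is not listed in requirements.txt, so this assumes faiss-cpu has been installed alongside the other dependencies:

from utils.vector_store import VectorStore

store = VectorStore()
docs = ["What is photosynthesis?", "Who wrote Hamlet?"]

ok = store.store_vectors(docs)  # writes faiss_index.index and embeddings.npy
print(ok)                       # True on success

indices = store.get_similarity_search("photosynthesis")  # shape (1, K) array of row ids
print(docs[int(indices[0][0])])                          # expect: "What is photosynthesis?"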