LalitMahale committed on
Commit
3b0a769
·
1 Parent(s): 642a8cb
Files changed (3) hide show
  1. app.py +11 -9
  2. main.py +56 -56
  3. utils/vector_store.py +47 -47
app.py CHANGED
@@ -4,7 +4,7 @@ from deep_translator import GoogleTranslator
4
  from fastapi.responses import JSONResponse
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import os
7
- from main import process,audio_process
8
  from dotenv import load_dotenv
9
  import base64
10
  from pathlib import Path
@@ -46,25 +46,27 @@ async def home():
46
 
47
  # Token verification function
48
  def verify_token(token: str):
 
49
  if token != os.getenv("TOKEN"):
50
  raise HTTPException(status_code=401, detail="Token not matched")
51
 
52
  # Translate endpoint that accepts a query parameter 'text'
53
- @app.get("/translate")
54
- async def translate(text: str = "", token: str = ""):
55
- if not text or not token:
56
- raise HTTPException(status_code=400, detail="No text or token provided")
57
- verify_token(token)
58
- translator = GoogleTranslator(source="auto", target="mr")
59
- result = translator.translate(text)
60
 
61
- return {"result": result}
62
 
63
 
64
  @app.post("/chatbot")
65
  async def chatbot(req:ChatBot):
66
  query = req.text
67
  token = req.token
 
68
  if not query or not token:
69
  raise HTTPException(status_code=400, detail="No text provided")
70
  verify_token(token=token)
 
4
  from fastapi.responses import JSONResponse
5
  from fastapi.middleware.cors import CORSMiddleware
6
  import os
7
+ # from main import process,audio_process
8
  from dotenv import load_dotenv
9
  import base64
10
  from pathlib import Path
 
46
 
47
  # Token verification function
48
  def verify_token(token: str):
49
+ print("token: ",token)
50
  if token != os.getenv("TOKEN"):
51
  raise HTTPException(status_code=401, detail="Token not matched")
52
 
53
  # Translate endpoint that accepts a query parameter 'text'
54
+ # @app.get("/translate")
55
+ # async def translate(text: str = "", token: str = ""):
56
+ # if not text or not token:
57
+ # raise HTTPException(status_code=400, detail="No text or token provided")
58
+ # verify_token(token)
59
+ # translator = GoogleTranslator(source="auto", target="mr")
60
+ # result = translator.translate(text)
61
 
62
+ # return {"result": result}
63
 
64
 
65
  @app.post("/chatbot")
66
  async def chatbot(req:ChatBot):
67
  query = req.text
68
  token = req.token
69
+ print("query : ",query)
70
  if not query or not token:
71
  raise HTTPException(status_code=400, detail="No text provided")
72
  verify_token(token=token)
main.py CHANGED
@@ -1,58 +1,58 @@
1
- import numpy as np
2
- from sklearn.metrics.pairwise import cosine_similarity
3
- from utils.convert_embedding import GetEmbedding
4
- import random
5
- import pickle
6
- import os
7
- from utils.rag import RAG
8
- from faster_whisper import WhisperModel
9
-
10
-
11
- def process(user_query:str):
12
- # dump_user_question(user_query)
13
- user_embedding = GetEmbedding([user_query]).user_query_emb()
14
- with open(r"all_mix_embedding.pkl","rb") as f:
15
- load_embedding = pickle.load(f)
16
-
17
- with open(r"all_answers.pkl","rb") as f:
18
- ans = pickle.load(f)
19
- similarity_scores = cosine_similarity(user_embedding, load_embedding)
20
- index = np.argmax(similarity_scores)
21
- answer = ans[index]
22
- score = similarity_scores[0,index]
23
- print(f"Index : {index}:\tscore:{score} \tquery: {user_query}")
24
-
25
- if float(score) > 0.60 :
26
- final_output = random.choice(answer)
27
- else:
28
- final_output = RAG().pipeline(query=user_query)
29
-
30
- return final_output
31
-
32
-
33
- def audio_process(audio):
34
- try:
35
- model = WhisperModel(model_size_or_path="medium.en")
36
- segments, info = model.transcribe(audio)
37
- transcription = " ".join([seg.text for seg in segments])
38
- result = process(user_query=transcription)
39
- return result
40
- except Exception as e:
41
- print("Error:", e)
42
- return str(e)
43
-
44
-
45
-
46
-
47
- if __name__ == "__main__":
48
- res = audio_process(r"C:\Users\lalit\Documents\Sound recordings\who_is_lalit.m4a")
49
- print(res)
50
- # for _ in range(3):
51
 
52
- # user = input("How can i help you :? \n")
53
- # result = process(user)
54
- # print(result)
55
 
56
- # with open(r"data\question_data.pkl","rb") as f:
57
- # que = pickle.load(f)
58
- # print(que)
 
1
+ # import numpy as np
2
+ # from sklearn.metrics.pairwise import cosine_similarity
3
+ # from utils.convert_embedding import GetEmbedding
4
+ # import random
5
+ # import pickle
6
+ # import os
7
+ # from utils.rag import RAG
8
+ # from faster_whisper import WhisperModel
9
+
10
+
11
+ # def process(user_query:str):
12
+ # # dump_user_question(user_query)
13
+ # user_embedding = GetEmbedding([user_query]).user_query_emb()
14
+ # with open(r"all_mix_embedding.pkl","rb") as f:
15
+ # load_embedding = pickle.load(f)
16
+
17
+ # with open(r"all_answers.pkl","rb") as f:
18
+ # ans = pickle.load(f)
19
+ # similarity_scores = cosine_similarity(user_embedding, load_embedding)
20
+ # index = np.argmax(similarity_scores)
21
+ # answer = ans[index]
22
+ # score = similarity_scores[0,index]
23
+ # print(f"Index : {index}:\tscore:{score} \tquery: {user_query}")
24
+
25
+ # if float(score) > 0.60 :
26
+ # final_output = random.choice(answer)
27
+ # else:
28
+ # final_output = RAG().pipeline(query=user_query)
29
+
30
+ # return final_output
31
+
32
+
33
+ # def audio_process(audio):
34
+ # try:
35
+ # model = WhisperModel(model_size_or_path="medium.en")
36
+ # segments, info = model.transcribe(audio)
37
+ # transcription = " ".join([seg.text for seg in segments])
38
+ # result = process(user_query=transcription)
39
+ # return result
40
+ # except Exception as e:
41
+ # print("Error:", e)
42
+ # return str(e)
43
+
44
+
45
+
46
+
47
+ # if __name__ == "__main__":
48
+ # res = audio_process(r"C:\Users\lalit\Documents\Sound recordings\who_is_lalit.m4a")
49
+ # print(res)
50
+ # # for _ in range(3):
51
 
52
+ # # user = input("How can i help you :? \n")
53
+ # # result = process(user)
54
+ # # print(result)
55
 
56
+ # # with open(r"data\question_data.pkl","rb") as f:
57
+ # # que = pickle.load(f)
58
+ # # print(que)
utils/vector_store.py CHANGED
@@ -1,50 +1,50 @@
1
- from faiss import IndexFlatL2,write_index,read_index
2
- import numpy as np
3
- from utils.convert_embedding import GetEmbedding
4
-
5
-
6
- class VectorStore:
7
- def __init__(self):
8
- pass
9
-
10
- def store_vectors(self,data:list,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy' ):
11
- try:
12
- embeddings = GetEmbedding(data=data).convert_emb()
13
- diamension = embeddings.shape[1]
14
- print("Diamension",diamension)
15
- # Create L2 distance index
16
- index = IndexFlatL2(diamension)
17
-
18
- index.add(embeddings)
19
-
20
- write_index(index, embedding_space_name)
21
-
22
- # Save embeddings to file
23
- np.save(numpy_emb_space, embeddings)
24
- return True
25
- except Exception as e:
26
- print(e)
27
- return False
28
-
29
- def get_similary_search(self,query,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy',K:int= 1):
30
- # Load the FAISS index
31
- index = read_index('faiss_index.index')
32
-
33
- # Load the embeddings
34
- embeddings_np = np.load('embeddings.npy')
35
-
36
- # Now you can perform similarity searches on the index
37
- query = "What is photosynthesis?"
38
- query_embedding = GetEmbedding([query]).convert_emb()
39
- query_embedding = query_embedding.detach().numpy()
40
- # query_embedding = np.array(query_embedding) # Convert to numpy array
41
- # query_embedding = query_embedding.reshape(1, -1)
42
- # print("shape")
43
- # print(query_embedding.shape)
44
- # Perform search
45
- distances, indices = index.search(query_embedding, k = K)
46
-
47
- return indices
48
 
49
 
50
 
 
1
+ # from faiss import IndexFlatL2,write_index,read_index
2
+ # import numpy as np
3
+ # from utils.convert_embedding import GetEmbedding
4
+
5
+
6
+ # class VectorStore:
7
+ # def __init__(self):
8
+ # pass
9
+
10
+ # def store_vectors(self,data:list,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy' ):
11
+ # try:
12
+ # embeddings = GetEmbedding(data=data).convert_emb()
13
+ # diamension = embeddings.shape[1]
14
+ # print("Diamension",diamension)
15
+ # # Create L2 distance index
16
+ # index = IndexFlatL2(diamension)
17
+
18
+ # index.add(embeddings)
19
+
20
+ # write_index(index, embedding_space_name)
21
+
22
+ # # Save embeddings to file
23
+ # np.save(numpy_emb_space, embeddings)
24
+ # return True
25
+ # except Exception as e:
26
+ # print(e)
27
+ # return False
28
+
29
+ # def get_similary_search(self,query,embedding_space_name:str = 'faiss_index.index',numpy_emb_space:str = 'embeddings.npy',K:int= 1):
30
+ # # Load the FAISS index
31
+ # index = read_index('faiss_index.index')
32
+
33
+ # # Load the embeddings
34
+ # embeddings_np = np.load('embeddings.npy')
35
+
36
+ # # Now you can perform similarity searches on the index
37
+ # query = "What is photosynthesis?"
38
+ # query_embedding = GetEmbedding([query]).convert_emb()
39
+ # query_embedding = query_embedding.detach().numpy()
40
+ # # query_embedding = np.array(query_embedding) # Convert to numpy array
41
+ # # query_embedding = query_embedding.reshape(1, -1)
42
+ # # print("shape")
43
+ # # print(query_embedding.shape)
44
+ # # Perform search
45
+ # distances, indices = index.search(query_embedding, k = K)
46
+
47
+ # return indices
48
 
49
 
50