LalitMahale committed
Commit f58e385 · 1 Parent(s): 1c2bd30

summarization api added

.env CHANGED
@@ -1,3 +1,4 @@
 GOOGLE_API = 'AIzaSyB3wI2r6ZgQnYQ3V39PX5S0zWSRqy5ldYw'
 TOKEN = "AIzaSyB3wI2r6ZgQnYQ3V39PX5S0zWSRqy5ldYw_Lalit"
 GROQ_API = "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"
+MODEL = "llama-3.3-70b-versatile"
.gitignore CHANGED
@@ -1,2 +1,3 @@
 __py*
-utils/__py*
+utils/__py*
+uploads*
all_answers.pkl DELETED
Binary file (53.4 kB)
 
all_merged_list.pkl DELETED
Binary file (71.6 kB)
 
all_mix_embedding.pkl DELETED
Binary file (708 kB)
 
app.py CHANGED
@@ -11,6 +11,7 @@ import base64
 from pathlib import Path
 from process import Response
 import uuid
+from utils.summary import Summary
 load_dotenv()
 # Create the FastAPI app instance
 os.makedirs("/tmp/huggingface_cache", exist_ok=True)
@@ -118,34 +119,44 @@ async def audio_chat(audio: UploadFile = File(...), token: str = ""):
 
 
 # Request model
+UPLOAD_DIR = "uploads"
+Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
+
 class FileUploadRequest(BaseModel):
+    token: str
     filename: str
     content_type: str
     base64_file: str
 
-UPLOAD_DIR = "/tmp/uploads"
-Path(UPLOAD_DIR).mkdir(parents=True, exist_ok=True)
-
 @app.post("/summarizer")
 async def upload_base64(file_data: FileUploadRequest):
     try:
-        print(file_data.filename)
-        file_path = os.path.join(UPLOAD_DIR, file_data.filename)
+        file_path = os.path.join(os.getcwd(),UPLOAD_DIR, file_data.filename)
 
         with open(file_path, "wb") as f:
            f.write(base64.b64decode(file_data.base64_file))
-
-        extracted_text = f"Saved file: {file_path}\nContent-Type: {file_data.content_type}\n"
-        extracted_text += f"(First 100 bytes shown)\n\n"
-        with open(file_path, "rb") as f:
-            extracted_text += repr(f.read(100))
-
-        return {"text": "api under development"}
+        if file_data.content_type == "application/pdf":
+            doc = Summary().load_doc(doc_path=file_path)
+            chunks,size = Summary().doc_chunk(docs_path=doc)
+            summary = Summary().get_summary(document=chunks,len_document=size)
+            print(summary)
+            return {"text":summary}
+        else:
+            return {"text":f"{file_data.content_type} not supported"}
+        # with open(file_path, "rb") as f:
+        #     content = f.read(100)
+        summary = "# working in *progress*.."
+
+        return {"text": summary}
 
     except Exception as e:
+        import traceback
+        traceback.print_exc()  # 🔥 Show full error in terminal
        raise HTTPException(status_code=500, detail=str(e))
 
+
 
+## RAG Chatbot
 @app.post("/upload")
 async def upload_file(req: UploadRequest):
     session_id = str(uuid.uuid4())
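
For reference, a minimal client sketch for the new `/summarizer` endpoint (not part of the commit; the host, port, and file name are assumptions). It mirrors the fields of `FileUploadRequest` and base64-encodes a local PDF:

```python
import base64
import requests

# Hypothetical host/port and file name; adjust to the deployment.
API_URL = "http://localhost:8000/summarizer"

with open("report.pdf", "rb") as f:
    payload = {
        "token": "your-token",              # new required field in this commit
        "filename": "report.pdf",
        "content_type": "application/pdf",  # anything else returns "not supported"
        "base64_file": base64.b64encode(f.read()).decode("utf-8"),
    }

res = requests.post(API_URL, json=payload)
print(res.json()["text"])  # the summary, or HTTP 500 on failure
```

Note that the endpoint writes the upload into a local `uploads/` directory before summarizing, which is why this commit also adds `uploads*` to `.gitignore`.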
process.py CHANGED
@@ -13,7 +13,7 @@ class Response:
         res = self.client.chat.completions.create(
             messages=[
                 {"role":"system",
-                "content":f"You are a Question answer chatbot. You have to understand the given content based on that provide answer. If don't know tell unable to get details. only give answers like your are lalit. do not provide addition text.",
+                "content":f"You are a Question answer chatbot. You have to understand the given content based on that provide answer. If don't know tell unable to get details. only give answers like your are lalit. Provide response required for user question do not provide additional context.",
                 "role": "user",
                 "content": f"Content : {mytext}\n\n Question : {query}",
                 }
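
One thing worth flagging in this hunk: both the old and new versions build `messages` from a single dict literal that repeats the `"role"` and `"content"` keys. In a Python dict literal the last duplicate key wins, so the system prompt is silently discarded and only the user turn is sent. A sketch of the likely intent (an assumption, not what the commit does; `build_messages` is a hypothetical helper):

```python
def build_messages(mytext: str, query: str) -> list:
    """Build separate system and user turns; the committed code collapses
    them into one dict, so the system prompt is overwritten and lost."""
    system_prompt = (
        "You are a Question answer chatbot. You have to understand the given "
        "content based on that provide answer."  # abridged from the diff
    )
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Content : {mytext}\n\n Question : {query}"},
    ]
```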
requirements.txt CHANGED
@@ -8,3 +8,6 @@ langchain-google-genai
 faster_whisper
 groq==0.28.0
 python-dotenv==1.1.0
+langchain-groq==0.3.4
+pymupdf==1.26.1
+transformers==4.53.0
utils/convert_embedding.py CHANGED
@@ -1,24 +1,24 @@
-from sentence_transformers import SentenceTransformer
+# from sentence_transformers import SentenceTransformer
 
-class GetEmbedding:
-    def __init__(self,data:list):
-        self.data = data
-    def user_query_emb(self,model_name:str = 'paraphrase-MiniLM-L6-v2'):
-        try:
-            model = SentenceTransformer(model_name_or_path=model_name)
-            embedding = model.encode(self.data)
-            return embedding
-        except Exception as e:
-            print(e)
+# class GetEmbedding:
+#     def __init__(self,data:list):
+#         self.data = data
+#     def user_query_emb(self,model_name:str = 'paraphrase-MiniLM-L6-v2'):
+#         try:
+#             model = SentenceTransformer(model_name_or_path=model_name)
+#             embedding = model.encode(self.data)
+#             return embedding
+#         except Exception as e:
+#             print(e)
 
-    def convert_data(self,model_name:str = 'paraphrase-MiniLM-L6-v2'):
-        try:
-            model = SentenceTransformer(model_name)
-            embeddings = model.encode(self.data)
-            return embeddings
-        except Exception as e:
-            print(e)
+#     def convert_data(self,model_name:str = 'paraphrase-MiniLM-L6-v2'):
+#         try:
+#             model = SentenceTransformer(model_name)
+#             embeddings = model.encode(self.data)
+#             return embeddings
+#         except Exception as e:
+#             print(e)
 
-if __name__ == "__main__":
-    emb = GetEmbedding("lalit")
-    print( emb)
+# if __name__ == "__main__":
+#     emb = GetEmbedding("lalit")
+#     print( emb)
utils/rag.py CHANGED
@@ -1,42 +1,42 @@
-from langchain_google_genai import GoogleGenerativeAI
-import requests
-from bs4 import BeautifulSoup
-from dotenv import load_dotenv
-import os
-load_dotenv()
+# from langchain_google_genai import GoogleGenerativeAI
+# import requests
+# from bs4 import BeautifulSoup
+# from dotenv import load_dotenv
+# import os
+# load_dotenv()
 
 
-class RAG:
-    def __init__(self):
-        self.url = 'https://lalitmahale.github.io'
-        self.llm = GoogleGenerativeAI(google_api_key=os.getenv("GOOGLE_API"),model="gemini-1.5-pro")
+# class RAG:
+#     def __init__(self):
+#         self.url = 'https://lalitmahale.github.io'
+#         self.llm = GoogleGenerativeAI(google_api_key=os.getenv("GOOGLE_API"),model="gemini-1.5-pro")
 
 
-    def get_data(self):
-        try:
-            res = requests.get(self.url)
-            soup = BeautifulSoup(res.content, "html.parser")
-            return soup.get_text()
-        except Exception as e:
-            print(e)
-
-    def clean_text(self):
-        return self.get_data().replace("\n","")
-
-    def prompt(self):
-        return """You are a helpfull assistant for me and Your name is lalit mahale. understand the below context and give answer for user question.
+#     def get_data(self):
+#         try:
+#             res = requests.get(self.url)
+#             soup = BeautifulSoup(res.content, "html.parser")
+#             return soup.get_text()
+#         except Exception as e:
+#             print(e)
+
+#     def clean_text(self):
+#         return self.get_data().replace("\n","")
+
+#     def prompt(self):
+#         return """You are a helpfull assistant for me and Your name is lalit mahale. understand the below context and give answer for user question.
 
-context : {context}\n\nQuestion : {question}\n\nGive proper answer for this questions."""
+# context : {context}\n\nQuestion : {question}\n\nGive proper answer for this questions."""
 
 
-    def pipeline(self,query):
-        try:
-            prompt = self.prompt().format(context = self.clean_text(),question = query)
-            return self.llm.invoke(prompt)
-        except Exception as e:
-            print(e)
+#     def pipeline(self,query):
+#         try:
+#             prompt = self.prompt().format(context = self.clean_text(),question = query)
+#             return self.llm.invoke(prompt)
+#         except Exception as e:
+#             print(e)
 
 
-if __name__ == "__main__":
-    res = RAG().pipeline("who is lalit mahale")
-    print(res)
+# if __name__ == "__main__":
+#     res = RAG().pipeline("who is lalit mahale")
+#     print(res)
utils/summary.py ADDED
@@ -0,0 +1,39 @@
+import os
+from dotenv import load_dotenv
+# from langchain_google_genai import GoogleGenerativeAI
+from langchain_groq import ChatGroq
+from langchain_community.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.chains.summarize import load_summarize_chain
+load_dotenv()
+
+class Summary:
+    def __init__(self):
+        self.model =llm = ChatGroq(api_key=os.getenv("GROQ_API"),model=os.getenv("MODEL"))
+        #GoogleGenerativeAI(api_key = os.getenv("API_KEY"),model = os.getenv("MODEL"))
+
+    def load_doc(self,doc_path:str):
+        try:
+            print("doc-path :",doc_path)
+            doc_loader = PyMuPDFLoader(file_path=doc_path)
+            return doc_loader.load()
+        except Exception as e:
+            print(e)
+
+    def doc_chunk(self,docs_path:list,CHUNK_SIZE:int= 3000,CHUNK_OVERLAP:int = 100):
+        splitter = RecursiveCharacterTextSplitter(chunk_size = CHUNK_SIZE,chunk_overlap = CHUNK_OVERLAP)
+        chunks = splitter.split_documents(docs_path)
+        return chunks,len(chunks)
+
+    def get_summary(self,document:list, len_document:int):
+        try:
+            if len_document == 1:
+                chain = load_summarize_chain(chain_type="stuff",llm = self.model)
+                result = chain.invoke(document)
+                return result["output_text"]
+            else:
+                chain = load_summarize_chain(chain_type="map_reduce",llm = self.model)
+                result = chain.invoke(document)
+                return result["output_text"]
+        except Exception as e:
+            print(e)
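
A minimal driver sketch for the new `Summary` pipeline (assumed usage; the PDF path is hypothetical, and `GROQ_API` / `MODEL` must be set in `.env` as this commit does):

```python
from utils.summary import Summary

s = Summary()
docs = s.load_doc(doc_path="uploads/report.pdf")  # PyMuPDF page documents
chunks, size = s.doc_chunk(docs_path=docs)        # 3000-char chunks, 100 overlap
print(s.get_summary(document=chunks, len_document=size))
```

The branch in `get_summary` trades cost against context length: `"stuff"` pastes the whole document into a single prompt, which is fine when everything fits in one chunk, while `"map_reduce"` summarizes each chunk independently and then summarizes those summaries, keeping multi-chunk PDFs within the model's context window.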