Sujithanumala committed on
Commit
cf4a8a7
·
verified ·
1 Parent(s): 9947a95

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
Classes/Helper_Class.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from langchain_google_genai import GoogleGenerativeAIEmbeddings
5
+ from langchain_community.vectorstores import FAISS
6
+ import os
7
+ import google.generativeai as genai
8
+
9
# SECURITY: the Google API key is read from the GOOGLE_API_KEY environment
# variable instead of being hard-coded. A key that has been committed to
# version control must be treated as leaked and revoked.
# Export GOOGLE_API_KEY before importing this module; an already-set value is
# no longer overwritten.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if _google_api_key:
    genai.configure(api_key=_google_api_key)
11
+
12
+
13
class GenerateFIASSDB:
    """Build a FAISS vector index from PDF files and save it to disk.

    Creating an instance runs the whole pipeline: the text of every PDF is
    extracted, split into overlapping chunks, embedded with the configured
    Google embedding model and written to ``save_loc``.
    """

    def __init__(self, pdf_docs: List[str], save_loc: str, model_embeddings: str = "models/embedding-001") -> None:
        """Run the extract -> split -> embed -> save pipeline.

        Args:
            pdf_docs: PDF inputs, each passed unchanged to ``PdfReader``.
            save_loc: Directory handed to ``FAISS.save_local``.
            model_embeddings: Name of the Google embedding model.

        Raises:
            ValueError: If no text could be extracted from the PDFs.
        """
        # TODO: configure the Gen AI key from a config file.
        self.save_loc = save_loc
        self.embedding = model_embeddings
        text = self.get_pdf_text(pdf_docs)
        text_chunks = self.get_text_chunks(text)
        self.get_vector_store(text_chunks)

    def get_pdf_text(self, pdf_docs: List[str]) -> str:
        """Return the text of every page of every PDF, concatenated in order."""
        page_texts = []
        for pdf in pdf_docs:
            for page in PdfReader(pdf).pages:
                # Guard against an extractor returning None for a page with
                # no text layer; such a page contributes nothing.
                page_texts.append(page.extract_text() or "")
        # A single join avoids repeated string re-allocation on large PDFs.
        return "".join(page_texts)

    def get_text_chunks(self, text: str, chunk_size: int = 10000, chunk_overlap: int = 1000) -> List[str]:
        """Split ``text`` into overlapping chunks.

        Args:
            text: The text to split.
            chunk_size: Maximum chunk size given to the splitter.
            chunk_overlap: Overlap between neighbouring chunks.

        Returns:
            The chunks produced by ``RecursiveCharacterTextSplitter``.
        """
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
        return text_splitter.split_text(text)

    def get_vector_store(self, text_chunks: List[str]) -> None:
        """Embed ``text_chunks`` and save the FAISS index to ``self.save_loc``.

        Raises:
            ValueError: If ``text_chunks`` is empty.
        """
        if not text_chunks:
            # Fail early with a clear message instead of handing an empty
            # corpus to the embedding model and the index builder.
            raise ValueError("No text chunks to index; the PDFs produced no extractable text.")
        embeddings = GoogleGenerativeAIEmbeddings(model=self.embedding)
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local(self.save_loc)
39
+
40
+
41
class DB_Retriever:
    """Load a saved FAISS index and expose it as a retriever."""

    def __init__(self, db_loc: str, model_embeddings: str = "models/embedding-001") -> None:
        """Load the FAISS index stored at ``db_loc``.

        Args:
            db_loc: Directory previously written by ``FAISS.save_local``.
            model_embeddings: Name of the Google embedding model; it should
                match the model the index was built with.
        """
        self.db_loc = db_loc
        self.embeddings = GoogleGenerativeAIEmbeddings(model=model_embeddings)
        # SECURITY: allow_dangerous_deserialization=True lets FAISS unpickle
        # index.pkl, which can execute arbitrary code. Only load index
        # directories that come from a trusted source.
        self.db = FAISS.load_local(self.db_loc, self.embeddings, allow_dangerous_deserialization=True)

    def retrieve(self, query: str):
        """Return a retriever over the loaded index.

        The result is the object produced by ``self.db.as_retriever()``, not
        a list of documents; callers run the search themselves, for example
        with ``retriever.invoke(query)`` or by piping it into a chain.

        Args:
            query: Accepted for backward compatibility; it is not used here
                because no search is performed by this method.
        """
        return self.db.as_retriever()
53
+
54
if __name__ == "__main__":
    # Manual smoke test: load the saved index and run one similarity query.
    question = "What is cloud adapter in google connection?"
    retriever = DB_Retriever("src/faiss_index").retrieve(question)
    # retrieve() returns a retriever object, so the search has to be invoked
    # explicitly before the results can be counted or indexed.
    res = retriever.invoke(question)
    print(len(res))
    if len(res) > 1:
        print('\n\n\n\n', res[1])
Classes/Owiki_Class.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import google.generativeai as genai
3
+ from langchain_google_genai import ChatGoogleGenerativeAI
4
+ from langchain_community.vectorstores import FAISS
5
+ from langchain_core.output_parsers import StrOutputParser
6
+ from langchain_core.runnables import RunnablePassthrough
7
+ from langchain.prompts import PromptTemplate
8
+ import json
9
+ import re
10
+ from Classes.Helper_Class import DB_Retriever
11
+ from typing import Optional
12
+
13
# SECURITY: the Google API key is read from the GOOGLE_API_KEY environment
# variable instead of being hard-coded. A key that has been committed to
# version control must be treated as leaked and revoked.
# Export GOOGLE_API_KEY before importing this module; an already-set value is
# no longer overwritten.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if _google_api_key:
    genai.configure(api_key=_google_api_key)
15
+
16
class OWiki:
    """Gemini-backed assistant: summaries, SQL generation and OIC question answering."""

    # Settings that must be supplied as keyword arguments (e.g. from config.json).
    _REQUIRED_SETTINGS = ("temperature", "summary_length", "model", "db_loc", "model_embeddings")

    def __init__(self, **kwargs):
        """Create the chat model and store the settings.

        Keyword Args:
            temperature: Sampling temperature for the chat model.
            summary_length: Line limit inserted into the summary prompt.
            model: Chat model name passed to ``ChatGoogleGenerativeAI``.
            db_loc: Directory of the saved FAISS index.
            model_embeddings: Embedding model name used to load the index.

        Raises:
            KeyError: If any required setting is missing.
        """
        missing = [key for key in self._REQUIRED_SETTINGS if key not in kwargs]
        if missing:
            # Report every missing key at once rather than only the first one.
            raise KeyError(f"Missing required OWiki settings: {', '.join(missing)}")
        self.summary = kwargs["summary_length"]
        self.db_loc = kwargs["db_loc"]
        self.model_embedding = kwargs["model_embeddings"]
        self.llm = ChatGoogleGenerativeAI(model=kwargs["model"], temperature=kwargs["temperature"])

    def get_summary_template(self):
        """Return the prompt template used to summarise a conversation."""
        prompt = """Generate a summary for the following conversational data in less than {summary} lines.\nText:\n{text}\n\nSummary:"""
        return PromptTemplate(template=prompt, input_variables=['summary', 'text'])

    def create_sql_prompt_template(self, schemas):
        """Return a prompt template that embeds ``schemas`` and takes ``question``.

        Args:
            schemas: Mapping of table name to schema description.
        """
        def _as_literal(value):
            # Braces are template syntax; double them so a schema containing
            # "{" or "}" is not mistaken for an input variable.
            return str(value).replace("{", "{{").replace("}", "}}")

        prompt = """Write an SQL query for the following questions whose schemas are as follows.\nSQL Schema:"""
        prompt += "".join(
            f"Table Name: {_as_literal(table_name)}, Schema : {_as_literal(table_schema)}\n\n"
            for table_name, table_schema in schemas.items()
        )
        prompt += """\n\nQuestion:{question}\n\nAnswer:"""
        return PromptTemplate(template=prompt, input_variables=['question'])

    def create_prompt_for_OIC_bot(self):
        """Return the question-answering prompt with ``context`` and ``question`` slots."""
        template = (
            "You are OIC(Oracle Integration Cloud) Bot.Follow chat instructions and answer the question based only on the following\n"
            "Chat_instructions:\n"
            "1. Response must contain Question Explaination along with Potential Solution Headings.\n"
            "2. Response must contain all possible Error Scenarios if applicable along with a Summary Heading containing breif summary at the end.\n"
            "\n"
            "Context:\n"
            "{context}\n"
            "\n"
            "Question: {question}\n"
        )
        return PromptTemplate.from_template(template)

    def create_sql_agent(self, question, schemas):
        """Ask the model for an SQL query and return it as an HTML fragment.

        Args:
            question: Natural-language question to answer with SQL.
            schemas: Mapping of table name to schema description.
        """
        chain = self.create_sql_prompt_template(schemas) | self.llm | StrOutputParser()
        response = chain.invoke({"question": question})
        return self.format_llm_response(response)

    def generate_summary(self, text):
        """Summarise ``text`` in fewer than ``self.summary`` lines."""
        chain = self.get_summary_template() | self.llm | StrOutputParser()
        return chain.invoke({"text": text, "summary": self.summary})

    def format_llm_response(self, text):
        """Convert the model's Markdown-like output into an HTML fragment.

        HTML special characters are escaped, newlines become ``<br>``,
        fenced code becomes ``<pre><code>`` blocks, and ``**bold**`` and
        ``*italic*`` become ``<b>`` and ``<i>``. Emphasis is not applied
        inside code, so asterisks in code such as ``SELECT *`` are kept.
        """
        import html  # local import: the file's import block is left untouched

        bold_pattern = r"\*\*(.*?)\*\*"
        italic_pattern = r"\*(.*?)\*"
        code_pattern = r"```(.*?)```"

        # Model output is untrusted text: escape it before adding our own tags.
        escaped = html.escape(text, quote=False).replace('\n', '<br>')

        # Splitting on a pattern with one capture group alternates
        # prose (even indexes) and code-fence contents (odd indexes).
        pieces = []
        for index, segment in enumerate(re.split(code_pattern, escaped)):
            if index % 2:
                pieces.append(f"<pre><code>{segment}</code></pre>")
                continue
            segment = re.sub(bold_pattern, "<b>\\1</b>", segment)
            segment = re.sub(italic_pattern, "<i>\\1</i>", segment)
            pieces.append(segment)
        return "".join(pieces)

    def search_from_db(self, query: str, chat_history: Optional[str] = None) -> str:
        """Answer ``query`` from the documents retrieved out of the FAISS index.

        Args:
            query: The user's question.
            chat_history: Accepted for backward compatibility and currently
                unused. NOTE(review): the previous code summarised it with an
                extra model call and discarded the result; the prompt has no
                slot for it.

        Returns:
            The model's answer as plain text.
        """
        retriever = DB_Retriever(self.db_loc, self.model_embedding).retrieve(query)
        prompt = self.create_prompt_for_OIC_bot()

        def _join_documents(documents):
            # Give the prompt the document text only, not the object reprs.
            return "\n\n".join(document.page_content for document in documents)

        retrieval_chain = (
            {"context": retriever | _join_documents, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )
        return retrieval_chain.invoke(query)
91
+
92
if __name__ == "__main__":
    # Manual smoke test: build an OWiki from the JSON settings file and print
    # the answer to one question.
    with open("src/config.json", 'r') as config_file:
        settings = json.load(config_file)
    bot = OWiki(**settings)
    answer = bot.search_from_db("What is Machine Learning", "You can answer out of context as well")
    print(answer)
Classes/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
import os

import google.generativeai as genai

# SECURITY: the Google API key is read from the GOOGLE_API_KEY environment
# variable instead of being hard-coded. A key that has been committed to
# version control must be treated as leaked and revoked.
_google_api_key = os.environ.get("GOOGLE_API_KEY")
if _google_api_key:
    genai.configure(api_key=_google_api_key)
Classes/__pycache__/Helper_Class.cpython-312.pyc ADDED
Binary file (3.85 kB). View file
 
Classes/__pycache__/Owiki_Class.cpython-312.pyc ADDED
Binary file (5.83 kB). View file
 
Classes/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (354 Bytes). View file
 
__init__.py ADDED
File without changes
config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "temperature":0.3,
3
+ "summary_length":50,
4
+ "model":"gemini-pro",
5
+ "db_loc":"src/faiss_index",
6
+ "model_embeddings":"models/embedding-001"
7
+ }
faiss_index/index.faiss ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0c2686572eff292bdb06000c6c49a7ba08f4579a68952a16589e0ae5725d1c
3
+ size 1738797
faiss_index/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb85b684526142700928195952dae02ecc1d212215dda84f9e9dcd7a000dd81
3
+ size 5709230