Spaces:

Sujithanumala
/

Oracle_Wikipage

Sleeping

App Files Files Community

Sujithanumala committed on Nov 20, 2024

Commit

cf4a8a7

verified ·

1 Parent(s): 9947a95

Upload 10 files

Browse files

Files changed (11) hide show

.gitattributes +1 -0
Classes/Helper_Class.py +57 -0
Classes/Owiki_Class.py +104 -0
Classes/__init__.py +3 -0
Classes/__pycache__/Helper_Class.cpython-312.pyc +0 -0
Classes/__pycache__/Owiki_Class.cpython-312.pyc +0 -0
Classes/__pycache__/__init__.cpython-312.pyc +0 -0
__init__.py +0 -0
config.json +7 -0
faiss_index/index.faiss +3 -0
faiss_index/index.pkl +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text

Classes/Helper_Class.py ADDED Viewed

	@@ -0,0 +1,57 @@

+from typing import List
+from PyPDF2 import PdfReader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_google_genai import GoogleGenerativeAIEmbeddings
+from langchain_community.vectorstores import FAISS
+import os
+import google.generativeai as genai
+os.environ["GOOGLE_API_KEY"] = "AIzaSyBoghqvvnMMS4bA61LjQkkPNdIRetqk438"
+genai.configure(api_key="AIzaSyBoghqvvnMMS4bA61LjQkkPNdIRetqk438")
+class GenerateFIASSDB:
+    def __init__(self,pdf_docs : List[str], save_loc:str, model_embeddings: str = "models/embedding-001")-> None:
+        self.save_loc = save_loc
+        self.embedding = model_embeddings
+        text = self.get_pdf_text(pdf_docs)
+        text_chunks = self.get_text_chunks(text)
+        self.get_vector_store(text_chunks)
+        pass  #configure gen ai key from config file
+    def get_pdf_text(self,pdf_docs : List[str]) -> str:
+        text = ""
+        for pdf in pdf_docs:
+            pdf_reader= PdfReader(pdf)
+            for page in pdf_reader.pages:
+                text+= page.extract_text()
+        return text
+    def get_text_chunks(self, text : str) -> List:
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
+        chunks = text_splitter.split_text(text)
+        return chunks
+    def get_vector_store(self, text_chunks : List) -> None:
+        embeddings = GoogleGenerativeAIEmbeddings(model = self.embedding)
+        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
+        vector_store.save_local(self.save_loc)
+class DB_Retriever:
+    def __init__(self, db_loc : str, model_embeddings : str = "models/embedding-001") -> None:
+        self.db_loc = db_loc
+        self.embeddings = GoogleGenerativeAIEmbeddings(model = model_embeddings)
+        self.db = FAISS.load_local(self.db_loc, self.embeddings,allow_dangerous_deserialization  = True)
+    def retrieve(self, query : str) -> List[str]:
+        # docs = self.db.similarity_search(query)
+        retriver = self.db.as_retriever()
+        # output_docs =  retriver.invoke(query)
+        # return output_docs
+        return retriver
+if __name__ =="__main__":
+    res = DB_Retriever("src/faiss_index").retrieve("What is cloud adapter in google connection?")
+    print(len(res))
+    print('\n\n\n\n',res[1])

Classes/Owiki_Class.py ADDED Viewed

	@@ -0,0 +1,104 @@

+import os
+import google.generativeai as genai
+from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_community.vectorstores import FAISS
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.runnables import RunnablePassthrough
+from langchain.prompts import PromptTemplate
+import json
+import re
+from Classes.Helper_Class import DB_Retriever
+from typing import Optional
+os.environ["GOOGLE_API_KEY"] = "AIzaSyBoghqvvnMMS4bA61LjQkkPNdIRetqk438"
+genai.configure(api_key="AIzaSyBoghqvvnMMS4bA61LjQkkPNdIRetqk438")
+class OWiki:
+    def __init__(self,**kwargs):
+        temperature = kwargs['temperature']
+        self.summary = kwargs['summary_length']
+        model = kwargs["model"]
+        self.db_loc = kwargs["db_loc"]
+        self.llm = ChatGoogleGenerativeAI(model=model,
+                            temperature=temperature)
+        self.model_embedding = kwargs['model_embeddings']
+    def get_summary_template(self):
+        prompt = """Generate a summary for the following conversational data in less than {summary} lines.\nText:\n{text}\n\nSummary:"""
+        prompt_template = PromptTemplate(template = prompt,input_variables=['summary','text'])
+        return prompt_template
+    def create_sql_prompt_template(self,schemas):
+        prompt =  """Write an SQL query for the following questions whose schemas are as follows.\nSQL Schema:"""
+        for table_name,table_schema in schemas.items():
+            prompt+= f"Table Name: {table_name}, Schema : {table_schema}\n\n"
+        prompt+=  """\n\nQuestion:{question}\n\nAnswer:"""
+        prompt_template = PromptTemplate(template = prompt,input_variables=['question'])
+        return prompt_template
+    def create_prompt_for_OIC_bot(self):
+        template = """You are OIC(Oracle Integration Cloud) Bot.Follow chat instructions and answer the question based only on the following
+        Chat_instructions:
+        1. Response must contain Question Explaination along with Potential Solution Headings.
+        2. Response must contain all possible Error Scenarios if applicable along with a Summary Heading containing breif summary at the end.
+        Context:
+        {context}
+        Question: {question}
+        """
+        prompt = PromptTemplate.from_template(template)
+        return prompt
+    def create_sql_agent(self,question,schemas):
+        prompt_template = self.create_sql_prompt_template(schemas)
+        chain = prompt_template | self.llm | StrOutputParser()
+        response = chain.invoke({"question":question})
+        response = self.format_llm_response(response)
+        return response
+    def generate_summary(self,text):
+        prompt_template = self.get_summary_template()
+        chain = prompt_template | self.llm | StrOutputParser()
+        response = chain.invoke({"text":text,"summary":self.summary})
+        return response
+    def format_llm_response(self,text):
+        bold_pattern = r"\*\*(.*?)\*\*"
+        italic_pattern = r"\*(.*?)\*"
+        code_pattern = r"```(.*?)```"
+        text = text.replace('\n', '<br>')
+        formatted_text = re.sub(code_pattern,"<pre><code>\\1</code></pre>",text)
+        formatted_text = re.sub(bold_pattern, "<b>\\1</b>", formatted_text)
+        formatted_text = re.sub(italic_pattern, "<i>\\1</i>", formatted_text)
+        return formatted_text
+    def search_from_db(self, query : str, chat_history : Optional[str] ) -> str :
+        db = DB_Retriever(self.db_loc,self.model_embedding)
+        retriever = db.retrieve(query)
+        prompt = self.create_prompt_for_OIC_bot()
+        chat_history = self.generate_summary(chat_history)
+        retrieval_chain = (
+        {"context": retriever, "question": RunnablePassthrough()}
+        | prompt
+        | self.llm
+        | StrOutputParser()
+        )
+        response = retrieval_chain.invoke(query)
+        # response = self.format_llm_response(response)
+        return response
+if __name__=="__main__":
+    with open("src/config.json",'r') as f:
+        hyperparameters = json.load(f)
+    a = OWiki(**hyperparameters)
+#     print(a.generate_summary("""User:What is ML?\nBot:Machine learning (ML) is a branch of
+#  and computer science that focuses on the using data and algorithms to enable AI to imitate the way that humans learn, gradually improving its accuracy.
+# How does machine learning work?
+#  (link resides outside ibm.com) breaks out the learning system of a machine learning algorithm into three main parts.\nUser:How to integrate with Oracle\nUser:Explain what have you explained above\nBot:"""))
+#     print("*"*100)
+    # hyperparameters  = {"User":" id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, email TEXT UNIQUE","User1":" id1 INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, email TEXT UNIQUE"}
+    # print(a.create_sql_agent("Filter out common values in table 1 and 2 based on id",**hyperparameters))
+    print(a.search_from_db("What is Machine Learning","You can answer out of context as well"))

Classes/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ import google.generativeai as genai
2	+
3	+ genai.configure()  # SECURITY: key is read from the GOOGLE_API_KEY environment variable; the key previously committed here is exposed and must be revoked

Classes/__pycache__/Helper_Class.cpython-312.pyc ADDED Viewed

Binary file (3.85 kB). View file

Classes/__pycache__/Owiki_Class.cpython-312.pyc ADDED Viewed

Binary file (5.83 kB). View file

Classes/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (354 Bytes). View file

__init__.py ADDED Viewed

File without changes

config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "temperature":0.3,
+    "summary_length":50,
+    "model":"gemini-pro",
+    "db_loc":"src/faiss_index",
+    "model_embeddings":"models/embedding-001"
+}

faiss_index/index.faiss ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eb0c2686572eff292bdb06000c6c49a7ba08f4579a68952a16589e0ae5725d1c
+size 1738797

faiss_index/index.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb85b684526142700928195952dae02ecc1d212215dda84f9e9dcd7a000dd81
+size 5709230