Spaces:
Sleeping
Sleeping
Upload 10 files
Browse files- .gitattributes +1 -0
- Classes/Helper_Class.py +57 -0
- Classes/Owiki_Class.py +104 -0
- Classes/__init__.py +3 -0
- Classes/__pycache__/Helper_Class.cpython-312.pyc +0 -0
- Classes/__pycache__/Owiki_Class.cpython-312.pyc +0 -0
- Classes/__pycache__/__init__.cpython-312.pyc +0 -0
- __init__.py +0 -0
- config.json +7 -0
- faiss_index/index.faiss +3 -0
- faiss_index/index.pkl +3 -0
.gitattributes
CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
faiss_index/index.faiss filter=lfs diff=lfs merge=lfs -text
|
Classes/Helper_Class.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import List
|
2 |
+
from PyPDF2 import PdfReader
|
3 |
+
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
4 |
+
from langchain_google_genai import GoogleGenerativeAIEmbeddings
|
5 |
+
from langchain_community.vectorstores import FAISS
|
6 |
+
import os
|
7 |
+
import google.generativeai as genai
|
8 |
+
|
9 |
+
# SECURITY: never commit the Google API key to source control. It must be
# supplied through the GOOGLE_API_KEY environment variable; the key that was
# previously hard-coded here is exposed in history and should be revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
11 |
+
|
12 |
+
|
13 |
+
class GenerateFIASSDB:
    """Build a FAISS vector index from PDF files and save it locally.

    Instantiating the class runs the whole pipeline: the text of every PDF
    is extracted, split into overlapping chunks, embedded with the given
    embedding model and written to ``save_loc``.
    """

    def __init__(
        self,
        pdf_docs: List[str],
        save_loc: str,
        model_embeddings: str = "models/embedding-001",
        chunk_size: int = 10000,
        chunk_overlap: int = 1000,
    ) -> None:
        """Create the index for ``pdf_docs`` and store it at ``save_loc``.

        Args:
            pdf_docs: Paths (or file objects accepted by ``PdfReader``) of
                the PDFs to index.
            save_loc: Directory the FAISS index is written to.
            model_embeddings: Name of the embedding model.
            chunk_size: Maximum number of characters per text chunk.
            chunk_overlap: Number of characters shared by adjacent chunks.

        Raises:
            ValueError: If no text could be extracted from the PDFs.
        """
        self.save_loc = save_loc
        self.embedding = model_embeddings
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        text = self.get_pdf_text(pdf_docs)
        text_chunks = self.get_text_chunks(text)
        self.get_vector_store(text_chunks)

    def get_pdf_text(self, pdf_docs: List[str]) -> str:
        """Return the concatenated text of every page of every PDF."""
        pages_text = []
        for pdf in pdf_docs:
            pdf_reader = PdfReader(pdf)
            # Guard against pages that yield no text so one such page
            # cannot break the concatenation.
            pages_text.extend(page.extract_text() or "" for page in pdf_reader.pages)
        return "".join(pages_text)

    def get_text_chunks(self, text: str) -> List[str]:
        """Split ``text`` into overlapping chunks for embedding."""
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=self.chunk_size,
            chunk_overlap=self.chunk_overlap,
        )
        return text_splitter.split_text(text)

    def get_vector_store(self, text_chunks: List[str]) -> None:
        """Embed ``text_chunks`` and save the FAISS index to ``self.save_loc``.

        Raises:
            ValueError: If ``text_chunks`` is empty, since an index cannot
                be built from nothing.
        """
        if not text_chunks:
            raise ValueError(
                "No text chunks to index: the supplied PDFs contain no extractable text."
            )
        embeddings = GoogleGenerativeAIEmbeddings(model=self.embedding)
        vector_store = FAISS.from_texts(text_chunks, embedding=embeddings)
        vector_store.save_local(self.save_loc)
|
39 |
+
|
40 |
+
|
41 |
+
class DB_Retriever:
    """Load a FAISS index saved on disk and expose it as a retriever."""

    def __init__(self, db_loc: str, model_embeddings: str = "models/embedding-001") -> None:
        """Load the index stored at ``db_loc``.

        Args:
            db_loc: Directory containing the saved FAISS index.
            model_embeddings: Name of the embedding model; it should match
                the model the index was built with.
        """
        self.db_loc = db_loc
        self.embeddings = GoogleGenerativeAIEmbeddings(model=model_embeddings)
        # SECURITY: this flag allows the pickled part of the index to be
        # deserialized, which can execute arbitrary code. Only load index
        # directories that were produced by a trusted source.
        self.db = FAISS.load_local(
            self.db_loc,
            self.embeddings,
            allow_dangerous_deserialization=True,
        )

    def retrieve(self, query: str):
        """Return a retriever over the loaded index.

        The result is a retriever object (not a list of documents); call
        its ``invoke(query)`` or use it inside a chain to fetch documents.

        Args:
            query: Currently unused; kept so existing callers keep working.

        Returns:
            The object produced by ``self.db.as_retriever()``.
        """
        return self.db.as_retriever()
|
53 |
+
|
54 |
+
if __name__ == "__main__":
    # Manual smoke test for the retriever.
    query = "What is cloud adapter in google connection?"
    # retrieve() hands back a retriever object, not a list of documents, so
    # it has to be invoked with the query before results can be counted.
    docs = DB_Retriever("src/faiss_index").retrieve(query).invoke(query)
    print(len(docs))
    # Only show the second hit when the search returned at least two.
    if len(docs) > 1:
        print('\n\n\n\n', docs[1])
|
Classes/Owiki_Class.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import google.generativeai as genai
|
3 |
+
from langchain_google_genai import ChatGoogleGenerativeAI
|
4 |
+
from langchain_community.vectorstores import FAISS
|
5 |
+
from langchain_core.output_parsers import StrOutputParser
|
6 |
+
from langchain_core.runnables import RunnablePassthrough
|
7 |
+
from langchain.prompts import PromptTemplate
|
8 |
+
import json
|
9 |
+
import re
|
10 |
+
from Classes.Helper_Class import DB_Retriever
|
11 |
+
from typing import Optional
|
12 |
+
|
13 |
+
# SECURITY: never commit the Google API key to source control. It must be
# supplied through the GOOGLE_API_KEY environment variable; the key that was
# previously hard-coded here is exposed in history and should be revoked.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY"))
|
15 |
+
|
16 |
+
class OWiki:
    """LLM helper that summarizes text, drafts SQL and answers OIC questions.

    Required keyword settings: ``temperature``, ``summary_length``,
    ``model``, ``db_loc`` and ``model_embeddings``.
    """

    def __init__(self, **kwargs):
        """Store the settings and create the chat model.

        Raises:
            KeyError: If any required setting is missing.
        """
        required = ("temperature", "summary_length", "model", "db_loc", "model_embeddings")
        missing = [key for key in required if key not in kwargs]
        if missing:
            # Fail before any client is built, naming every missing setting.
            raise KeyError(f"Missing required OWiki settings: {', '.join(missing)}")

        self.summary = kwargs['summary_length']
        self.db_loc = kwargs["db_loc"]
        self.model_embedding = kwargs['model_embeddings']
        self.llm = ChatGoogleGenerativeAI(
            model=kwargs["model"],
            temperature=kwargs['temperature'],
        )

    def get_summary_template(self):
        """Return the prompt template used to summarize a conversation."""
        prompt = """Generate a summary for the following conversational data in less than {summary} lines.\nText:\n{text}\n\nSummary:"""
        return PromptTemplate(template=prompt, input_variables=['summary', 'text'])

    def create_sql_prompt_template(self, schemas):
        """Return a prompt template asking for SQL over the given schemas.

        Args:
            schemas: Mapping of table name to a description of its schema.
        """
        prompt = """Write an SQL query for the following questions whose schemas are as follows.\nSQL Schema:"""
        for table_name, table_schema in schemas.items():
            # Literal braces in a name or schema would otherwise be parsed
            # as template variables, so they are doubled to escape them.
            safe_name = str(table_name).replace("{", "{{").replace("}", "}}")
            safe_schema = str(table_schema).replace("{", "{{").replace("}", "}}")
            prompt += f"Table Name: {safe_name}, Schema : {safe_schema}\n\n"
        prompt += """\n\nQuestion:{question}\n\nAnswer:"""
        return PromptTemplate(template=prompt, input_variables=['question'])

    def create_prompt_for_OIC_bot(self):
        """Return the prompt template used to answer OIC questions."""
        template = """You are OIC(Oracle Integration Cloud) Bot.Follow chat instructions and answer the question based only on the following
Chat_instructions:
1. Response must contain Question Explaination along with Potential Solution Headings.
2. Response must contain all possible Error Scenarios if applicable along with a Summary Heading containing breif summary at the end.

Context:
{context}

Question: {question}
"""
        return PromptTemplate.from_template(template)

    def create_sql_agent(self, question, schemas):
        """Ask the model for an SQL query and return it formatted as HTML.

        Args:
            question: Natural-language question to translate into SQL.
            schemas: Mapping of table name to a description of its schema.
        """
        prompt_template = self.create_sql_prompt_template(schemas)
        chain = prompt_template | self.llm | StrOutputParser()
        response = chain.invoke({"question": question})
        return self.format_llm_response(response)

    def generate_summary(self, text):
        """Return a model-written summary of ``text``.

        The requested length limit is the ``summary_length`` setting.
        """
        prompt_template = self.get_summary_template()
        chain = prompt_template | self.llm | StrOutputParser()
        return chain.invoke({"text": text, "summary": self.summary})

    def format_llm_response(self, text):
        """Convert simple Markdown in ``text`` to HTML.

        Newlines become ``<br>``, fenced code becomes ``<pre><code>``,
        ``**x**`` becomes ``<b>`` and ``*x*`` becomes ``<i>``.
        """
        bold_pattern = r"\*\*(.*?)\*\*"
        italic_pattern = r"\*(.*?)\*"
        code_pattern = r"```(.*?)```"
        # Newlines are replaced first so the non-DOTALL patterns below can
        # match spans that originally covered several lines.
        text = text.replace('\n', '<br>')
        formatted_text = re.sub(code_pattern, "<pre><code>\\1</code></pre>", text)
        # Bold must be handled before italic, otherwise the single-asterisk
        # pattern would consume the double asterisks.
        formatted_text = re.sub(bold_pattern, "<b>\\1</b>", formatted_text)
        formatted_text = re.sub(italic_pattern, "<i>\\1</i>", formatted_text)
        return formatted_text

    def search_from_db(self, query: str, chat_history: Optional[str] = None) -> str:
        """Answer ``query`` using documents retrieved from the FAISS index.

        Args:
            query: The user's question.
            chat_history: Accepted for backward compatibility. The prompt has
                no slot for it, so it does not influence the answer; the
                summary that used to be generated from it was discarded and
                is no longer requested.

        Returns:
            The model's answer as plain text.
        """
        db = DB_Retriever(self.db_loc, self.model_embedding)
        retriever = db.retrieve(query)
        prompt = self.create_prompt_for_OIC_bot()
        retrieval_chain = (
            {"context": retriever, "question": RunnablePassthrough()}
            | prompt
            | self.llm
            | StrOutputParser()
        )
        return retrieval_chain.invoke(query)
|
91 |
+
|
92 |
+
if __name__ == "__main__":
    # Manual smoke test: load the settings file, build the bot and print
    # its answer to a sample question.
    with open("src/config.json", "r") as config_file:
        settings = json.load(config_file)
    bot = OWiki(**settings)
    answer = bot.search_from_db(
        "What is Machine Learning",
        "You can answer out of context as well",
    )
    print(answer)
|
Classes/__init__.py
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
import google.generativeai as genai
|
2 |
+
|
3 |
+
# SECURITY: never commit the Google API key to source control. With no
# explicit api_key, configure() falls back to the GOOGLE_API_KEY environment
# variable; the key previously hard-coded here should be revoked.
genai.configure()
|
Classes/__pycache__/Helper_Class.cpython-312.pyc
ADDED
Binary file (3.85 kB). View file
|
|
Classes/__pycache__/Owiki_Class.cpython-312.pyc
ADDED
Binary file (5.83 kB). View file
|
|
Classes/__pycache__/__init__.cpython-312.pyc
ADDED
Binary file (354 Bytes). View file
|
|
__init__.py
ADDED
File without changes
|
config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"temperature":0.3,
|
3 |
+
"summary_length":50,
|
4 |
+
"model":"gemini-pro",
|
5 |
+
"db_loc":"src/faiss_index",
|
6 |
+
"model_embeddings":"models/embedding-001"
|
7 |
+
}
|
faiss_index/index.faiss
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb0c2686572eff292bdb06000c6c49a7ba08f4579a68952a16589e0ae5725d1c
|
3 |
+
size 1738797
|
faiss_index/index.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cb85b684526142700928195952dae02ecc1d212215dda84f9e9dcd7a000dd81
|
3 |
+
size 5709230
|