Baskar2005 committed
Commit 653b753 · verified · 1 Parent(s): 2938f89

Update dspy_qa.py

Files changed (1)
  1. dspy_qa.py +13 -12
dspy_qa.py CHANGED
@@ -4,11 +4,12 @@ import dspy
 from dsp.utils import deduplicate
 from dspy.retrieve.faiss_rm import FaissRM
 from langchain_community.document_loaders import PyPDFLoader
-from langchain_community.document_loaders import CSVLoader
-
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 
-# os.environ["AZURE_OPENAI_API_KEY"] = ""
+api_base=os.getenv("AZURE_OPENAI_ENDPOINT")
+api_version=os.getenv("OPENAI_API_VERSION")
+
+
 
 class GenerateSearchQuery(dspy.Signature):
     """Write a simple search query that will help answer a complex question."""
@@ -32,9 +33,9 @@ class DocQA(dspy.Module):
     def __init__(self, file_path,passages_per_hop=3, max_hops=2):
         super().__init__()
         self.cache = "cache.json"
-        self.llm = dspy.AzureOpenAI(api_base="https://azureadople.openai.azure.com/",
-                                    api_version="2023-09-15-preview",
-                                    model="GPT-3")
+        self.llm = dspy.AzureOpenAI(api_base=api_base,
+                                    api_version=api_version,
+                                    model="GPT-4o")
 
         self.generate_query = [dspy.ChainOfThought(GenerateSearchQuery) for _ in range(max_hops)]
         self.retrieve = dspy.Retrieve(k=passages_per_hop)
@@ -44,14 +45,14 @@ class DocQA(dspy.Module):
         self.knowledge_base = self.create_knowledge_base(file_path)
 
     def load_documents(self, file_path):
-        print("file_path", file_path)
-        loader = CSVLoader(file_path)
+        # print("file_path", file_path)
+        loader = PyPDFLoader(file_path)
         documents = loader.load()
         return documents
 
     def split_documents(self, documents):
         text_splitter = RecursiveCharacterTextSplitter(
-            chunk_size=6000,
+            chunk_size=10000,
             chunk_overlap=0,
             length_function=len,
             is_separator_regex=False,
@@ -59,11 +60,11 @@ class DocQA(dspy.Module):
 
         docs = text_splitter.split_documents(documents)
         document_chunks = [page_content.page_content for page_content in docs]
-        print("input context Ready")
+        # print("input context Ready")
        return document_chunks
 
     def create_knowledge_base(self, file_path):
-        print("file_path", file_path)
+        # print("file_path", file_path)
         document = self.load_documents(file_path)
         split_documents = self.split_documents(document)
         knowledge_base = FaissRM(split_documents)
@@ -72,8 +73,8 @@ class DocQA(dspy.Module):
     def run(self,question):
         dspy.settings.configure(lm=self.llm, rm=self.knowledge_base)
 
-
         passages = self.retrieve(question).passages
+        print("passages", passages)
         context = deduplicate(passages)
 
         pred = self.generate_answer(context=context, question=question)
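For orientation, here is a minimal usage sketch of the updated DocQA class. It assumes the two environment variables are exported before dspy_qa.py is imported (api_base and api_version are read at module import time), and sample.pdf is a hypothetical local file; none of this is part of the commit itself.

import os

# Assumed configuration; both values are placeholders (the version string below is
# simply the one the previously hard-coded config used).
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://<your-resource>.openai.azure.com/"
os.environ["OPENAI_API_VERSION"] = "2023-09-15-preview"

from dspy_qa import DocQA  # imported after the env vars are set, since the module reads them on import

# Build a FAISS-backed knowledge base from a PDF and ask a question.
qa = DocQA(file_path="sample.pdf", passages_per_hop=3, max_hops=2)  # sample.pdf is hypothetical
prediction = qa.run("What is this document about?")
print(prediction)  # run()'s exact return value is not visible in this diff

Because the endpoint and API version now come from the environment rather than hard-coded constants, the same file can be deployed against different Azure OpenAI resources without further edits.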