davidoneilai committed
Commit · 8514dc9 · 1 Parent(s): 4db208a
retriever working and new question bank
Files changed:
- .dockerignore +2 -1
- .gitignore +2 -0
- server/app.py +5 -1
- server/data/retriever.py +17 -14
- server/databases/{banco_de_questoes_v3.txt → banco_de_dados_BIO_HIS_v1.txt} +0 -0
- server/llm/gemini.py +1 -1
- server/services/generate_questions_service.py +26 -22
.dockerignore CHANGED
@@ -41,4 +41,5 @@ next-env.d.ts
 .yarn
 
 *venv
-como_nao_errar.txt
+como_nao_errar.txt
+server/venv
.gitignore CHANGED
@@ -28,3 +28,5 @@ dist-ssr
 
 *.env
 *chroma_db
+como_nao_errar.txt
+server/venv
server/app.py CHANGED
@@ -3,11 +3,14 @@ from fastapi.middleware.cors import CORSMiddleware
 from pydantic import BaseModel
 from fastapi.staticfiles import StaticFiles
 from services.generate_questions_service import GenerateQuestionsService
+
 # from data.load_data import retriever_pre
 
 generate_questions_service = GenerateQuestionsService()
 
+
 class Body(BaseModel):
+    school_subject: str
     subject: str
     difficultie: str
 
@@ -24,9 +27,10 @@ app.add_middleware(
 
 @app.post("/generate_questions")
 async def generate_questions(body: Body):
+    school_subject = body.school_subject
     subject = body.subject
     difficultie = body.difficultie
-    query = f"Quero que você gere questões de
+    query = f"Quero que você gere questões de {school_subject}, sendo do assunto: {subject} e sendo da dificuldade: {difficultie}."
     res = generate_questions_service.handle(f"""{query}""")
     return res
 
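Note: with this change, /generate_questions requires the new school_subject field in the JSON body. A minimal sketch of a call against a local run (the host, port, and field values are assumptions, not part of the commit):

    import requests

    body = {
        "school_subject": "Biologia",  # new field introduced by this commit
        "subject": "Genética",         # example value
        "difficultie": "médio",        # example value
    }
    res = requests.post("http://localhost:8000/generate_questions", json=body)
    print(res.json())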
server/data/retriever.py CHANGED
@@ -3,9 +3,11 @@ from langchain_community.document_loaders import TextLoader
 from langchain.vectorstores import Chroma
 from langchain.chains.query_constructor.base import AttributeInfo
 from langchain.retrievers.self_query.base import SelfQueryRetriever
+from langchain_text_splitters import CharacterTextSplitter
 from llm.gemini import Gemini
 from utils.questions_parser import parse_question
 
+
 class Retriever:
 
     _model = Gemini()
@@ -17,21 +19,14 @@ class Retriever:
 
         DATA_PATH = os.environ["DATA_PATH"]
 
-        self.data_loader = TextLoader(DATA_PATH, encoding="UTF-8").load()
-
-        self.questions = list(
-            map(lambda x: "##Questão" + x, self.data_loader[0].page_content.split("##Questão"))
-        )
-
-        self.docs = []
+        data_loader = TextLoader(DATA_PATH, encoding="UTF-8").load()
 
-        for question in self.questions:
-            try:
-                self.docs.append(parse_question(question))
-            except Exception as e:
-                print(e, question)
+        text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0)
+        docs = text_splitter.split_documents(data_loader)
 
-        self.vectorstore = Chroma.from_documents(
+        self.vectorstore = Chroma.from_documents(
+            docs, self._model.embeddings, persist_directory="./chroma_db"
+        )
 
         self.metadata_field_info = [
             AttributeInfo(
@@ -58,6 +53,14 @@ class Retriever:
 
         document_content_description = "Questões de matérias do ensino médio."
 
+        db = Chroma.from_documents(docs, self._model.embeddings)
+
         self.retriever = SelfQueryRetriever.from_llm(
-            self._model.llm,
+            self._model.llm,
+            self.vectorstore,
+            document_content_description,
+            self.metadata_field_info,
+            verbose=True,
+        )
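Note: the ingestion path changes here from per-question parsing (parse_question over "##Questão" splits) to plain chunking: CharacterTextSplitter cuts the loaded file into roughly 1024-character pieces, which are embedded into a persistent Chroma store. A standalone sketch of the same pipeline (the file name and embedding model are assumptions; the commit itself uses self._model.embeddings from the Gemini wrapper):

    from langchain_community.document_loaders import TextLoader
    from langchain_text_splitters import CharacterTextSplitter
    from langchain.vectorstores import Chroma
    from langchain_google_genai import GoogleGenerativeAIEmbeddings

    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")  # assumed model
    data = TextLoader("banco_de_dados_BIO_HIS_v1.txt", encoding="UTF-8").load()
    # CharacterTextSplitter splits on "\n\n" first, so chunks can exceed
    # chunk_size when a single paragraph is longer than 1024 characters.
    docs = CharacterTextSplitter(chunk_size=1024, chunk_overlap=0).split_documents(data)
    vectorstore = Chroma.from_documents(docs, embeddings, persist_directory="./chroma_db")
    retriever = vectorstore.as_retriever()

Also worth noting: the commit builds two stores, the persisted self.vectorstore behind the SelfQueryRetriever and an in-memory db whose as_retriever() becomes docs_retriever, so the same chunks are embedded twice.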
server/databases/{banco_de_questoes_v3.txt → banco_de_dados_BIO_HIS_v1.txt} RENAMED
The diff for this file is too large to render. See raw diff.
server/llm/gemini.py CHANGED
@@ -84,4 +84,4 @@ class Gemini:
             )
         ]
 
-        self.parser = StructuredOutputParser.from_response_schemas(self.schemas)
+        self.parser = StructuredOutputParser.from_response_schemas(self.schemas)
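Note: the parser line is removed and re-added with identical text, which usually indicates a whitespace or end-of-file fix. For context, the usual shape of this LangChain API (the schema names here are illustrative; the real ones are defined earlier in Gemini and not shown in this diff):

    from langchain.output_parsers import ResponseSchema, StructuredOutputParser

    schemas = [
        ResponseSchema(name="questao", description="statement of the question"),  # assumed name
        ResponseSchema(name="resposta", description="expected answer"),           # assumed name
    ]
    parser = StructuredOutputParser.from_response_schemas(schemas)
    # get_format_instructions() is what feeds format_questions_instructions
    # into the prompt template; parse() expects a ```json fenced blob back.
    instructions = parser.get_format_instructions()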
server/services/generate_questions_service.py CHANGED
@@ -11,39 +11,43 @@ class GenerateQuestionsService:
     _model = Gemini()
 
     def handle(self, query: str):
-
+
         rag_chain = {
             "context": self._retrieve.retriever | RunnableLambda(self._format_docs),
             "question": RunnablePassthrough(),
         } | RunnableLambda(self._get_questions)
+        response_rag = self._retrieve.docs_retriever
 
-
-
+        rag_result = rag_chain.invoke(query)
+        retriever_result = response_rag.invoke(query)
 
-    def _get_questions(self, _dict):
+        print("RAG result:", rag_result)
+        print("Retriever result:", retriever_result)
 
-        question = _dict["question"]
-        context = _dict["context"]
-        messages = self._model.template.format_messages(
-            context=context,
-            question=question,
-            format_questions_instructions=self._model._format_questions_instructions,
-        )
+        return {"rag_result": rag_result, "retriever_result": retriever_result}
+
+    def _get_questions(self, _dict):
 
-
+        question = _dict["question"]
+        context = _dict["context"]
+        messages = self._model.template.format_messages(
+            context=context,
+            question=question,
+            format_questions_instructions=self._model._format_questions_instructions,
+        )
 
-
-        try:
-            chat = ChatGoogleGenerativeAI(model="gemini-pro")
-            response = chat.invoke(messages)
-            return self._model.parser.parse(response.content)
-        except Exception as e:
-            print(e)
-            tries += 1
+        tries = 0
 
-
+        while tries < 3:
+            try:
+                chat = ChatGoogleGenerativeAI(model="gemini-pro")
+                response = chat.invoke(messages)
+                return self._model.parser.parse(response.content)
+            except Exception as e:
+                print(e)
+                tries += 1
 
+        return "Não foi possível gerar as questões."
 
     def _format_docs(self, docs):
         return "\n\n".join(doc.page_content for doc in docs)
-
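Note: handle() now invokes both pipelines, the self-query RAG chain and the plain docs_retriever, and returns both results side by side, with debug prints for comparison. The retry added to _get_questions guards both the gemini-pro call and the structured parse; as a standalone sketch (the helper name and max_tries parameter are illustrative, not from the commit):

    from langchain_google_genai import ChatGoogleGenerativeAI

    def invoke_with_retry(messages, parser, max_tries=3):
        # Both API errors and malformed model output raise, so either one
        # triggers another attempt, up to max_tries.
        tries = 0
        while tries < max_tries:
            try:
                chat = ChatGoogleGenerativeAI(model="gemini-pro")
                response = chat.invoke(messages)
                return parser.parse(response.content)
            except Exception as e:
                print(e)
                tries += 1
        return "Não foi possível gerar as questões."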
|