Update app.py
Browse files
app.py
CHANGED
@@ -4,13 +4,23 @@ from llama_index.core import (
|
|
4 |
VectorStoreIndex,
|
5 |
SimpleDirectoryReader,
|
6 |
Settings,
|
|
|
|
|
7 |
)
|
8 |
-
from llama_index.core import PromptTemplate
|
9 |
from llama_index.llms.gemini import Gemini
|
10 |
from llama_index.embeddings.gemini import GeminiEmbedding
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
import logging
|
12 |
import google.generativeai as genai
|
13 |
-
from dotenv import load_dotenv
|
14 |
from pathlib import Path
|
15 |
|
16 |
load_dotenv()
|
@@ -18,20 +28,29 @@ load_dotenv()
|
|
18 |
# Set logging level
|
19 |
logging.basicConfig(level=logging.INFO)
|
20 |
|
21 |
-
|
22 |
# Configure Gemini Pro
|
23 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
24 |
|
25 |
-
model_gemini_pro_vision = "gemini-
|
|
|
|
|
26 |
|
27 |
# Configure Gemini models
|
28 |
-
Settings.llm = Gemini(model=
|
29 |
-
api_key=os.getenv("GOOGLE_API_KEY"))
|
30 |
Settings.embed_model = GeminiEmbedding(
|
31 |
model_name="models/embedding-001",
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
)
|
34 |
|
|
|
35 |
def load_and_index_pdf(pdf_path):
|
36 |
"""Loads and index the pdf.
|
37 |
|
@@ -42,25 +61,69 @@ def load_and_index_pdf(pdf_path):
|
|
42 |
index (llama_index.core.VectorStoreIndex): The vector index
|
43 |
"""
|
44 |
try:
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
48 |
logging.info("Creating vector store index")
|
49 |
-
index = VectorStoreIndex
|
50 |
return index
|
51 |
-
|
52 |
logging.warning("No documents found in the PDF")
|
53 |
return None
|
54 |
except Exception as e:
|
55 |
logging.error(f"Error loading and indexing PDF: {e}")
|
56 |
return None
|
|
|
|
|
|
|
|
|
57 |
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
Args :
|
62 |
french_text (str): The french text to translate.
|
63 |
-
|
64 |
|
65 |
Returns:
|
66 |
(str): The yipunu translation or an error message.
|
@@ -76,43 +139,55 @@ def translate_text(french_text, index):
|
|
76 |
)
|
77 |
|
78 |
prompt_template = PromptTemplate(template)
|
79 |
-
|
80 |
-
|
81 |
-
text_qa_template=prompt_template
|
82 |
-
)
|
83 |
-
response = query_engine.query(french_text)
|
84 |
logging.info(f"Translation Result: {response.response}")
|
85 |
return response.response
|
86 |
except Exception as e:
|
87 |
logging.error(f"Error during translation: {e}")
|
88 |
return f"Error during translation: {str(e)}"
|
89 |
-
|
|
|
90 |
|
91 |
def main():
|
92 |
"""Main function for streamlit app."""
|
93 |
|
94 |
st.title("French to Yipunu Translation App")
|
95 |
-
|
96 |
-
# PDF
|
97 |
-
|
98 |
-
|
99 |
-
if
|
100 |
-
|
101 |
-
|
102 |
-
with open(temp_file_path, "wb") as f:
|
103 |
-
f.write(uploaded_file.read())
|
104 |
-
|
105 |
-
index = load_and_index_pdf(str(temp_file_path))
|
106 |
if index:
|
|
|
107 |
french_text = st.text_area("Enter French Text:", "Ni vosi yipunu")
|
108 |
if st.button("Translate"):
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
# Clean up temp files
|
113 |
-
os.remove(temp_file_path)
|
114 |
else:
|
115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
|
118 |
if __name__ == "__main__":
|
|
|
4 |
VectorStoreIndex,
|
5 |
SimpleDirectoryReader,
|
6 |
Settings,
|
7 |
+
PromptTemplate,
|
8 |
+
QueryBundle,
|
9 |
)
|
|
|
10 |
from llama_index.llms.gemini import Gemini
|
11 |
from llama_index.embeddings.gemini import GeminiEmbedding
|
12 |
+
from llama_index.core import get_response_synthesizer
|
13 |
+
from llama_index.core.node_parser import SemanticSplitterNodeParser
|
14 |
+
from llama_index.core.retrievers import VectorIndexRetriever
|
15 |
+
from llama_index.core.query_engine import RetrieverQueryEngine
|
16 |
+
from llama_index.core.query_transform import HyDEQueryTransform
|
17 |
+
from llama_index.core.postprocessor import SentenceTransformerRerank
|
18 |
+
from llama_index.core import load_index_from_storage
|
19 |
+
from llama_index.core import StorageContext
|
20 |
+
from llama_index.core.retrievers import QueryFusionRetriever
|
21 |
+
from dotenv import load_dotenv
|
22 |
import logging
|
23 |
import google.generativeai as genai
|
|
|
24 |
from pathlib import Path
|
25 |
|
26 |
load_dotenv()
|
|
|
28 |
# Set logging level
|
29 |
logging.basicConfig(level=logging.INFO)
|
30 |
|
|
|
31 |
# Configure Gemini Pro
|
32 |
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
|
33 |
|
34 |
+
model_gemini_pro_vision = "gemini-pro-vision"
|
35 |
+
model_gemini_pro = "gemini-pro"
|
36 |
+
|
37 |
|
38 |
# Configure Gemini models
|
39 |
+
Settings.llm = Gemini(model=model_gemini_pro, api_key=os.getenv("GOOGLE_API_KEY"))
|
|
|
40 |
Settings.embed_model = GeminiEmbedding(
|
41 |
model_name="models/embedding-001",
|
42 |
+
api_key=os.getenv("GOOGLE_API_KEY")
|
43 |
+
)
|
44 |
+
|
45 |
+
|
46 |
+
# Function to create a Semantic Splitter Node Parser
|
47 |
+
def create_semantic_splitter_node_parser():
    """Build the node parser used to chunk documents before indexing.

    Returns:
        SemanticSplitterNodeParser: A splitter created with ``buffer_size=1``
        and ``breakpoint_percentile_threshold=95`` that embeds text with the
        globally configured ``Settings.embed_model``.
    """
    splitter_options = {
        "buffer_size": 1,
        "breakpoint_percentile_threshold": 95,
        # Read at call time, so the splitter uses whatever embedding model
        # is registered on Settings when this function runs.
        "embed_model": Settings.embed_model,
    }
    return SemanticSplitterNodeParser(**splitter_options)
|
52 |
|
53 |
+
|
54 |
def load_and_index_pdf(pdf_path):
    """Read a PDF and build a vector index over its semantic chunks.

    Args:
        pdf_path (str): Path of the PDF file to load.

    Returns:
        index (llama_index.core.VectorStoreIndex): The vector index built
        from the parsed nodes, or None when the reader returns no documents
        or any step raises an exception.
    """
    try:
        logging.info(f"Loading PDF document from: {pdf_path}")
        reader = SimpleDirectoryReader(input_files=[pdf_path])
        documents = reader.load_data()

        # Nothing to index: report it and bail out early.
        if not documents:
            logging.warning("No documents found in the PDF")
            return None

        logging.info("Creating semantic splitter")
        splitter = create_semantic_splitter_node_parser()
        parsed_nodes = splitter.get_nodes_from_documents(documents)

        logging.info("Creating vector store index")
        return VectorStoreIndex(nodes=parsed_nodes)
    except Exception as e:
        # Any failure (reading, splitting, embedding) is logged and reported
        # to the caller as None rather than propagated.
        logging.error(f"Error loading and indexing PDF: {e}")
        return None
|
79 |
+
|
80 |
+
|
81 |
+
def create_rag_pipeline(index):
    """Creates a RAG pipeline for translation.

    The pipeline retrieves the 5 most similar nodes from ``index``, reranks
    them down to 3 with a sentence-transformer reranker, synthesizes the
    answer in "refine" mode, and rewrites each incoming query with HyDE
    before retrieval.

    Args :
        index (llama_index.core.VectorStoreIndex): The vector index.

    Returns :
        query_engine: A query engine exposing ``query()``; concretely a
        RetrieverQueryEngine wrapped in a TransformQueryEngine that applies
        the HyDE query transform.
    """
    # Imported here because the module-level imports do not include it.
    from llama_index.core.query_engine import TransformQueryEngine

    logging.info("Initializing RAG Pipeline components")

    # setup retriever
    # Retrieve straight from the index that was already built. The previous
    # code constructed a second VectorStoreIndex from ``index.nodes``, an
    # attribute VectorStoreIndex does not provide, and would have re-embedded
    # every node.
    retriever = index.as_retriever(similarity_top_k=5)

    # setup query transformer
    hyde_query_transform = HyDEQueryTransform(llm=Settings.llm)

    # setup reranker
    reranker = SentenceTransformerRerank(top_n=3, model="BAAI/bge-reranker-base")

    # response_synthesizer
    response_synthesizer = get_response_synthesizer(
        response_mode="refine",
    )

    # setup query engine
    # RetrieverQueryEngine has no ``query_transform`` parameter, so the
    # transform is applied by wrapping the engine instead of being passed in.
    base_query_engine = RetrieverQueryEngine(
        retriever=retriever,
        response_synthesizer=response_synthesizer,
        node_postprocessors=[reranker],
    )
    query_engine = TransformQueryEngine(
        base_query_engine,
        query_transform=hyde_query_transform,
    )

    logging.info("RAG Pipeline is configured.")
    return query_engine
|
120 |
+
|
121 |
+
def translate_text(french_text, query_engine):
|
122 |
+
"""Translates french text to Yipunu using a highly optimized RAG.
|
123 |
|
124 |
Args :
|
125 |
french_text (str): The french text to translate.
|
126 |
+
query_engine (llama_index.core.query_engine.RetrieverQueryEngine): The query engine.
|
127 |
|
128 |
Returns:
|
129 |
(str): The yipunu translation or an error message.
|
|
|
139 |
)
|
140 |
|
141 |
prompt_template = PromptTemplate(template)
|
142 |
+
query_bundle = QueryBundle(french_text, custom_prompt=prompt_template)
|
143 |
+
response = query_engine.query(query_bundle)
|
|
|
|
|
|
|
144 |
logging.info(f"Translation Result: {response.response}")
|
145 |
return response.response
|
146 |
except Exception as e:
|
147 |
logging.error(f"Error during translation: {e}")
|
148 |
return f"Error during translation: {str(e)}"
|
149 |
+
|
150 |
+
|
151 |
|
152 |
def _render_translation_form(index):
    """Build the query engine for ``index`` and render the translate form."""
    query_engine = create_rag_pipeline(index)
    french_text = st.text_area("Enter French Text:", "Ni vosi yipunu")
    if st.button("Translate"):
        translation = translate_text(french_text, query_engine)
        st.success(f"Yipunu Translation: {translation}")


def main():
    """Main function for streamlit app.

    Uses the bundled grammar PDF at ``data/parlons_yipunu.pdf`` when it
    exists; otherwise asks the user to upload one. Once a PDF has been
    indexed, shows a text area and a "Translate" button.
    """

    st.title("French to Yipunu Translation App")

    # Construct the path to the PDF in the data folder
    default_pdf_path = Path("data/parlons_yipunu.pdf")

    # Check if the default pdf_file exists.
    if default_pdf_path.exists():
        index = load_and_index_pdf(str(default_pdf_path))
        if index:
            _render_translation_form(index)
        else:
            # load_and_index_pdf only logs its failure; surface it in the UI
            # so the page is not left silently empty.
            st.error("Could not load and index the PDF. See the logs for details.")
        return

    # PDF File Upload
    uploaded_file = st.file_uploader("Upload a PDF file containing the Punu grammar:", type="pdf")
    if uploaded_file is None:
        st.info("Please upload a pdf containing the punu grammar.")
        return

    # Save file to a temporary location
    temp_file_path = Path("temp_file.pdf")
    try:
        with open(temp_file_path, "wb") as f:
            f.write(uploaded_file.read())

        index = load_and_index_pdf(str(temp_file_path))
        if index:
            _render_translation_form(index)
        else:
            st.error("Could not load and index the PDF. See the logs for details.")
    finally:
        # Clean up temp files even when building the pipeline or rendering
        # raises; missing_ok covers the case where the write itself failed.
        temp_file_path.unlink(missing_ok=True)
|
190 |
+
|
191 |
|
192 |
|
193 |
if __name__ == "__main__":
|