chatbot-g-pdf

Runtime error

App Files Community

Achille Thin - Genesis committed on Feb 27, 2024

Commit

4b75db9

1 Parent(s): 63c3d58

adding local data loading

Browse files

Files changed (1) hide show

app.py +30 -13

app.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import os
 import json
 import gradio as gr
 from llama_index import (
     VectorStoreIndex,
     download_loader,
 )
 import chromadb
 from llama_index.llms import MistralAI
 from llama_index.embeddings import MistralAIEmbedding
 from llama_index.vector_stores import ChromaVectorStore
@@ -21,7 +22,7 @@ placeholder = (
 placeholder_url = "Extract text from this url"
 llm_model = "mistral-small"
-env_api_key = os.environ.get("MISTRAL_API_KEY")
 query_engine = None
 # Define LLMs
@@ -52,7 +53,10 @@ def get_documents_in_db():
     print("Fetching documents in DB")
     docs = []
     for item in chroma_collection.get(include=["metadatas"])["metadatas"]:
-        docs.append(json.loads(item["_node_content"])["metadata"]["file_name"])
     docs = list(set(docs))
     print(f"Found {len(docs)} documents")
     out = "**List of files in db:**\n"
@@ -81,17 +85,29 @@ def load_file(file):
     )
 def load_local_data(data_folder):
-    ids = chroma_collection.get()["ids"]
-    chroma_collection.delete(ids)
-    print('Cleaning DB')
     for file in os.listdir(data_folder):
-        print('Adding file ' + file + ' to DB')
-        documents = loader.load_data(file= data_folder + file)
-        for doc in documents:
-            index.insert(doc)
 def load_document(input_file):
     file_name = input_file.name.split("/")[-1]
@@ -124,7 +140,6 @@ with gr.Blocks() as demo:
             file_msg = gr.Textbox(
                 label="Loaded documents:", container=False, visible=False
             )
             input_file.upload(
                 fn=load_document,
                 inputs=[
@@ -134,6 +149,8 @@ with gr.Blocks() as demo:
                 concurrency_limit=20,
             )
             help_msg = gr.Markdown(
                 value="Once the document is loaded, press the Encode button below to add it to the db."
             )

 import os
 import json
+import pandas as pd
 import gradio as gr
 from llama_index import (
     VectorStoreIndex,
     download_loader,
 )
 import chromadb
+from llama_index import Document
 from llama_index.llms import MistralAI
 from llama_index.embeddings import MistralAIEmbedding
 from llama_index.vector_stores import ChromaVectorStore
 placeholder_url = "Extract text from this url"
 llm_model = "mistral-small"
 # Read the Mistral API key from the environment so that no secret is stored
 # in the source file; the value is None when MISTRAL_API_KEY is not set.
 # NOTE(review): any key that was ever committed in place of this lookup
 # should be treated as leaked and rotated.
 env_api_key = os.environ.get("MISTRAL_API_KEY")
 query_engine = None
 # Define LLMs
     print("Fetching documents in DB")
     docs = []
     for item in chroma_collection.get(include=["metadatas"])["metadatas"]:
+        try:
+            docs.append(json.loads(item["_node_content"])["metadata"]["file_name"])
+        except:
+            pass
     docs = list(set(docs))
     print(f"Found {len(docs)} documents")
     out = "**List of files in db:**\n"
     )
 def load_local_data(data_folder):
     """Insert the supported files found directly in *data_folder* into the index.

     Each directory entry is handled according to its name:

     * ``*.pdf`` -- parsed with the module-level ``loader`` and every returned
       document is inserted.
     * ``*.txt`` -- read as UTF-8 text and inserted as a single ``Document``.
     * ``price_by_crop.csv`` -- the string form of the DataFrame, preceded by
       a short English description, is inserted as a single ``Document``.
     * ``data_cout_production_grandes_cultures_2021_2025.xlsx`` -- one French
       sentence per row whose ``ANNEE`` equals 2024 is built and the
       concatenation is inserted as a single ``Document``.

     Any other entry (including sub-directories) is ignored.  The function
     relies on ``index`` and ``loader`` being defined at module level
     elsewhere in the file.

     Args:
         data_folder: Path of the directory to scan; a trailing separator is
             accepted but not required.

     Returns:
         None.  The function only has side effects (prints and index inserts).
     """
     # A missing folder is reported instead of raising while the UI is built.
     if not os.path.isdir(data_folder):
         print('Skipping missing data folder ' + data_folder)
         return

     for file in os.listdir(data_folder):
         # os.path.join works whether or not data_folder ends with a separator.
         path = os.path.join(data_folder, file)

         if file.endswith('.pdf'):
             print('Adding file ' + file + ' to DB')
             for doc in loader.load_data(file=path):
                 index.insert(doc)

         elif file.endswith('.txt'):
             print('Adding file ' + file + ' to DB')
             # Explicit encoding so the result does not depend on the locale.
             with open(path, 'r', encoding='utf-8') as f:
                 text = f.read()
             # The file is closed before the (potentially slow) insert.
             index.insert(Document(text=text))

         elif file == 'price_by_crop.csv':
             print('Adding file ' + file + ' to DB')
             prices_text = 'The price of some agricultural data is given by this csv: It displays three scenario, a mean, an optimistic, and a pessimistic' + str(pd.read_csv(path))
             index.insert(Document(text=prices_text))

         elif file == 'data_cout_production_grandes_cultures_2021_2025.xlsx':
             print('Adding file ' + file + ' to DB')
             # Only the rows for year 2024 are turned into sentences.
             sentences = [
                 f"Le coût de production par tonne en moyenne pour {row['CULTURES']} était {row['MOYENNE']} euros par tonne avec un scénario moyen, {row['QUART INFERIEUR']} pour un scénario optimiste, et {row['QUART SUPERIEUR']} pour un scénario pessimiste. \n"
                 for _, row in pd.read_excel(path).iterrows()
                 if row['ANNEE'] == 2024
             ]
             if not sentences:
                 # Avoid inserting an empty document when no row matches.
                 print('No 2024 rows found in ' + file)
                 continue
             index.insert(Document(text="".join(sentences)))
 def load_document(input_file):
     file_name = input_file.name.split("/")[-1]
             file_msg = gr.Textbox(
                 label="Loaded documents:", container=False, visible=False
             )
             input_file.upload(
                 fn=load_document,
                 inputs=[
                 concurrency_limit=20,
             )
+            load_local_data('data/')
+            load_local_data('data/pdf/')
             help_msg = gr.Markdown(
                 value="Once the document is loaded, press the Encode button below to add it to the db."
             )