regraded01 committed
Commit ce61e92 · Parent: 123ba7e

feat: create end-to-end run using fake data/llm

Files changed (2):
  1. app_langchain.py +28 -14
  2. config/model_config.yml +2 -2
app_langchain.py CHANGED
@@ -1,23 +1,37 @@
-import streamlit as st
-import os
-
-from langchain_core.prompts import PromptTemplate
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
 
 from src.utils import load_config_values
 from src.dev_llm import FakeLLM
 
-# Get HuggingFace API key
-api_key_name = "HUGGINGFACE_HUB_TOKEN"
-api_key = os.getenv(api_key_name)
-if api_key is None:
-    st.error(f"Failed to read `{api_key_name}`. Ensure the token is correctly located")
+# TODO: Change this to reflect prod model rather than dev models
+# Initalise fake values and a fake LLM to test out the full pipeline
+tmp_llm = FakeLLM()
+tmp_pdf_text = "This patient is due for an appointment on 1st June 2024" # replace with Runner to a file uploader
 
 # Load in model and pipeline configuration values
-system_message, model_id, template = load_config_values()
-
-prompt = PromptTemplate(
-    template=template,
-    input_variables=["system_message", "user_message"]
+system_message, context_message, model_id = load_config_values(
+    config_keys=[
+        "system_message",
+        "context_message",
+        "model_id",
+    ]
+)
+
+
+prompt = ChatPromptTemplate.from_template(
+    template=context_message,
+)
+
+
+chain = (
+    {
+        "system_message": lambda x: system_message,
+        "pdf_text": lambda x: tmp_pdf_text,
+        "data_to_extract": RunnablePassthrough()
+    }
+    |prompt
+    |tmp_llm
 )
 
-llm = FakeLLM()
+print(chain.invoke("{\"appointment_date\"}"))
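
The new chain injects its constant inputs with lambdas and routes the runtime argument of invoke() through RunnablePassthrough into the prompt. A minimal self-contained sketch of the same wiring, assuming langchain_core's FakeListLLM as a stand-in for src.dev_llm.FakeLLM (which this diff doesn't show) and inlining the context_message template added below in config/model_config.yml:

# A minimal sketch of the new chain wiring, not the repo's code.
# Assumptions: FakeListLLM stands in for src.dev_llm.FakeLLM, and the
# context_message template from config/model_config.yml is inlined.
from langchain_core.language_models import FakeListLLM
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

fake_llm = FakeListLLM(responses=['{"appointment_date": "01/06/2024"}'])

prompt = ChatPromptTemplate.from_template(
    "{system_message}\n\n"
    "Use the text provided and denoted by 3 backticks ```{pdf_text}```.\n"
    "Extract the following values in JSON format.\n{data_to_extract}"
)

chain = (
    {
        # Constants are injected via lambdas that ignore the chain input;
        # the value passed to invoke() flows through RunnablePassthrough
        # into the {data_to_extract} slot. LCEL coerces this dict into a
        # RunnableParallel feeding the prompt.
        "system_message": lambda _: "Return the JSON only.",
        "pdf_text": lambda _: "This patient is due for an appointment on 1st June 2024",
        "data_to_extract": RunnablePassthrough(),
    }
    | prompt
    | fake_llm
)

print(chain.invoke('{"appointment_date"}'))  # -> {"appointment_date": "01/06/2024"}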
config/model_config.yml CHANGED
@@ -1,3 +1,3 @@
-system_message: "Your role is to take PDF documents and extract their raw text into a JSON format that can be uploaded into a database. Return the JSON only. For example if you need to extract information about a report written on 2nd February 2011 with an author called Jane Mary then return this only: {'report_written_date': '02/02/2011', 'author_name': 'Jane Mary'} Another example would be a clinical exam passed by a student on the 3rd of July 2022 would return this only: {'result' : 'pass', 'date_of_exam' : '03/07/2022'}"
-template: "System: {system_message}\nUser: {user_message}"
+system_message: "Your role is to take PDF documents and extract their raw text into a JSON format that can be uploaded into a database. Return the JSON only. \nFor example if you need to extract information about a report written on 2nd February 2011 with an author called Jane Mary then return this only: {'report_written_date': '02/02/2011', 'author_name': 'Jane Mary'}\nAnother example would be a clinical exam passed by a student on the 3rd of July 2022 would return this only: {'result' : 'pass', 'date_of_exam' : '03/07/2022'}"
+context_message: "{system_message}\n\nUse the text provided and denoted by 3 backticks ```{pdf_text}```. \nExtract the following values in JSON format.\n{data_to_extract}"
 model_id: "meta-llama/Llama-2-70b-chat-hf"