mgbam committed (verified)
Commit f3076fc · Parent(s): c608949

Update app.py

Files changed (1):
  1. app.py  +30 -11
app.py CHANGED
@@ -33,10 +33,10 @@ class QADataGenerator:
         self._setup_providers()
         self._setup_input_handlers()
         self._initialize_session_state()
-        # Updated prompt template with escaped curly braces for literal output
+        # Updated prompt template with dynamic {num_examples} parameter and escaped curly braces
         self.custom_prompt_template: str = (
             "You are an expert in extracting question and answer pairs from documents. "
-            "Generate 3 Q&A pairs from the following data, formatted as a JSON list of dictionaries. "
+            "Generate {num_examples} Q&A pairs from the following data, formatted as a JSON list of dictionaries. "
             "Each dictionary must have keys 'question' and 'answer'. "
             "The questions should be clear and concise, and the answers must be based solely on the provided data with no external information. "
             "Do not hallucinate. \n\n"
@@ -44,7 +44,7 @@ class QADataGenerator:
             "[{{'question': 'What is the capital of France?', 'answer': 'Paris'}}, "
             "{{'question': 'What is the highest mountain in the world?', 'answer': 'Mount Everest'}}, "
             "{{'question': 'What is the chemical symbol for gold?', 'answer': 'Au'}}]\n\n"
-            "Now, generate 3 Q&A pairs from this data:\n{data}"
+            "Now, generate {num_examples} Q&A pairs from this data:\n{data}"
         )
 
     def _setup_providers(self) -> None:
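The doubled braces in the few-shot block matter: str.format treats {{ and }} as literal braces, so only {num_examples} and {data} are substituted when build_prompt formats the template. A minimal, standalone illustration of that behaviour (the variable names below are illustrative, not taken from app.py):

    # Illustration only: brace escaping with str.format, as used by the prompt template.
    template = (
        "Example format:\n"
        "[{{'question': 'What is the capital of France?', 'answer': 'Paris'}}]\n\n"
        "Now, generate {num_examples} Q&A pairs from this data:\n{data}"
    )
    print(template.format(num_examples=3, data="Paris is the capital of France."))
    # Output: the doubled braces come out as single literal braces:
    # [{'question': 'What is the capital of France?', 'answer': 'Paris'}]
    # Now, generate 3 Q&A pairs from this data:
    # Paris is the capital of France.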
@@ -85,10 +85,11 @@ class QADataGenerator:
                 "provider": "OpenAI",
                 "model": "gpt-4-turbo",
                 "temperature": DEFAULT_TEMPERATURE,
+                "num_examples": 3,  # Default number of Q&A pairs
             },
             "api_key": "",
             "inputs": [],  # List to store input sources
-            "qa_pairs": "",  # Generated Q&A pairs output
+            "qa_pairs": None,  # Generated Q&A pairs output
             "error_logs": [],  # To store any error messages
         }
         for key, value in defaults.items():
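The body of the loop over defaults.items() is outside this hunk, so the following is only an assumption about the usual Streamlit pattern for seeding session state without clobbering values on rerun; it is a sketch, not the app's actual code:

    # Assumed continuation (loop body not shown in the diff): only set keys
    # that are not already present in st.session_state.
    import streamlit as st

    defaults = {
        "config": {"provider": "OpenAI", "model": "gpt-4-turbo", "num_examples": 3},
        "api_key": "",
        "inputs": [],
        "qa_pairs": None,
        "error_logs": [],
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value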
@@ -157,10 +158,12 @@ class QADataGenerator:
 
     def build_prompt(self) -> str:
         """
-        Build the complete prompt using the custom template and aggregated inputs.
+        Build the complete prompt using the custom template, aggregated inputs,
+        and the number of examples.
         """
         data = self.aggregate_inputs()
-        prompt = self.custom_prompt_template.format(data=data)
+        num_examples = st.session_state.config.get("num_examples", 3)
+        prompt = self.custom_prompt_template.format(data=data, num_examples=num_examples)
         st.write("### Built Prompt")
         st.write(prompt)
         return prompt
@@ -242,11 +245,10 @@ class QADataGenerator:
         """
         Parse the LLM response and return a list of Q&A pairs.
         Expects the response to be JSON formatted; if JSON decoding fails,
-        tries to use ast.literal_eval as a fallback.
+        uses ast.literal_eval as a fallback.
         """
         st.write("Parsing response for provider:", provider)
         try:
-            # For non-HuggingFace providers, extract the raw text from the response.
             if provider == "HuggingFace":
                 if isinstance(response, list) and response and "generated_text" in response[0]:
                     raw_text = response[0]["generated_text"]
@@ -260,7 +262,6 @@ class QADataGenerator:
                 self.log_error("Unexpected response format from provider.")
                 return []
 
-            # Attempt to parse using json.loads first.
             try:
                 qa_list = json.loads(raw_text)
             except json.JSONDecodeError as e:
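The docstring says ast.literal_eval is used when json.loads fails, but that fallback sits below this hunk and is not shown. A minimal sketch of the pattern the docstring describes, under the assumption that the model echoes the single-quoted, Python-literal style shown in the prompt's example output (which json.loads rejects):

    # Hypothetical sketch of the json.loads -> ast.literal_eval fallback;
    # the real implementation in app.py lives outside this hunk.
    import ast
    import json

    raw_text = "[{'question': 'What is the capital of France?', 'answer': 'Paris'}]"
    try:
        qa_list = json.loads(raw_text)        # fails: JSON requires double quotes
    except json.JSONDecodeError:
        qa_list = ast.literal_eval(raw_text)  # accepts Python literal syntax
    print(qa_list[0]["answer"])  # Paris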
@@ -297,6 +298,9 @@ def config_ui(generator: QADataGenerator) -> None:
     temperature = st.slider("Temperature", 0.0, 1.0, DEFAULT_TEMPERATURE)
     st.session_state.config["temperature"] = temperature
 
+    num_examples = st.number_input("Number of Q&A Pairs", min_value=1, max_value=10, value=3, step=1)
+    st.session_state.config["num_examples"] = num_examples
+
     api_key = st.text_input(f"{provider} API Key", type="password")
     st.session_state.api_key = api_key
 
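Because value, min_value, max_value, and step are all integers here, st.number_input returns an int, so the substituted prompt reads "Generate 3 Q&A pairs" rather than "Generate 3.0 Q&A pairs". A quick check of that formatting detail in plain Python (no Streamlit needed):

    # Why the integer arguments matter for the rendered prompt text.
    print("Generate {num_examples} Q&A pairs".format(num_examples=3))    # Generate 3 Q&A pairs
    print("Generate {num_examples} Q&A pairs".format(num_examples=3.0))  # Generate 3.0 Q&A pairs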
@@ -347,17 +351,32 @@ def input_ui(generator: QADataGenerator) -> None:
         st.success("Database input added!")
 
 def output_ui(generator: QADataGenerator) -> None:
-    """Display the generated Q&A pairs and provide a download option."""
+    """Display the generated Q&A pairs and provide download options."""
     st.subheader("Q&A Pairs Output")
     if st.session_state.qa_pairs:
         st.write("### Generated Q&A Pairs")
         st.write(st.session_state.qa_pairs)
+
+        # Download as JSON
         st.download_button(
-            "Download Output",
+            "Download as JSON",
             json.dumps(st.session_state.qa_pairs, indent=2),
             file_name="qa_pairs.json",
             mime="application/json"
         )
+
+        # Download as CSV
+        try:
+            df = pd.DataFrame(st.session_state.qa_pairs)
+            csv_data = df.to_csv(index=False)
+            st.download_button(
+                "Download as CSV",
+                csv_data,
+                file_name="qa_pairs.csv",
+                mime="text/csv"
+            )
+        except Exception as e:
+            st.error(f"Error generating CSV: {e}")
     else:
         st.info("No Q&A pairs generated yet.")
 
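The new CSV branch assumes pandas is available as pd elsewhere in app.py (the import is not part of this diff) and that st.session_state.qa_pairs is the list of dicts with 'question' and 'answer' keys that the prompt requests. Under those assumptions, the conversion behaves like this small sketch:

    # Illustration only: what pd.DataFrame(...).to_csv(index=False) produces
    # for the list-of-dicts shape the prompt asks the model to return.
    import pandas as pd

    qa_pairs = [
        {"question": "What is the capital of France?", "answer": "Paris"},
        {"question": "What is the chemical symbol for gold?", "answer": "Au"},
    ]
    print(pd.DataFrame(qa_pairs).to_csv(index=False))
    # question,answer
    # What is the capital of France?,Paris
    # What is the chemical symbol for gold?,Au

Wrapping the conversion in try/except means a malformed qa_pairs value (for example, a plain string left over from an earlier run) surfaces as an st.error message instead of crashing the output view; the JSON download above it is unaffected.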