mgbam commited on
Commit
7b16658
·
verified ·
1 Parent(s): 8d86c18

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -17
app.py CHANGED
@@ -14,7 +14,8 @@ from typing import Dict, Any
14
 
15
  # Constants for Default Values and API URLs
16
  HF_API_URL = "https://api-inference.huggingface.co/models/"
17
- DEFAULT_TEMPERATURE = 0.3
 
18
 
19
  class SyntheticDataGenerator:
20
  """
@@ -34,7 +35,7 @@ class SyntheticDataGenerator:
34
  },
35
  "Groq": {
36
  "client": lambda key: groq.Groq(api_key=key),
37
- "models": ["mixtral-8x7b-32768"]
38
  },
39
  "HuggingFace": {
40
  "client": lambda key: {"headers": {"Authorization": f"Bearer {key}"}},
@@ -42,7 +43,7 @@ class SyntheticDataGenerator:
42
  },
43
  "Google": {
44
  "client": lambda key: self._configure_google_genai(key), # Using a custom configure function
45
- "models": ["gemini-2.0-pro"] # Add supported Gemini models. Consider adding "gemini-1.5-pro" when released.
46
  },
47
  }
48
 
@@ -76,8 +77,8 @@ class SyntheticDataGenerator:
76
  'errors': []
77
  },
78
  'config': {
79
- 'provider': "Deepseek",
80
- 'model': "deepseek-chat",
81
  'temperature': DEFAULT_TEMPERATURE
82
  }
83
  }
@@ -160,13 +161,22 @@ class SyntheticDataGenerator:
160
  stream = img['stream']
161
  width = int(stream.get('Width', 0))
162
  height = int(stream.get('Height', 0))
163
- if width > 0 and height > 0:
164
- images.append({
165
- "data": Image.frombytes("RGB", (width, height), stream.get_data()),
166
- "meta": {"dims": (width, height)}
167
- })
 
 
 
 
 
 
 
 
 
168
  except Exception as e:
169
- self.log_error(f"Image Error: {str(e)}")
170
  return images
171
 
172
  # Core Generation Engine
@@ -198,6 +208,7 @@ class SyntheticDataGenerator:
198
  client = client_initializer(api_key)
199
 
200
  for i, input_data in enumerate(st.session_state.inputs):
 
201
  st.session_state.processing['progress'] = (i+1)/len(st.session_state.inputs)
202
 
203
  if st.session_state.config['provider'] == "HuggingFace":
@@ -219,6 +230,8 @@ class SyntheticDataGenerator:
219
  def _standard_inference(self, client, input_data):
220
  """Performs inference using standard OpenAI-compatible API."""
221
  try:
 
 
222
  return client.chat.completions.create(
223
  model=st.session_state.config['model'],
224
  messages=[{
@@ -249,13 +262,16 @@ class SyntheticDataGenerator:
249
  def _google_inference(self, client, input_data):
250
  """Performs inference using Google Generative AI API."""
251
  try:
252
-
253
  model = client(st.session_state.config['model']) # Instantiate the model with the selected model name
254
  response = model.generate_content(
255
  self._build_prompt(input_data),
256
  generation_config = genai.types.GenerationConfig(temperature=st.session_state.config['temperature'])
257
 
258
  )
 
 
 
 
259
  return response
260
  except Exception as e:
261
  self.log_error(f"Google GenAI Inference Error: {e}")
@@ -263,9 +279,13 @@ class SyntheticDataGenerator:
263
 
264
  def _build_prompt(self, input_data):
265
  """Builds the prompt for the LLM based on the input data type."""
266
- base = "Generate 3 Q&A pairs from this financial content, formatted as a JSON list of dictionaries with 'question' and 'answer' keys:\n"
 
 
 
 
267
  if input_data['meta']['type'] == 'csv':
268
- return base + "Structured data:\n" + input_data['text']
269
  elif input_data['meta']['type'] == 'api':
270
  return base + "API response:\n" + input_data['text']
271
  return base + input_data['text']
@@ -294,8 +314,16 @@ class SyntheticDataGenerator:
294
  return [] # Return empty in case of parsing failure
295
  else:
296
  # Assuming JSON response from other providers (OpenAI, Deepseek, Groq)
297
- json_output = json.loads(response.choices[0].message.content) # load the JSON data
298
- return json_output.get("qa_pairs", []) # Return the qa_pairs
 
 
 
 
 
 
 
 
299
  except Exception as e:
300
  self.log_error(f"Parse Error: {e}. Raw Response: {response}")
301
  return []
@@ -327,7 +355,7 @@ def input_sidebar(gen: SyntheticDataGenerator):
327
  st.session_state['api_key'] = api_key #Store API Key
328
 
329
  model = st.selectbox("Model", provider_cfg["models"])
330
- temp = st.slider("Temperature", 0.0, 1.0, DEFAULT_TEMPERATURE)
331
 
332
  # Update session config
333
  st.session_state.config.update({
 
14
 
15
  # Constants for Default Values and API URLs
16
  HF_API_URL = "https://api-inference.huggingface.co/models/"
17
+ DEFAULT_TEMPERATURE = 0.1  # Lowered default temperature for more deterministic generations
18
+ MODEL = "mixtral-8x7b-32768"  # Default Groq model identifier
19
 
20
  class SyntheticDataGenerator:
21
  """
 
35
  },
36
  "Groq": {
37
  "client": lambda key: groq.Groq(api_key=key),
38
+ "models": [MODEL]
39
  },
40
  "HuggingFace": {
41
  "client": lambda key: {"headers": {"Authorization": f"Bearer {key}"}},
 
43
  },
44
  "Google": {
45
  "client": lambda key: self._configure_google_genai(key), # Using a custom configure function
46
+ "models": ["gemini-pro"] # Currently supported Gemini model; add newer Gemini models here as they become available.
47
  },
48
  }
49
 
 
77
  'errors': []
78
  },
79
  'config': {
80
+ 'provider': "Groq",
81
+ 'model': MODEL,
82
  'temperature': DEFAULT_TEMPERATURE
83
  }
84
  }
 
161
  stream = img['stream']
162
  width = int(stream.get('Width', 0))
163
  height = int(stream.get('Height', 0))
164
+ image_data = stream.get_data() # Get the image data
165
+ if width > 0 and height > 0 and image_data: #CHECK image_data
166
+ try:
167
+ image = Image.frombytes("RGB", (width, height), image_data)
168
+ images.append({
169
+ "data": image,
170
+ "meta": {"dims": (width, height)}
171
+ })
172
+ except Exception as e:
173
+ self.log_error(f"Image Creation Error: {str(e)}") # Log specific image creation errors.
174
+ else:
175
+ self.log_error(f"Image Error: Insufficient image data or invalid dimensions (width={width}, height={height})")
176
+
177
+
178
  except Exception as e:
179
+ self.log_error(f"Image Extraction Error: {str(e)}") # More general extraction error
180
  return images
181
 
182
  # Core Generation Engine
 
208
  client = client_initializer(api_key)
209
 
210
  for i, input_data in enumerate(st.session_state.inputs):
211
+
212
  st.session_state.processing['progress'] = (i+1)/len(st.session_state.inputs)
213
 
214
  if st.session_state.config['provider'] == "HuggingFace":
 
230
  def _standard_inference(self, client, input_data):
231
  """Performs inference using standard OpenAI-compatible API."""
232
  try:
233
+
234
+ #st.write(input_data['text']) # debugging data
235
  return client.chat.completions.create(
236
  model=st.session_state.config['model'],
237
  messages=[{
 
262
  def _google_inference(self, client, input_data):
263
  """Performs inference using Google Generative AI API."""
264
  try:
 
265
  model = client(st.session_state.config['model']) # Instantiate the model with the selected model name
266
  response = model.generate_content(
267
  self._build_prompt(input_data),
268
  generation_config = genai.types.GenerationConfig(temperature=st.session_state.config['temperature'])
269
 
270
  )
271
+
272
+ st.write("Google API Response:") # Debugging: Print the raw response
273
+ st.write(response.text)
274
+
275
  return response
276
  except Exception as e:
277
  self.log_error(f"Google GenAI Inference Error: {e}")
 
279
 
280
  def _build_prompt(self, input_data):
281
  """Builds the prompt for the LLM based on the input data type."""
282
+ base = "Generate a JSON list of 3 dictionaries like this: \n"
283
+ base+= '[{"question":"Example Question", "answer":"Example Answer"},'
284
+ base+= '{"question":"Example Question", "answer":"Example Answer"},'
285
+ base+= '{"question":"Example Question", "answer":"Example Answer"}]'
286
+ base+= 'Here is the data:\n'
287
  if input_data['meta']['type'] == 'csv':
288
+ return base + "Data:\n" + input_data['text']
289
  elif input_data['meta']['type'] == 'api':
290
  return base + "API response:\n" + input_data['text']
291
  return base + input_data['text']
 
314
  return [] # Return empty in case of parsing failure
315
  else:
316
  # Assuming JSON response from other providers (OpenAI, Deepseek, Groq)
317
+ if not response or not response.choices or not response.choices[0].message.content:
318
+ self.log_error("Empty or malformed response from LLM.")
319
+ return []
320
+
321
+ try:
322
+ json_output = json.loads(response.choices[0].message.content) # load the JSON data
323
+ return json_output.get("qa_pairs", []) # Return the qa_pairs
324
+ except json.JSONDecodeError as e:
325
+ self.log_error(f"JSON Parse Error: {e}. Raw Response: {response.choices[0].message.content}")
326
+ return []
327
  except Exception as e:
328
  self.log_error(f"Parse Error: {e}. Raw Response: {response}")
329
  return []
 
355
  st.session_state['api_key'] = api_key #Store API Key
356
 
357
  model = st.selectbox("Model", provider_cfg["models"])
358
+ temp = st.slider("Temperature", 0.0, 1.0, DEFAULT_TEMPERATURE)  # Slider default comes from DEFAULT_TEMPERATURE
359
 
360
  # Update session config
361
  st.session_state.config.update({