# Hugging Face Space: baconnier/prompt-plus-plus (status: Running)
# File size: 9,305 Bytes

import json
import re
from typing import Optional, Dict, Any, Union, List, Tuple
from pydantic import BaseModel, Field, validator
from huggingface_hub import InferenceClient
from huggingface_hub.errors import HfHubHTTPError
from variables import *

class LLMResponse(BaseModel):
    """Validated shape of a prompt-refinement answer.

    The validators below normalise the text fields: surrounding whitespace is
    stripped and the literal backslash-n / backslash-quote sequences are
    replaced by a real newline and a plain double quote.
    """

    initial_prompt_evaluation: str = Field(..., description="Evaluation of the initial prompt")
    refined_prompt: str = Field(..., description="The refined version of the prompt")
    explanation_of_refinements: Union[str, List[str]] = Field(..., description="Explanation of the refinements made")
    response_content: Optional[Dict[str, Any]] = Field(None, description="Raw response content")

    @validator('initial_prompt_evaluation', 'refined_prompt')
    def clean_text_fields(cls, v):
        """Strip a string value and unescape its literal newline/quote sequences."""
        if not isinstance(v, str):
            return v
        stripped = v.strip()
        with_newlines = stripped.replace('\\n', '\n')
        return with_newlines.replace('\\"', '"')

    @validator('explanation_of_refinements')
    def clean_refinements(cls, v):
        """Clean a single explanation string or every string in a list of them.

        List entries additionally get bullet characters replaced by dashes;
        entries that are not strings are left out of the returned list.
        """
        if isinstance(v, list):
            cleaned_items = []
            for item in v:
                if not isinstance(item, str):
                    continue
                text = item.strip().replace('\\n', '\n').replace('\\"', '"')
                cleaned_items.append(text.replace('•', '-'))
            return cleaned_items
        if isinstance(v, str):
            return v.strip().replace('\\n', '\n').replace('\\"', '"')
        return v

class PromptRefiner:
    def __init__(self, api_token: str, meta_prompts: dict):
        """Create the inference client and remember the meta-prompt templates.

        Args:
            api_token: Token handed to ``InferenceClient``.
            meta_prompts: Mapping from meta-prompt name to template text;
                ``refine_prompt`` looks templates up in it by name.
        """
        # timeout=120 is passed straight to the client (presumably seconds).
        self.client = InferenceClient(timeout=120, token=api_token)
        self.meta_prompts = meta_prompts

    def _clean_json_string(self, content: str) -> str:
        """Clean and prepare JSON string for parsing."""
        content = content.replace('•', '-')  # Replace bullet points
        content = re.sub(r'\s+', ' ', content)  # Normalize whitespace
        content = content.replace('\\"', '"')  # Fix escaped quotes
        return content.strip()

    def _parse_response(self, response_content: str) -> dict:
        """Parse the LLM response with enhanced error handling."""
        try:
            # Extract content between <json> tags
            json_match = re.search(r'<json>\s*(.*?)\s*</json>', response_content, re.DOTALL)
            if json_match:
                json_str = self._clean_json_string(json_match.group(1))
                try:
                    # Try parsing the cleaned JSON
                    parsed_json = json.loads(json_str)
                    if isinstance(parsed_json, str):
                        parsed_json = json.loads(parsed_json)
                    
                    return {
                        "initial_prompt_evaluation": parsed_json.get("initial_prompt_evaluation", ""),
                        "refined_prompt": parsed_json.get("refined_prompt", ""),
                        "explanation_of_refinements": parsed_json.get("explanation_of_refinements", ""),
                        "response_content": parsed_json
                    }
                except json.JSONDecodeError:
                    # If JSON parsing fails, try regex parsing
                    return self._parse_with_regex(json_str)
            
            # If no JSON tags found, try regex parsing
            return self._parse_with_regex(response_content)

        except Exception as e:
            print(f"Error parsing response: {str(e)}")
            print(f"Raw content: {response_content}")
            return self._create_error_dict(str(e))

    def _parse_with_regex(self, content: str) -> dict:
        """Parse content using regex when JSON parsing fails."""
        output = {}
        
        # Handle explanation_of_refinements list format
        refinements_match = re.search(r'"explanation_of_refinements":\s*\[(.*?)\]', content, re.DOTALL)
        if refinements_match:
            refinements_str = refinements_match.group(1)
            refinements = [
                item.strip().strip('"').strip("'").replace('•', '-')
                for item in re.findall(r'[•"]([^"•]+)[•"]', refinements_str)
            ]
            output["explanation_of_refinements"] = refinements
        else:
            # Try single string format
            pattern = r'"explanation_of_refinements":\s*"(.*?)"(?:,|\})'
            match = re.search(pattern, content, re.DOTALL)
            output["explanation_of_refinements"] = match.group(1).strip() if match else ""

        # Extract other fields
        for key in ["initial_prompt_evaluation", "refined_prompt"]:
            pattern = rf'"{key}":\s*"(.*?)"(?:,|\}})'
            match = re.search(pattern, content, re.DOTALL)
            output[key] = match.group(1).strip() if match else ""
        
        output["response_content"] = content
        return output

    def _create_error_dict(self, error_message: str) -> dict:
        """Create a standardized error response dictionary."""
        return {
            "initial_prompt_evaluation": f"Error parsing response: {error_message}",
            "refined_prompt": "",
            "explanation_of_refinements": "",
            "response_content": {"error": error_message}
        }

    def refine_prompt(self, prompt: str, meta_prompt_choice: str) -> Tuple[str, str, str, dict]:
        """Refine ``prompt`` using the selected meta-prompt template.

        The template's ``[Insert initial prompt here]`` placeholder is replaced
        by ``prompt``, the result is sent to the chat-completion endpoint, and
        the reply is parsed and validated through ``LLMResponse``.

        Args:
            prompt: The user's initial prompt.
            meta_prompt_choice: Name of the template in ``self.meta_prompts``;
                unknown names fall back to the ``"star"`` template.

        Returns:
            ``(initial_prompt_evaluation, refined_prompt,
            explanation_of_refinements, full_response_dict)``. On any failure
            the tuple produced by ``_create_error_response`` is returned
            instead; this method does not raise.
        """
        try:
            # Resolve the fallback lazily. Passing self.meta_prompts["star"] as
            # the default of dict.get() evaluates it even when the requested
            # template exists, raising KeyError if "star" is not configured.
            if meta_prompt_choice in self.meta_prompts:
                selected_meta_prompt = self.meta_prompts[meta_prompt_choice]
            else:
                selected_meta_prompt = self.meta_prompts["star"]

            messages = [
                {
                    "role": "system",
                    "content": 'You are an expert at refining and extending prompts. Given a basic prompt, provide a more relevant and detailed prompt.'
                },
                {
                    "role": "user",
                    "content": selected_meta_prompt.replace("[Insert initial prompt here]", prompt)
                }
            ]

            response = self.client.chat_completion(
                model=prompt_refiner_model,
                messages=messages,
                max_tokens=3000,
                temperature=0.8
            )

            raw_content = response.choices[0].message.content
            if raw_content is None:
                # Guard against a reply with no text so the caller gets a clear
                # message rather than an AttributeError from .strip().
                return self._create_error_response("The model returned an empty response.")

            response_content = raw_content.strip()
            result = self._parse_response(response_content)

            try:
                llm_response = LLMResponse(**result)
                return (
                    llm_response.initial_prompt_evaluation,
                    llm_response.refined_prompt,
                    llm_response.explanation_of_refinements,
                    llm_response.dict()
                )
            except Exception as e:
                print(f"Error creating LLMResponse: {e}")
                return self._create_error_response(f"Error validating response: {str(e)}")

        except HfHubHTTPError as e:
            # Keep the user-facing message unchanged, but record the real HTTP
            # error so failures other than timeouts can be diagnosed.
            print(f"Hugging Face Hub HTTP error: {e}")
            return self._create_error_response("Model timeout. Please try again later.")
        except Exception as e:
            return self._create_error_response(f"Unexpected error: {str(e)}")

    def _create_error_response(self, error_message: str) -> Tuple[str, str, str, dict]:
        """Create a standardized error response tuple."""
        return (
            f"Error: {error_message}",
            "The selected model is currently unavailable.",
            "An error occurred during processing.",
            {"error": error_message}
        )

    def apply_prompt(self, prompt: str, model: str) -> str:
        """Apply formatting to the prompt using the specified model."""
        try:
            messages = [
                {
                    "role": "system",
                    "content": """You are a markdown formatting expert. Format your responses with proper spacing and structure following these rules:
                        1. Paragraph Spacing:
                        - Add TWO blank lines between major sections (##)
                        - Add ONE blank line between subsections (###)
                        - Add ONE blank line between paragraphs within sections
                        - Add ONE blank line before and after lists
                        - Add ONE blank line before and after code blocks
                        - Add ONE blank line before and after blockquotes
                        
                        2. Section Formatting:
                        # Title
                        
                        ## Major Section
                        
                        [blank line]
                        Content paragraph 1
                        [blank line]
                        Content paragraph 2
                        [blank line]"""
                },
                {
                    "role": "user",
                    "content": prompt
                }
            ]
    
            response = self.client.chat_completion(
                model=model,
                messages=messages,
                max_tokens=3000,
                temperature=0.8,
                stream=True
            )
            
            full_response = ""
            for chunk in response:
                if chunk.choices[0].delta.content is not None:
                    full_response += chunk.choices[0].delta.content
                    
            return full_response.replace('\n\n', '\n').strip()
                
        except Exception as e:
            return f"Error: {str(e)}"