Spaces:

YALCINKAYA
/

opsgenius3

Sleeping

YALCINKAYA committed on Oct 23, 2024

Commit

ed324ed

1 Parent(s): b1d9e55

bug fix in extract_relevant_text

Files changed (1) hide show

app.py CHANGED Viewed

@@ -34,22 +34,30 @@ def get_model_and_tokenizer(model_id):
 def extract_relevant_text(response):
     """
-    This function extracts the first 'user' and 'assistant' blocks between
-    <|im_start|> and <|im_end|> in the generated response.
     """
     # Regex to match content between <|im_start|> and <|im_end|> tags
     pattern = re.compile(r"<\|im_start\|>(.*?)<\|im_end\|>", re.DOTALL)
     matches = pattern.findall(response)
-    if len(matches) < 2:
-        return "Unable to extract sufficient data from the response."
-    # Assuming the first match is user and the second match is assistant
-    user_message = matches[0].strip()  # First <|im_start|> block
-    assistant_message = matches[1].strip()  # Second <|im_start|> block
-    # Format the extracted result
-    return f"user: {user_message}\nassistant: {assistant_message}"
 def generate_response(user_input, model_id):
     prompt = formatted_prompt(user_input)

 def extract_relevant_text(response):
     """
+    This function extracts the first complete 'user' and 'assistant' blocks
+    between <|im_start|> and <|im_end|> in the generated response.
+    If the tags are corrupted, it returns the text up to the first <|im_end|> tag.
     """
     # Regex to match content between <|im_start|> and <|im_end|> tags
     pattern = re.compile(r"<\|im_start\|>(.*?)<\|im_end\|>", re.DOTALL)
     matches = pattern.findall(response)
+    # Debugging: print the matches found
+    print("Matches found:", matches)
+    # If complete matches found, extract them
+    if len(matches) >= 2:
+        user_message = matches[0].strip()  # First <|im_start|> block
+        assistant_message = matches[1].strip()  # Second <|im_start|> block
+        return f"user: {user_message}\nassistant: {assistant_message}"
+    # If no complete blocks found, check for a partial extraction
+    if '<|im_end|>' in response:
+        # Extract everything before the first <|im_end|>
+        partial_response = response.split('<|im_end|>')[0].strip()
+        return f"Partial Response: {partial_response}"
+    return "No complete blocks found. Please check the format of the response."
 def generate_response(user_input, model_id):
     prompt = formatted_prompt(user_input)