# File size: 1,683 Bytes
# 6fadbbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import re
from services.model_visitor import ModelVisitor


class IbmExtractCodeblock(ModelVisitor):
    """ModelVisitor whose ``visit`` extracts code snippets from a text blob."""

    # Compiled once at class creation instead of on every call. The pattern
    # text itself is documented in ``_get_code_block``.
    _CODE_BLOCK_PATTERN = re.compile(
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``; the first argument is ignored."""
        return self._get_code_block(data)

    def _get_code_block(self, data):
        r"""
            Extracts code snippets from the input string based on a specific pattern.
            Args:
                data (str): The input string containing text blocks. An empty
                    string or None yields an empty result.
            Returns:
                str: Every extracted snippet, stripped of surrounding whitespace
                and joined with a newline so that consecutive snippets do not run
                into each other. Empty string when nothing matches.
            Regex Pattern:
            (?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```
            - (?:### Output: ([\s\S]*?)): This part matches patterns that start with '### Output: '
            followed by any characters including newlines, capturing them within a group.
            - (?:\<\|endoftext\|\>|\Z): This part matches either the string <|endoftext|>
            or the end of the string (\Z).
            - |: This is an OR operator, meaning the regex will match either the pattern
            before or after it.
            - ```(?:\w+)?\n(.*?)\n```: This part matches text enclosed within triple backticks (```),
            where the opening fence may be followed by an optional run of word characters (\w+)
            (e.g. a language tag), capturing everything between the fences, newlines included
            (the pattern is compiled with re.DOTALL).
        """
        if not data:
            # Nothing to scan; also avoids a TypeError from the regex on None.
            return ''

        snippets = []
        # With two capture groups, findall yields (output_section, fenced_body)
        # tuples; the alternative that did not take part in a match is ''.
        for output_section, fenced_body in self._CODE_BLOCK_PATTERN.findall(data):
            snippet = (output_section or fenced_body).strip()
            if snippet:
                # Skip whitespace-only captures so they add no blank lines.
                snippets.append(snippet)

        # Newline separator: joining with '' would glue the last line of one
        # snippet onto the first line of the next and corrupt the code.
        return '\n'.join(snippets)