|
import re |
|
from services.model_visitor import ModelVisitor |
|
|
|
|
|
class IbmExtractCodeblock(ModelVisitor):
    """Visitor that pulls generated code out of a raw model response string.

    Two layouts are recognised: a ``### Output: `` section that runs up to
    ``<|endoftext|>`` (or the end of the text), and fenced blocks delimited by
    triple backticks.
    """

    # Compiled once for the class instead of on every call. DOTALL lets the
    # ``.`` inside the fenced-block group span multiple lines.
    _CODE_BLOCK_RE = re.compile(
        r'(?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```',
        re.DOTALL,
    )

    def visit(self, _, data):
        """Return the code extracted from ``data``.

        Args:
            _: Ignored by this visitor (presumably the visited model object
                required by the ``ModelVisitor`` interface -- confirm there).
            data (str): Raw response text to search for code.

        Returns:
            str: The extracted code, or an empty string when nothing matches.
        """
        return self._get_code_block(data)

    def _get_code_block(self, data):
        r"""Extract code from a response string and return it as one string.

        Every non-overlapping match is collected in order of appearance and
        stripped of surrounding whitespace. Non-empty results are joined with
        a newline so that the last line of one block never runs into the first
        line of the next.

        Args:
            data (str): The input string containing text blocks.

        Returns:
            str: The extracted code, or an empty string when nothing matches.

        Regex Pattern:
            (?:### Output: ([\s\S]*?))(?:\<\|endoftext\|\>|\Z)|```(?:\w+)?\n(.*?)\n```

            - (?:### Output: ([\s\S]*?)): matches the literal '### Output: '
              and lazily captures everything after it, newlines included.
            - (?:\<\|endoftext\|\>|\Z): ends that capture at the literal
              <|endoftext|> or at the end of the string (\Z).
            - |: alternation; a match comes from either the part before it or
              the part after it, so at most one capture group is non-empty.
            - ```(?:\w+)?\n(.*?)\n```: matches a block fenced by triple
              backticks, where the opening fence may be followed by a run of
              word characters (e.g. a language tag), and lazily captures the
              lines between the fences.
        """
        snippets = []
        for output_body, fenced_body in self._CODE_BLOCK_RE.findall(data):
            # Only one alternative can have matched; the other group is ''.
            snippet = (output_body or fenced_body).strip()
            if snippet:
                snippets.append(snippet)
        # A newline separator keeps adjacent blocks from being fused together.
        return '\n'.join(snippets)
|
|