|
|
|
""" |
|
Utilities for working with strings and text. |
|
|
|
Inheritance diagram: |
|
|
|
.. inheritance-diagram:: IPython.utils.text |
|
:parts: 3 |
|
""" |
|
|
|
import os |
|
import re |
|
import string |
|
import sys |
|
import textwrap |
|
from string import Formatter |
|
from pathlib import Path |
|
|
|
|
|
|
|
# strftime pattern for a long-form date.  Windows gets the plain ``%d`` day
# directive; every other platform gets ``%-d``.
date_format = "%B %d, %Y" if sys.platform == 'win32' else "%B %-d, %Y"
|
|
|
class LSString(str):
    """String subclass exposing several alternative views of its content.

    Instances behave like ordinary strings and additionally offer these
    read-only attributes:

        .l (or .list) : value as list (split on newlines).
        .n (or .nlstr): original value (the string itself).
        .s (or .spstr): value with every newline replaced by a space.
        .p (or .paths): list of pathlib.Path objects, one for each line
                        that names an existing filesystem entry.

    Views that need a transformation are computed on first access and then
    cached on the instance.

    Such strings are handy when interacting with the shell, which
    typically only understands whitespace-separated options for commands."""

    def get_list(self):
        """Return the string split on newlines (computed once, then cached)."""
        try:
            lines = self.__list
        except AttributeError:
            lines = self.__list = self.split('\n')
        return lines

    l = list = property(get_list)

    def get_spstr(self):
        """Return the string with newlines turned into spaces (cached)."""
        try:
            flat = self.__spstr
        except AttributeError:
            flat = self.__spstr = self.replace('\n',' ')
        return flat

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the string itself; it is already newline-separated."""
        return self

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the lines that exist on disk (cached)."""
        try:
            found = self.__paths
        except AttributeError:
            # Lines that do not name an existing entry are silently skipped.
            existing = filter(os.path.exists, self.split('\n'))
            found = self.__paths = [Path(entry) for entry in existing]
        return found

    p = paths = property(get_paths)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class SList(list):
    """List subclass exposing several alternative views of its content.

    Instances behave like ordinary lists and additionally offer these
    read-only attributes:

    * .l (or .list) : value as list (the list itself).
    * .n (or .nlstr): value as a string, joined on newlines.
    * .s (or .spstr): value as a string, joined on spaces.
    * .p (or .paths): list of pathlib.Path objects, one for each item that
      names an existing filesystem entry.

    Views that need a transformation are computed on first access and then
    cached on the instance; the cache is not refreshed if the list is
    modified afterwards."""

    def get_list(self):
        """Return the list itself."""
        return self

    l = list = property(get_list)

    def get_spstr(self):
        """Return the items joined with single spaces (cached)."""
        try:
            return self.__spstr
        except AttributeError:
            self.__spstr = ' '.join(self)
            return self.__spstr

    s = spstr = property(get_spstr)

    def get_nlstr(self):
        """Return the items joined with newlines (cached)."""
        try:
            return self.__nlstr
        except AttributeError:
            self.__nlstr = '\n'.join(self)
            return self.__nlstr

    n = nlstr = property(get_nlstr)

    def get_paths(self):
        """Return Path objects for the items that exist on disk (cached)."""
        try:
            return self.__paths
        except AttributeError:
            self.__paths = [Path(p) for p in self if os.path.exists(p)]
            return self.__paths

    p = paths = property(get_paths)

    def grep(self, pattern, prune = False, field = None):
        """ Return all strings matching 'pattern' (a regex or callable)

        A string pattern is searched for case-insensitively with
        ``re.search``; a callable pattern is used as the predicate as is.
        If prune is true, return all items NOT matching the pattern.

        If field is specified, the match must occur in the specified
        whitespace-separated field.  Lines that do not have that field are
        matched against the empty string.

        Examples::

            a.grep( lambda x: x.startswith('C') )
            a.grep('Cha.*log', prune=1)
            a.grep('chm', field=-1)
        """

        def match_target(s):
            # Pick the text the predicate is applied to for one line.
            if field is None:
                return s
            parts = s.split()
            try:
                return parts[field]
            except IndexError:
                return ""

        if isinstance(pattern, str):
            pred = lambda x : re.search(pattern, x, re.IGNORECASE)
        else:
            pred = pattern
        if not prune:
            return SList([el for el in self if pred(match_target(el))])
        else:
            return SList([el for el in self if not pred(match_target(el))])

    def fields(self, *fields):
        """ Collect whitespace-separated fields from string list

        Allows quick awk-like usage of string lists.

        Example data (in var a, created by 'a = !ls -l')::

            -rwxrwxrwx  1 ville None      18 Dec 14  2006 ChangeLog
            drwxrwxrwx+ 6 ville None       0 Oct 24 18:05 IPython

        * ``a.fields(0)`` is ``['-rwxrwxrwx', 'drwxrwxrwx+']``
        * ``a.fields(1,0)`` is ``['1 -rwxrwxrwx', '6 drwxrwxrwx+']``
          (note the joining by space).
        * ``a.fields(-1)`` is ``['ChangeLog', 'IPython']``

        IndexErrors are ignored: a missing field is skipped, and a line
        that yields no field at all contributes nothing to the result.

        Without args, fields() just split()'s the strings and returns a
        plain list of lists.
        """
        if len(fields) == 0:
            return [el.split() for el in self]

        res = SList()
        for el in [f.split() for f in self]:
            lineparts = []

            for fd in fields:
                try:
                    lineparts.append(el[fd])
                except IndexError:
                    pass
            if lineparts:
                res.append(" ".join(lineparts))

        return res

    def sort(self,field= None, nums = False):
        """ sort by specified fields (see fields())

        Unlike ``list.sort`` this does not sort in place: a new SList is
        returned and the original is left untouched.

        Parameters
        ----------
        field : int, optional
            Index of the whitespace-separated field to sort by.  When
            omitted, the whole line is the sort key.
        nums : bool (default: False)
            Sort numerically.  The digits found in the key are concatenated
            and read as an integer; a key without any digit counts as 0.

        Example::

            a.sort(1, nums = True)

        Sorts a by second field, in numerical order (so that 21 > 3)

        """

        def numeric_key(key):
            # With ``field`` given the key is the (at most one element)
            # SList returned by fields(); flatten it to text first so that
            # digits are extracted from the field's characters, exactly as
            # they are from a whole line.
            text = key if isinstance(key, str) else " ".join(key)
            digits = "".join(ch for ch in text if ch.isdigit())
            try:
                return int(digits)
            except ValueError:
                return 0

        # decorate, sort, undecorate
        if field is not None:
            dsu = [[SList([line]).fields(field), line] for line in self]
        else:
            dsu = [[line, line] for line in self]
        if nums:
            dsu = [[numeric_key(key), line] for key, line in dsu]

        dsu.sort()
        return SList([t[1] for t in dsu])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def indent(instr,nspaces=4, ntabs=0, flatten=False):
    """Indent a string a given number of spaces or tabstops.

    indent(str,nspaces=4,ntabs=0) -> indent str by ntabs+nspaces.

    Parameters
    ----------
    instr : str
        The string to be indented.
    nspaces : int (default: 4)
        The number of spaces to be indented.
    ntabs : int (default: 0)
        The number of tabs to be indented.
    flatten : bool (default: False)
        Whether to scrub existing indentation.  If True, all lines will be
        aligned to the same indentation.  If False, existing indentation will
        be strictly increased.

    Returns
    -------
    str : string indented by ntabs and nspaces, or None when `instr` is None.

    """
    if instr is None:
        return
    ind = '\t'*ntabs+' '*nspaces
    if flatten:
        pat = re.compile(r'^\s*', re.MULTILINE)
    else:
        pat = re.compile(r'^', re.MULTILINE)
    outstr = re.sub(pat, ind, instr)
    # In MULTILINE mode '^' also matches right after a trailing newline, so
    # text ending in a newline picks up a dangling indent that must be removed.
    # Check for '\n' rather than os.linesep so the cleanup also works for
    # '\n'-terminated text on platforms whose os.linesep is '\r\n', and skip it
    # for an empty indent, where outstr[:-0] would wipe the whole string.
    if ind and outstr.endswith('\n' + ind):
        return outstr[:-len(ind)]
    return outstr
|
|
|
|
|
def list_strings(arg):
    """Wrap a lone string in a list; hand anything else back untouched.

    Lets callers accept either a single string or a list of strings and
    always work with a list afterwards.

    Examples
    --------
    ::

        In [7]: list_strings('A single string')
        Out[7]: ['A single string']

        In [8]: list_strings(['A single string in a list'])
        Out[8]: ['A single string in a list']

        In [9]: list_strings(['A','list','of','strings'])
        Out[9]: ['A', 'list', 'of', 'strings']
    """
    return [arg] if isinstance(arg, str) else arg
|
|
|
|
|
def marquee(txt='',width=78,mark='*'):
    """Return the input string centered in a 'marquee'.

    The text is flanked by one space and an equal number of `mark`
    repetitions on each side.  With no text, a line of `mark` cut to
    `width` characters is returned.

    Examples
    --------
    ::

        In [16]: marquee('A test',40)
        Out[16]: '**************** A test ****************'

        In [17]: marquee('A test',40,'-')
        Out[17]: '---------------- A test ----------------'

        In [18]: marquee('A test',40,' ')
        Out[18]: '                 A test                 '

    """
    if txt:
        # Room left after the text and its two padding spaces, split in two;
        # text wider than the marquee simply gets no marks at all.
        per_side = max((width-len(txt)-2)//len(mark)//2, 0)
        border = mark*per_side
        return '%s %s %s' % (border, txt, border)
    return (mark*width)[:width]
|
|
|
|
|
# Matches a run of whitespace anchored at the very start of a string.
ini_spaces_re = re.compile(r'^(\s+)')


def num_ini_spaces(strng):
    """Return the number of leading whitespace characters in a string.

    Any character matched by the regex class ``\\s`` counts, not only the
    space character.  Strings with no leading whitespace give 0.
    """
    leading = ini_spaces_re.match(strng)
    return leading.end() if leading else 0
|
|
|
|
|
def format_screen(strng):
    """Format a string for screen printing.

    This removes some latex-type format codes: a backslash sitting at the
    very end of a line is dropped."""
    return re.sub(r'\\$', '', strng, flags=re.MULTILINE)
|
|
|
|
|
def dedent(text):
    """Equivalent of textwrap.dedent that ignores unindented first line.

    This means it will still dedent strings like:
    '''foo
    is a bar
    '''

    For use in wrap_paragraphs.
    """
    # Text that opens with a newline, or that has only one line, needs no
    # special treatment: plain textwrap.dedent does the job.
    if text.startswith('\n') or '\n' not in text:
        return textwrap.dedent(text)

    # Otherwise keep the first line as is and dedent only what follows it.
    head, newline, tail = text.partition('\n')
    return head + newline + textwrap.dedent(tail)
|
|
|
|
|
def wrap_paragraphs(text, ncols=80):
    """Wrap multiple paragraphs to fit a specified width.

    This is equivalent to textwrap.wrap, but with support for multiple
    paragraphs, as separated by empty lines.

    Returns
    -------
    list of complete paragraphs, wrapped to fill `ncols` columns.
    """
    blank_lines = re.compile(r'\n(\s*\n)+', re.MULTILINE)
    inner_indent = re.compile(r'\n\s+', re.MULTILINE)

    # split() interleaves the captured group with the paragraphs, so only
    # every other element is actual paragraph text.
    chunks = blank_lines.split(dedent(text).strip())[::2]

    # Indentation that survives the dedent is taken to be deliberate
    # formatting, so only paragraphs whose lines are all flush get refilled.
    return [
        chunk if inner_indent.search(chunk) else textwrap.fill(chunk, ncols)
        for chunk in chunks
    ]
|
|
|
|
|
def strip_email_quotes(text):
    """Strip leading email quotation characters ('>').

    Removes any combination of leading '>' interspersed with whitespace that
    appears *identically* in all lines of the input text.

    Parameters
    ----------
    text : str

    Returns
    -------
    str : the lines of `text`, minus the shared quote prefix, joined with
        newlines.

    Examples
    --------

    Simple uses::

        In [2]: strip_email_quotes('> > text')
        Out[2]: 'text'

        In [3]: strip_email_quotes('> > text\\n> > more')
        Out[3]: 'text\\nmore'

    Note how only the common prefix that appears in all lines is stripped::

        In [4]: strip_email_quotes('> > text\\n> > more\\n> more...')
        Out[4]: '> text\\n> more\\nmore...'

    So if any line has no quote marks ('>'), then none are stripped from any
    of them ::

        In [5]: strip_email_quotes('> > text\\n> > more\\nlast different')
        Out[5]: '> > text\\n> > more\\nlast different'
    """
    lines = text.splitlines()
    strip_len = 0

    # Walk the lines column by column; zip() stops at the shortest line.
    for column in zip(*lines):
        head = column[0]
        shared = all(ch == head for ch in column)
        quoting = head == ">" or head in string.whitespace
        # The prefix ends at the first column that differs between lines or
        # that holds something other than '>' or whitespace.
        if not (shared and quoting):
            break
        strip_len += 1

    return "\n".join(line[strip_len:] for line in lines)
|
|
|
|
|
def strip_ansi(source):
    """
    Remove ansi escape codes from text.

    Only sequences made of ESC, ``[``, one or more digits or semicolons and
    a closing ``m`` are removed.

    Parameters
    ----------
    source : str
        Source to remove the ansi from

    Returns
    -------
    str : `source` without the matched escape sequences.
    """
    ansi_escape = re.compile(r'\033\[(\d|;)+?m')
    return ansi_escape.sub('', source)
|
|
|
|
|
class EvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    Every replacement field is evaluated as a Python expression, with the
    keyword arguments of ``format`` serving as its namespace.

    Note that this version interprets a `:` as specifying a format string (as per
    standard string formatting), so if slicing is required, you must explicitly
    create a slice.

    This is to be used in templating cases, such as the parallel batch
    script templates, where simple arithmetic on arguments is useful.

    Examples
    --------
    ::

        In [1]: f = EvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format("{greeting[slice(2,4)]}", greeting="Hello")
        Out[3]: 'll'
    """

    def get_field(self, name, args, kwargs):
        """Evaluate the field `name` against `kwargs`; return (value, name).

        Positional `args` are not consulted.
        """
        # NOTE: eval() runs the field text as code, so templates must come
        # from a trusted source.
        return eval(name, kwargs), name
|
|
|
|
|
|
|
|
|
|
|
class FullEvalFormatter(Formatter):
    """A String Formatter that allows evaluation of simple expressions.

    The content of every replacement field is evaluated as a Python
    expression in the kwargs namespace.

    Note that this version allows slicing using [1:2], so you cannot specify
    a format string. Use :class:`EvalFormatter` to permit format strings.

    Examples
    --------
    ::

        In [1]: f = FullEvalFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('{list(range(5))[2:4]}')
        Out[3]: '[2, 3]'

        In [4]: f.format('{3*2}')
        Out[4]: '6'
    """

    def vformat(self, format_string:str, args, kwargs)->str:
        """Render `format_string`, evaluating each field against `kwargs`.

        Positional `args` are not consulted.
        """
        pieces = []
        for literal, expression, spec, conversion in self.parse(format_string):
            # Literal text in front of the field is copied through verbatim.
            if literal:
                pieces.append(literal)

            # No field in this chunk (e.g. trailing literal text).
            if expression is None:
                continue

            # The parser splits the field at the first ':'; glue the pieces
            # back together so that slices such as [1:2] stay intact.
            if spec:
                expression = expression + ':' + spec

            # NOTE: eval() runs the field text as code, so templates must
            # come from a trusted source.
            value = eval(expression, kwargs)
            value = self.convert_field(value, conversion)

            # Always formatted with an empty spec, since the ':' part was
            # consumed by the expression above.
            pieces.append(self.format_field(value, ''))

        return ''.join(pieces)
|
|
|
|
|
class DollarFormatter(FullEvalFormatter):
    """Formatter allowing Itpl style $foo replacement, for names and attribute
    access only. Standard {foo} replacement also works, and allows full
    evaluation of its arguments.

    A doubled ``$$foo`` is emitted as the literal text ``$foo``.

    Examples
    --------
    ::

        In [1]: f = DollarFormatter()
        In [2]: f.format('{n//4}', n=8)
        Out[2]: '2'

        In [3]: f.format('23 * 76 is $result', result=23*76)
        Out[3]: '23 * 76 is 1748'

        In [4]: f.format('$a or {b}', a=1, b=2)
        Out[4]: '1 or 2'
    """
    # Group 1 is the text in front of a '$'; group 2 is the name after it
    # (with a leading '$' when the dollar was doubled).  The lookahead only
    # accepts a match that is followed by an even number of single quotes.
    _dollar_pattern_ignore_single_quote = re.compile(r"(.*?)\$(\$?[\w\.]+)(?=([^']*'[^']*')*[^']*$)")

    def parse(self, fmt_string):
        """Yield parse tuples like ``Formatter.parse``, plus one per $name."""
        dollar_re = self._dollar_pattern_ignore_single_quote
        for literal_txt, field_name, format_spec, conversion in Formatter.parse(self, fmt_string):

            # Look for $foo patterns inside the literal text.
            pending = ""
            resume_at = 0
            for hit in dollar_re.finditer(literal_txt):
                before, name = hit.group(1), hit.group(2)
                resume_at = hit.end()
                if name.startswith("$"):
                    # $$foo --> literal $foo, kept for the next yield.
                    pending += before + name
                    continue
                yield (pending + before, name, "", None)
                pending = ""

            # Re-yield the {foo} style field with whatever literal is left.
            yield (pending + literal_txt[resume_at:], field_name, format_spec, conversion)

    def __repr__(self):
        return "<DollarFormatter>"
|
|
|
|
|
|
|
|
|
|
|
def _col_chunks(l, max_rows, row_first=False): |
|
"""Yield successive max_rows-sized column chunks from l.""" |
|
if row_first: |
|
ncols = (len(l) // max_rows) + (len(l) % max_rows > 0) |
|
for i in range(ncols): |
|
yield [l[j] for j in range(i, len(l), ncols)] |
|
else: |
|
for i in range(0, len(l), max_rows): |
|
yield l[i:(i + max_rows)] |
|
|
|
|
|
def _find_optimal(rlist, row_first=False, separator_size=2, displaywidth=80): |
|
"""Calculate optimal info to columnize a list of string""" |
|
for max_rows in range(1, len(rlist) + 1): |
|
col_widths = list(map(max, _col_chunks(rlist, max_rows, row_first))) |
|
sumlength = sum(col_widths) |
|
ncols = len(col_widths) |
|
if sumlength + separator_size * (ncols - 1) <= displaywidth: |
|
break |
|
return {'num_columns': ncols, |
|
'optimal_separator_width': (displaywidth - sumlength) // (ncols - 1) if (ncols - 1) else 0, |
|
'max_rows': max_rows, |
|
'column_widths': col_widths |
|
} |
|
|
|
|
|
def _get_or_default(mylist, i, default=None): |
|
"""return list item number, or default if don't exist""" |
|
if i >= len(mylist): |
|
return default |
|
else : |
|
return mylist[i] |
|
|
|
|
|
def compute_item_matrix(items, row_first=False, empty=None, *args, **kwargs) :
    """Returns a nested list, and info to columnize items

    Parameters
    ----------
    items
        list of strings to columnize
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    empty : (default None)
        default value to fill list if needed
    separator_size : int (default=2)
        How many characters will be used as a separation between each column.
    displaywidth : int (default=80)
        The width of the area into which the columns should fit

    Returns
    -------
    strings_matrix
        nested list of strings, the outermost list contains as many lists as
        rows, the innermost lists each have as many elements as columns. If the
        total number of elements in `items` does not equal the product of
        rows*columns, the last elements of some lists are filled with `empty`.
    dict_info
        some info to make columnize easier:

        num_columns
          number of columns
        max_rows
          maximum number of rows (final number may be less)
        column_widths
          list of the width of each column
        optimal_separator_width
          best separator width between columns

    Examples
    --------
    ::

        In [1]: l = ['aaa','b','cc','d','eeeee','f','g','h','i','j','k','l']
        In [2]: list, info = compute_item_matrix(l, displaywidth=12)
        In [3]: list
        Out[3]: [['aaa', 'f', 'k'], ['b', 'g', 'l'], ['cc', 'h', None], ['d', 'i', None], ['eeeee', 'j', None]]
        In [4]: ideal = {'num_columns': 3, 'column_widths': [5, 1, 1], 'optimal_separator_width': 2, 'max_rows': 5}
        In [5]: all((info[k] == ideal[k] for k in ideal.keys()))
        Out[5]: True
    """
    widths = [len(item) for item in items]
    info = _find_optimal(widths, row_first, *args, **kwargs)
    nrow = info['max_rows']
    ncol = info['num_columns']

    # Map a (row, column) cell to its position in the flat `items` list.
    if row_first:
        def flat_index(r, c):
            return r * ncol + c
    else:
        def flat_index(r, c):
            return c * nrow + r

    matrix = [
        [_get_or_default(items, flat_index(r, c), default=empty) for c in range(ncol)]
        for r in range(nrow)
    ]
    return (matrix, info)
|
|
|
|
|
def columnize(items, row_first=False, separator="  ", displaywidth=80, spread=False):
    """Transform a list of strings into a single string with columns.

    Parameters
    ----------
    items : sequence of strings
        The strings to process.
    row_first : (default False)
        Whether to compute columns for a row-first matrix instead of
        column-first (default).
    separator : str, optional [default is two spaces]
        The string that separates columns.
    displaywidth : int, optional [default is 80]
        Width of the display in number of characters.
    spread : bool, optional [default is False]
        If true, the separator is padded with spaces on the right up to the
        optimal separator width reported by `compute_item_matrix`.

    Returns
    -------
    The formatted string, one line per row, each ending in a newline.  Empty
    `items` give a single newline.
    """
    if not items:
        return '\n'
    matrix, info = compute_item_matrix(items, row_first=row_first, separator_size=len(separator), displaywidth=displaywidth)
    if spread:
        separator = separator.ljust(int(info['optimal_separator_width']))
    widths = info['column_widths']

    rendered = []
    for row in matrix:
        # Drops the None padding of short rows (and any other falsy cell).
        cells = [cell for cell in row if cell]
        rendered.append(separator.join(cell.ljust(width, ' ') for cell, width in zip(cells, widths)))
    return '\n'.join(rendered) + '\n'
|
|
|
|
|
def get_text_list(list_, last_sep=' and ', sep=", ", wrap_item_with=""):
    """
    Return a string with a natural enumeration of items

    All items but the last are joined with `sep`; the last one is attached
    with `last_sep`.  When `wrap_item_with` is non-empty it is put on both
    sides of every item.

    >>> get_text_list(['a', 'b', 'c', 'd'])
    'a, b, c and d'
    >>> get_text_list(['a', 'b', 'c'], ' or ')
    'a, b or c'
    >>> get_text_list(['a', 'b', 'c'], ', ')
    'a, b, c'
    >>> get_text_list(['a', 'b'], ' or ')
    'a or b'
    >>> get_text_list(['a'])
    'a'
    >>> get_text_list([])
    ''
    >>> get_text_list(['a', 'b'], wrap_item_with="`")
    '`a` and `b`'
    >>> get_text_list(['a', 'b', 'c', 'd'], " = ", sep=" + ")
    'a + b + c = d'
    """
    count = len(list_)
    if count == 0:
        return ''

    entries = list_
    if wrap_item_with:
        entries = ['%s%s%s' % (wrap_item_with, entry, wrap_item_with)
                   for entry in list_]

    # A single item is handed back as is, with no separator involved.
    if count == 1:
        return entries[0]

    leading = sep.join(entries[:-1])
    return '%s%s%s' % (leading, last_sep, entries[-1])
|
|