Merge pull request #1053 from ThomasWaldmann/wcswidth

add swidth call, fixes #1051
This commit is contained in:
enkore 2016-05-19 00:29:14 +02:00
commit 13a4e40024
13 changed files with 151 additions and 112 deletions

View file

@ -9,6 +9,7 @@ from .key import key_factory
from .remote import cache_if_remote
import os
from shutil import get_terminal_size
import socket
import stat
import sys
@ -19,13 +20,13 @@ from .compress import COMPR_BUFFER
from .constants import * # NOQA
from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
parse_timestamp, to_localtime, format_time, format_timedelta, safe_encode, safe_decode, \
Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
Manifest, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
CompressionDecider1, CompressionDecider2, CompressionSpec, \
IntegrityError
from .repository import Repository
from .platform import acl_get, acl_set, set_flags, get_flags
from .platform import acl_get, acl_set, set_flags, get_flags, swidth
from .chunker import Chunker
from .hashindex import ChunkIndex, ChunkIndexEntry
from .cache import ChunkListEntry
@ -37,6 +38,58 @@ flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
class Statistics:
    """Accumulate size/count statistics for one archive operation and render
    them either as a summary table or as a single-line progress display.

    Counters:
      osize  - original (uncompressed) bytes seen
      csize  - compressed bytes seen
      usize  - compressed bytes of unique (deduplicated) chunks only
      nfiles - number of files processed
    """

    def __init__(self):
        self.osize = self.csize = self.usize = self.nfiles = 0
        self.last_progress = 0  # timestamp when last progress was shown

    def update(self, size, csize, unique):
        """Account for one chunk; only *unique* chunks add to usize."""
        self.osize += size
        self.csize += csize
        if unique:
            self.usize += csize

    # template used by __str__; filled via str.format with stats=self
    summary = """\
Original size Compressed size Deduplicated size
{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"""

    def __str__(self):
        return self.summary.format(stats=self, label='This archive:')

    def __repr__(self):
        return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(
            cls=type(self).__name__, hash=id(self), self=self)

    @property
    def osize_fmt(self):
        # human-readable original size, e.g. '20 B', '1.02 kB'
        return format_file_size(self.osize)

    @property
    def usize_fmt(self):
        return format_file_size(self.usize)

    @property
    def csize_fmt(self):
        return format_file_size(self.csize)

    def show_progress(self, item=None, final=False, stream=None, dt=None):
        """Print one carriage-return-terminated progress line.

        :param item: current archive item; its b'path' is appended to the line
        :param final: if True, blank the line instead of showing counters
        :param stream: output stream, defaults to sys.stderr
        :param dt: minimum seconds between refreshes; None forces a refresh
        """
        now = time.time()
        if dt is None or now - self.last_progress > dt:  # rate-limit output
            self.last_progress = now
            columns, lines = get_terminal_size()
            if not final:
                msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
                path = remove_surrogates(item[b'path']) if item else ''
                # cells remaining for the path; widths measured with swidth(),
                # so double-width (CJK) glyphs count as 2 cells
                space = columns - swidth(msg)
                if space < swidth('...') + swidth(path):
                    # middle-truncate the path.  NOTE(review): the slices are by
                    # character count while the check uses swidth, so wide-glyph
                    # paths may still overflow the line -- confirm acceptable.
                    path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
                msg += "{0:<{space}}".format(path, space=space)
            else:
                msg = ' ' * columns  # blank/clear the progress line
            print(msg, file=stream or sys.stderr, end="\r", flush=True)
class DownloadPipeline:
def __init__(self, repository, key):

View file

@ -22,7 +22,7 @@ from . import __version__
from .helpers import Error, location_validator, archivename_validator, format_time, format_file_size, \
parse_pattern, PathPrefixPattern, to_localtime, timestamp, \
get_cache_dir, prune_within, prune_split, bin_to_hex, safe_encode, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, \
dir_is_tagged, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
log_multi, PatternMatcher, ItemFormatter
from .logger import create_logger, setup_logging
@ -34,7 +34,7 @@ from .repository import Repository
from .cache import Cache
from .constants import * # NOQA
from .key import key_creator, RepoKey, PassphraseKey
from .archive import Archive, ArchiveChecker, ArchiveRecreater
from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
from .remote import RepositoryServer, RemoteRepository, cache_if_remote
from .selftest import selftest
from .hashindex import ChunkIndexEntry

View file

@ -11,7 +11,6 @@ import stat
import textwrap
import pwd
import re
from shutil import get_terminal_size
import sys
from string import Formatter
import platform
@ -172,57 +171,6 @@ def prune_split(archives, pattern, n, skip=[]):
return keep
class Statistics:
    """Size/count statistics for one archive operation.

    Tracks original (osize), compressed (csize) and deduplicated (usize)
    byte counts plus the number of processed files (nfiles), and can render
    a summary table or a one-line progress display.
    """

    # template used by __str__; filled via str.format with stats=self
    summary = """\
Original size Compressed size Deduplicated size
{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"""

    def __init__(self):
        self.osize = 0
        self.csize = 0
        self.usize = 0
        self.nfiles = 0
        self.last_progress = 0  # timestamp when last progress was shown

    def update(self, size, csize, unique):
        """Account for one chunk; only unique chunks count as deduplicated."""
        self.osize += size
        self.csize += csize
        self.usize += csize if unique else 0

    def __str__(self):
        return self.summary.format(stats=self, label='This archive:')

    def __repr__(self):
        template = "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>"
        return template.format(cls=type(self).__name__, hash=id(self), self=self)

    @property
    def osize_fmt(self):
        # human-readable original size, e.g. '20 B', '1.02 kB'
        return format_file_size(self.osize)

    @property
    def csize_fmt(self):
        return format_file_size(self.csize)

    @property
    def usize_fmt(self):
        return format_file_size(self.usize)

    def show_progress(self, item=None, final=False, stream=None, dt=None):
        """Print one carriage-return-terminated progress line, rate-limited
        to at most one refresh every *dt* seconds (None forces a refresh).
        """
        now = time.time()
        if dt is not None and now - self.last_progress <= dt:
            return  # too soon since the last refresh
        self.last_progress = now
        columns, lines = get_terminal_size()
        if final:
            msg = ' ' * columns  # blank/clear the progress line
        else:
            msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
            path = remove_surrogates(item[b'path']) if item else ''
            space = columns - len(msg)  # columns left for the path
            if space < len('...') + len(path):
                # middle-truncate the path so the line fits the terminal
                head = path[:(space // 2) - len('...')]
                tail = path[-space // 2:]
                path = '%s...%s' % (head, tail)
            msg += "{0:<{space}}".format(path, space=space)
        print(msg, file=stream or sys.stderr, end="\r", flush=True)
def get_home_dir():
"""Get user's home directory while preferring a possibly set HOME
environment variable

View file

@ -1,10 +1,10 @@
import sys
from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, API_VERSION
from .platform_base import acl_get, acl_set, SyncFile, sync_dir, set_flags, get_flags, swidth, API_VERSION
if sys.platform.startswith('linux'): # pragma: linux only
from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, API_VERSION
from .platform_linux import acl_get, acl_set, SyncFile, set_flags, get_flags, swidth, API_VERSION
elif sys.platform.startswith('freebsd'): # pragma: freebsd only
from .platform_freebsd import acl_get, acl_set, API_VERSION
from .platform_freebsd import acl_get, acl_set, swidth, API_VERSION
elif sys.platform == 'darwin': # pragma: darwin only
from .platform_darwin import acl_get, acl_set, API_VERSION
from .platform_darwin import acl_get, acl_set, swidth, API_VERSION

View file

@ -90,3 +90,11 @@ class SyncFile:
self.sync()
self.fd.close()
sync_dir(os.path.dirname(self.fd.name))
def swidth(s):
    """Return the terminal output width (in cells) of string *s*.

    Generic fallback that assumes one cell per character, which is correct
    for western scripts.  POSIX platform modules replace this with a
    wcswidth()-based implementation that counts CJK glyphs as 2 cells.
    """
    return len(s)

View file

@ -1,5 +1,6 @@
import os
from .helpers import user2uid, group2gid, safe_decode, safe_encode
from .platform_posix import swidth
API_VERSION = 3

View file

@ -1,5 +1,6 @@
import os
from .helpers import posix_acl_use_stored_uid_gid, safe_encode, safe_decode
from .platform_posix import swidth
API_VERSION = 3

View file

@ -5,6 +5,8 @@ import stat
from .helpers import posix_acl_use_stored_uid_gid, user2uid, group2gid, safe_decode, safe_encode
from .platform_base import SyncFile as BaseSyncFile
from .platform_posix import swidth
from libc cimport errno
API_VERSION = 3

borg/platform_posix.pyx — new file (5 lines added)
View file

@ -0,0 +1,5 @@
# POSIX implementation of swidth(), backed by wcswidth(3).
cdef extern from "wchar.h":
    cdef int wcswidth(const Py_UNICODE *str, size_t n)


def swidth(s):
    """Return the terminal cell width of string *s* (CJK glyphs count as 2).

    NOTE(review): wcswidth(3) returns -1 when *s* contains a non-printable
    character -- callers doing column arithmetic with the result should
    confirm such strings cannot occur here.
    """
    return wcswidth(s, len(s))

View file

@ -1,14 +1,64 @@
import os
from datetime import datetime, timezone
from io import StringIO
from unittest.mock import Mock
import pytest
import msgpack
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
from ..key import PlaintextKey
from ..helpers import Manifest
from . import BaseTestCase
@pytest.fixture()
def stats():
    """A Statistics instance pre-loaded with one unique 20/10 chunk."""
    stats = Statistics()
    stats.update(20, 10, unique=True)
    return stats
def test_stats_basic(stats):
assert stats.osize == 20
assert stats.csize == stats.usize == 10
stats.update(20, 10, unique=False)
assert stats.osize == 40
assert stats.csize == 20
assert stats.usize == 10
def tests_stats_progress(stats, columns=80):
    """show_progress() pads output to the terminal width, appends the current
    item's path, and middle-truncates paths that do not fit.

    (Name predates the test_* convention; pytest's default "test" prefix
    still collects it, so it is kept for stability.)
    """
    # Force a deterministic terminal width for get_terminal_size().  The old
    # code left COLUMNS mutated, leaking state into later tests; restore the
    # previous value (or remove the key) when done.
    previous = os.environ.get('COLUMNS')
    os.environ['COLUMNS'] = str(columns)
    try:
        out = StringIO()
        stats.show_progress(stream=out)
        s = '20 B O 10 B C 10 B D 0 N '
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"

        out = StringIO()
        stats.update(10**3, 0, unique=False)
        stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
        s = '1.02 kB O 10 B C 10 B D 0 N foo'
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"

        out = StringIO()
        # overlong path gets middle-truncated with '...'
        stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
        s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"
    finally:
        if previous is None:
            del os.environ['COLUMNS']
        else:
            os.environ['COLUMNS'] = previous
def test_stats_format(stats):
    """__str__, the *_fmt properties and __repr__ render as expected."""
    assert str(stats) == """\
Original size Compressed size Deduplicated size
This archive: 20 B 10 B 10 B"""
    s = "{0.osize_fmt}".format(stats)
    assert s == "20 B"
    # kind of redundant, but id is variable so we can't match reliably
    assert repr(stats) == '<Statistics object at {:#x} (20, 10, 10)>'.format(id(stats))
class MockCache:
def __init__(self):

View file

@ -1,7 +1,6 @@
import hashlib
from time import mktime, strptime
from datetime import datetime, timezone, timedelta
from io import StringIO
import os
import pytest
@ -11,7 +10,7 @@ import msgpack.fallback
import time
from ..helpers import Location, format_file_size, format_timedelta, make_path_safe, clean_lines, \
prune_within, prune_split, get_cache_dir, get_keys_dir, Statistics, is_slow_msgpack, \
prune_within, prune_split, get_cache_dir, get_keys_dir, is_slow_msgpack, \
yes, TRUISH, FALSISH, DEFAULTISH, \
StableDict, int_to_bigint, bigint_to_int, bin_to_hex, parse_timestamp, ChunkerParams, Chunk, \
ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
@ -629,53 +628,6 @@ def test_get_keys_dir():
os.environ['BORG_KEYS_DIR'] = old_env
@pytest.fixture()
def stats():
    """A Statistics instance pre-loaded with one unique 20/10 chunk."""
    stats = Statistics()
    stats.update(20, 10, unique=True)
    return stats
def test_stats_basic(stats):
assert stats.osize == 20
assert stats.csize == stats.usize == 10
stats.update(20, 10, unique=False)
assert stats.osize == 40
assert stats.csize == 20
assert stats.usize == 10
def tests_stats_progress(stats, columns=80):
    """show_progress() pads output to the terminal width, appends the current
    item's path, and middle-truncates paths that do not fit.

    (Name predates the test_* convention; pytest's default "test" prefix
    still collects it, so it is kept for stability.)
    """
    # Force a deterministic terminal width for get_terminal_size().  The old
    # code left COLUMNS mutated, leaking state into later tests; restore the
    # previous value (or remove the key) when done.
    previous = os.environ.get('COLUMNS')
    os.environ['COLUMNS'] = str(columns)
    try:
        out = StringIO()
        stats.show_progress(stream=out)
        s = '20 B O 10 B C 10 B D 0 N '
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"

        out = StringIO()
        stats.update(10**3, 0, unique=False)
        stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
        s = '1.02 kB O 10 B C 10 B D 0 N foo'
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"

        out = StringIO()
        # overlong path gets middle-truncated with '...'
        stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
        s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
        buf = ' ' * (columns - len(s))
        assert out.getvalue() == s + buf + "\r"
    finally:
        if previous is None:
            del os.environ['COLUMNS']
        else:
            os.environ['COLUMNS'] = previous
def test_stats_format(stats):
    """__str__, the *_fmt properties and __repr__ render as expected."""
    assert str(stats) == """\
Original size Compressed size Deduplicated size
This archive: 20 B 10 B 10 B"""
    s = "{0.osize_fmt}".format(stats)
    assert s == "20 B"
    # kind of redundant, but id is variable so we can't match reliably
    assert repr(stats) == '<Statistics object at {:#x} (20, 10, 10)>'.format(id(stats))
def test_file_size():
"""test the size formatting routines"""
si_size_map = {

View file

@ -4,7 +4,7 @@ import sys
import tempfile
import unittest
from ..platform import acl_get, acl_set
from ..platform import acl_get, acl_set, swidth
from . import BaseTestCase
@ -138,3 +138,16 @@ class PlatformDarwinTestCase(BaseTestCase):
self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
@unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')
class PlatformPosixTestCase(BaseTestCase):
    """Exercise the wcswidth()-backed swidth() on POSIX platforms."""

    def test_swidth_ascii(self):
        # ASCII glyphs occupy one terminal cell each
        self.assert_equal(swidth("borg"), 4)

    def test_swidth_cjk(self):
        # CJK glyphs occupy two terminal cells each
        self.assert_equal(swidth("バックアップ"), 6 * 2)

    def test_swidth_mixed(self):
        # mixed strings sum per-glyph widths
        self.assert_equal(swidth("borgバックアップ"), 4 + 6 * 2)

View file

@ -40,6 +40,7 @@ compress_source = 'borg/compress.pyx'
crypto_source = 'borg/crypto.pyx'
chunker_source = 'borg/chunker.pyx'
hashindex_source = 'borg/hashindex.pyx'
platform_posix_source = 'borg/platform_posix.pyx'
platform_linux_source = 'borg/platform_linux.pyx'
platform_darwin_source = 'borg/platform_darwin.pyx'
platform_freebsd_source = 'borg/platform_freebsd.pyx'
@ -60,6 +61,7 @@ try:
'borg/crypto.c',
'borg/chunker.c', 'borg/_chunker.c',
'borg/hashindex.c', 'borg/_hashindex.c',
'borg/platform_posix.c',
'borg/platform_linux.c',
'borg/platform_freebsd.c',
'borg/platform_darwin.c',
@ -75,13 +77,14 @@ except ImportError:
crypto_source = crypto_source.replace('.pyx', '.c')
chunker_source = chunker_source.replace('.pyx', '.c')
hashindex_source = hashindex_source.replace('.pyx', '.c')
platform_posix_source = platform_posix_source.replace('.pyx', '.c')
platform_linux_source = platform_linux_source.replace('.pyx', '.c')
platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
from distutils.command.build_ext import build_ext
if not on_rtd and not all(os.path.exists(path) for path in [
compress_source, crypto_source, chunker_source, hashindex_source,
platform_linux_source, platform_freebsd_source]):
platform_posix_source, platform_linux_source, platform_freebsd_source]):
raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version.')
@ -286,6 +289,9 @@ if not on_rtd:
Extension('borg.chunker', [chunker_source]),
Extension('borg.hashindex', [hashindex_source])
]
if sys.platform.startswith(('linux', 'freebsd', 'darwin')):
ext_modules.append(Extension('borg.platform_posix', [platform_posix_source]))
if sys.platform == 'linux':
ext_modules.append(Extension('borg.platform_linux', [platform_linux_source], libraries=['acl']))
elif sys.platform.startswith('freebsd'):