]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
ceph-volume: terminal: encode unicode when writing to stdout
author: Kefu Chai <kchai@redhat.com>
Sun, 24 Mar 2019 09:11:31 +0000 (17:11 +0800)
committer: Jan Fajerski <jfajerski@suse.com>
Mon, 12 Aug 2019 12:24:43 +0000 (14:24 +0200)
python determines the encoding of stdout and stderr based on the LC_CTYPE
and PYTHONIOENCODING env variables. by default, python3's sys.stdout uses
'utf-8' as its encoding, so it is able to write unicode strings even when
stdout is not attached to a tty device. but when it comes to
python2, it defaults to ascii if neither of these variables is set.
so, if we are writing unicode using `_Write` in an environment where
LC_CTYPE and/or PYTHONIOENCODING select a non-UTF-8 encoding, it chokes
by raising a `UnicodeEncodeError` exception.

in this change, we add a wrapper around `_Write._writer` so it is able
to write unicode string in such a non-unicode-friendly environment.

for more info related to the encoding of stdout and stderr, see
https://docs.python.org/3/using/cmdline.html#envvar-PYTHONIOENCODING .

Signed-off-by: Alfredo Deza <adeza@redhat.com>
Signed-off-by: Kefu Chai <kchai@redhat.com>
(cherry picked from commit 77912c0c71874a23785d3bdd109ad61e4f4c0e28)

src/ceph-volume/ceph_volume/terminal.py
src/ceph-volume/ceph_volume/tests/test_terminal.py

index 7fe8555148881749d7a522ab185c35f442332289..11bd2a1377fd94d3b598cd5b4028944a064de3f5 100644 (file)
@@ -1,3 +1,4 @@
+import codecs
 import logging
 import sys
 
@@ -80,11 +81,33 @@ yellow_arrow = yellow('--> ')
 class _Write(object):
 
     def __init__(self, _writer=None, prefix='', suffix='', flush=False):
-        self._writer = _writer or sys.stdout
+        if _writer is None:
+            _writer = sys.stdout
+        self._writer = _Write._unicode_output_stream(_writer)
+        if _writer is sys.stdout:
+            sys.stdout = self._writer
         self.suffix = suffix
         self.prefix = prefix
         self.flush = flush
 
+    @staticmethod
+    def _unicode_output_stream(stream):
+        # wrapper for given stream, so it can write unicode without throwing
+        # exception
+        # sys.stdout.encoding is None if !isatty
+        encoding = stream.encoding or ''
+        if encoding.upper() in ('UTF-8', 'UTF8'):
+            # already using unicode encoding, nothing to do
+            return stream
+        encoding = encoding or 'UTF-8'
+        if sys.version_info >= (3, 0):
+            # try to use whatever writer class the stream was
+            return stream.__class__(stream.buffer, encoding, 'replace',
+                                    stream.newlines, stream.line_buffering)
+        else:
+            # in python2, stdout is but a "file"
+            return codecs.getwriter(encoding)(stream, 'replace')
+
     def bold(self, string):
         self.write(bold(string))
 
index 9435dbb263ac9660b1e66ae6561ea19f4302cfbd..a74e35d47ad74fe55f917eb3681eb0c0507f1411 100644 (file)
@@ -1,4 +1,9 @@
+# -*- mode:python; tab-width:4; indent-tabs-mode:nil; coding:utf-8 -*-
+
+import codecs
+import io
 import pytest
+import sys
 from ceph_volume import terminal
 
 
@@ -66,3 +71,50 @@ class TestDispatch(object):
         with pytest.raises(SystemExit) as error:
             terminal.dispatch({'sub': BadSubCommand}, argv=['sub'])
         assert str(error.value) == '100'
+
+
+@pytest.fixture
+def stream():
+    def make_stream(buffer, encoding):
+        # mock a stdout with given encoding
+        if sys.version_info >= (3, 0):
+            stdout = sys.stdout
+            stream = io.TextIOWrapper(buffer,
+                                      encoding=encoding,
+                                      errors=stdout.errors,
+                                      newline=stdout.newlines,
+                                      line_buffering=stdout.line_buffering)
+        else:
+            stream = codecs.getwriter(encoding)(buffer)
+            # StreamWriter does not have encoding attached to it, it will ask
+            # the inner buffer for "encoding" attribute in this case
+            stream.encoding = encoding
+        return stream
+    return make_stream
+
+
+class TestWriteUnicode(object):
+
+    def setup(self):
+        self.octpus_and_squid_en = u'octpus and squid'
+        octpus_and_squid_zh = u'章鱼和鱿鱼'
+        self.message = self.octpus_and_squid_en + octpus_and_squid_zh
+
+    def test_stdout_writer(self, capsys):
+        # should work with whatever stdout is
+        terminal.stdout(self.message)
+        out, _ = capsys.readouterr()
+        assert self.octpus_and_squid_en in out
+
+    @pytest.mark.parametrize('encoding', ['ascii', 'utf8'])
+    def test_writer(self, encoding, stream, monkeypatch, capsys):
+        buffer = io.BytesIO()
+        # should keep writer alive
+        with capsys.disabled():
+            # we want to have access to the sys.stdout's attributes in
+            # make_stream(), not the ones of pytest.capture.EncodedFile
+            writer = stream(buffer, encoding)
+            monkeypatch.setattr(sys, 'stdout', writer)
+            terminal.stdout(self.message)
+            sys.stdout.flush()
+            assert self.octpus_and_squid_en.encode(encoding) in buffer.getvalue()