Fix UnicodeDecodeError when postgres is not configured in English (#4253)

This is a bit of a half-assed effort at fixing https://github.com/matrix-org/synapse/issues/4252. Fundamentally the right answer is to drop support for Python 2.
This commit is contained in:
Richard van der Hoff 2018-12-04 11:55:52 +01:00 committed by GitHub
parent f144c0a210
commit ecc23188f4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 47 additions and 8 deletions

1
changelog.d/4253.bugfix Normal file
View file

@ -0,0 +1 @@
Fix UnicodeDecodeError when postgres is configured to give non-English errors

View file

@ -29,6 +29,7 @@ from synapse.api.errors import StoreError
from synapse.storage.engines import PostgresEngine from synapse.storage.engines import PostgresEngine
from synapse.util.caches.descriptors import Cache from synapse.util.caches.descriptors import Cache
from synapse.util.logcontext import LoggingContext, PreserveLoggingContext from synapse.util.logcontext import LoggingContext, PreserveLoggingContext
from synapse.util.stringutils import exception_to_unicode
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -249,32 +250,32 @@ class SQLBaseStore(object):
except self.database_engine.module.OperationalError as e: except self.database_engine.module.OperationalError as e:
# This can happen if the database disappears mid # This can happen if the database disappears mid
# transaction. # transaction.
logger.warn( logger.warning(
"[TXN OPERROR] {%s} %s %d/%d", "[TXN OPERROR] {%s} %s %d/%d",
name, e, i, N name, exception_to_unicode(e), i, N
) )
if i < N: if i < N:
i += 1 i += 1
try: try:
conn.rollback() conn.rollback()
except self.database_engine.module.Error as e1: except self.database_engine.module.Error as e1:
logger.warn( logger.warning(
"[TXN EROLL] {%s} %s", "[TXN EROLL] {%s} %s",
name, e1, name, exception_to_unicode(e1),
) )
continue continue
raise raise
except self.database_engine.module.DatabaseError as e: except self.database_engine.module.DatabaseError as e:
if self.database_engine.is_deadlock(e): if self.database_engine.is_deadlock(e):
logger.warn("[TXN DEADLOCK] {%s} %d/%d", name, i, N) logger.warning("[TXN DEADLOCK] {%s} %d/%d", name, i, N)
if i < N: if i < N:
i += 1 i += 1
try: try:
conn.rollback() conn.rollback()
except self.database_engine.module.Error as e1: except self.database_engine.module.Error as e1:
logger.warn( logger.warning(
"[TXN EROLL] {%s} %s", "[TXN EROLL] {%s} %s",
name, e1, name, exception_to_unicode(e1),
) )
continue continue
raise raise

View file

@ -16,7 +16,8 @@
import random import random
import string import string
from six import PY3 import six
from six import PY2, PY3
from six.moves import range from six.moves import range
_string_with_symbols = ( _string_with_symbols = (
@ -71,3 +72,39 @@ def to_ascii(s):
return s.encode("ascii") return s.encode("ascii")
except UnicodeEncodeError: except UnicodeEncodeError:
return s return s
def exception_to_unicode(e):
    """Helper function to extract the text of an exception as a unicode string

    Under Python 3 this is simply ``str(e)``. Under Python 2 the message is
    rebuilt from ``e.args`` so that byte-string messages (such as those
    psycopg2 produces) are decoded rather than being mixed, undecoded, with
    ``unicode`` objects.

    Args:
        e (Exception): exception to be stringified

    Returns:
        unicode: the exception text. Empty if the exception has no args; the
            repr of the args tuple if it has more than one; otherwise the
            single arg, decoded as UTF-8 (with replacement of invalid bytes)
            if it is a byte string.
    """
    # urgh, this is a mess. The basic problem here is that psycopg2 constructs its
    # exceptions with PyErr_SetString, with a (possibly non-ascii) argument. str() will
    # then produce the raw byte sequence. Under Python 2, this will then cause another
    # error if it gets mixed with a `unicode` object, as per
    # https://github.com/matrix-org/synapse/issues/4252

    # First of all, if we're under python3, everything is fine because it will sort this
    # nonsense out for us.
    if not PY2:
        return str(e)

    # otherwise let's have a stab at decoding the exception message. We'll circumvent
    # Exception.__str__(), which would explode if someone raised Exception(u'non-ascii')
    # and instead look at what is in the args member.
    args = e.args
    if not args:
        return u""
    if len(args) > 1:
        return six.text_type(repr(args))

    msg = args[0]
    if isinstance(msg, bytes):
        # invalid byte sequences are replaced rather than raising, since this is
        # only used to render the error for humans.
        return msg.decode('utf-8', errors='replace')
    if isinstance(msg, six.text_type):
        return msg

    # The sole argument is not a string at all (e.g. KeyError(5)). Returning it
    # as-is would break the documented "returns unicode" contract, so render it
    # via repr(), in the same way as the multi-argument case above.
    return six.text_type(repr(msg))