Added context manager to ignore Unicode<Decode/Encode>Error

Ayuto · Ayuto · commit b8119393a3b9 · 2017-02-18T22:03:51.000+01:00
diff --git a/addons/source-python/packages/source-python/core/__init__.py b/addons/source-python/packages/source-python/core/__init__.py
@@ -6,8 +6,12 @@
 # >> IMPORTS
 # =============================================================================
 # Python Imports
+#   Codecs
+import codecs
 #   Collections
 from collections import defaultdict
+#   Contextlib
+from contextlib import contextmanager
 #   Inspect
 from inspect import getmodule
 from inspect import stack
@@ -33,6 +37,7 @@
 from paths import GAME_PATH
 
 
+
 # =============================================================================
 # >> FORWARD IMPORTS
 # =============================================================================
@@ -57,6 +62,7 @@
            'console_message',
            'echo_console',
            'get_interface',
+           'ignore_unicode_errors',
            )
 
 
@@ -156,3 +162,44 @@ def echo_console(text):
     """
     for line in text.split('\n'):
         console_message(line + '\n')
+
+@contextmanager
+def ignore_unicode_errors(errors='ignore'):
+    """Temporarily replace the ``strict`` codecs error handler.
+
+    This is useful e.g. if the engine truncates a string, which leaves a
+    split multi-byte character at the end of the string. The previous
+    handler is registered again when the ``with`` block is left.
+
+    :param str errors:
+        Error handler that will be looked up via :func:`codecs.lookup_error`.
+    :raise LookupError:
+        Raised if the error handler was not found.
+
+    Example:
+
+    .. code:: python
+
+        import memory
+
+        # Allocate four bytes to create an erroneous string
+        ptr = memory.alloc(4)
+
+        # Write data that will usually result in a UnicodeDecodeError
+        ptr.set_uchar(ord('a'), 0)
+        ptr.set_uchar(ord('b'), 1)
+        ptr.set_uchar(226, 2) # Add the invalid byte
+        ptr.set_uchar(0, 3) # Indicate the end of the string
+
+        with ignore_unicode_errors():
+            # Only prints 'ab', because the invalid byte has been ignored
+            print(ptr.get_string_array())
+    """
+    # Remember the current handler, so it can be restored afterwards
+    old_handler = codecs.lookup_error('strict')
+    codecs.register_error('strict', codecs.lookup_error(errors))
+    try:
+        yield
+    finally:
+        # Register the previous handler again, even if the block raised
+        codecs.register_error('strict', old_handler)