From 3f6bc5c68cbaf17559cc3dd09bd712e67851037a Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 30 Mar 2023 09:44:47 +0300 Subject: [PATCH 01/29] golang_str: fix UCS2 builds + ./trun python -m pytest -vvsx golang/golang_str_test.py ==================================== test session starts ===================================== platform linux2 -- Python 2.7.18, pytest-4.6.11, py-1.11.0, pluggy-0.13.1 -- /home/kirr/src/tools/go/py2d.venv2023/bin/python cachedir: .pytest_cache rootdir: /home/kirr/src/tools/go/pygolang-xgpystr collected 64 items golang/golang_str_test.py::test_strings_basic Traceback (most recent call last): File "golang/_golang_str.pyx", line 2270, in golang._golang._xuniord return ord(u) ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2 Exception ValueError: 'only single character unicode strings can be converted to Py_UCS4, got length 2' in 'golang._golang._utf8_decode_rune' ignored (py2d.venv2023) kirr@deca:~/src/tools/go/pygolang-xgpystr$ python Python 2.7.18 (tags/2.7-dirty:8d21aa21f2c, Mar 30 2023, 07:38:40) [GCC 10.2.1 20210110] on linux2 Type "help", "copyright", "credits" or "license" for more information. 
>>> from pygolang import * Traceback (most recent call last): File "", line 1, in ImportError: No module named pygolang >>> from golang import * >>> ord('xy') Traceback (most recent call last): File "", line 1, in TypeError: ord() expected a character, but string of length 2 found >>> ord(b'xy') Traceback (most recent call last): File "", line 1, in TypeError: ord() expected a character, but string of length 2 found >>> ord(u'xy') Traceback (most recent call last): File "", line 1, in TypeError: ord() expected a character, but string of length 2 found >>> ord(b('xy')) Traceback (most recent call last): File "", line 1, in TypeError: ord() expected a character, but string of length 2 found >>> ord(u('xy')) Traceback (most recent call last): File "golang/_golang_str.pyx", line 2270, in golang._golang._xuniord return ord(u) ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2 Exception ValueError: 'only single character unicode strings can be converted to Py_UCS4, got length 2' in 'golang._golang._utf8_decode_rune' ignored Traceback (most recent call last): File "", line 1, in File "golang/_golang_str.pyx", line 157, in golang._golang.pyu us = _pyu(pyustr, s) File "golang/_golang_str.pyx", line 195, in golang._golang._pyu s = _utf8_decode_surrogateescape(s) File "golang/_golang_str.pyx", line 2198, in golang._golang._utf8_decode_surrogateescape emit(_xunichr(r)) File "golang/_golang_str.pyx", line 2286, in golang._golang._xunichr return unichr(0xd800 + (uh >> 10)) + \ ValueError: unichr() arg not in range(0x10000) (narrow Python build) It was broken in 50b8cb7e (strconv: Move functionality related to UTF8 encode/decode into _golang_str) --- golang/_golang_str.pyx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 989f7a3..c4cdef5 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 
2018-2022 Nexedi SA and Contributors. +# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -1791,8 +1791,9 @@ cdef (int, int) _utf8_decode_rune(const uint8_t[::1] s): if _ucs2_build and len(r) == 2: try: return _xuniord(r), l - # e.g. TypeError: ord() expected a character, but string of length 2 found - except TypeError: + # py: TypeError: ord() expected a character, but string of length 2 found + # cy: ValueError: only single character unicode strings can be converted to Py_UCS4, got length 2 + except (TypeError, ValueError): l -= 1 continue From 8dc44e124fc24f873d8e0812a161ec498e6bf5f9 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 30 Apr 2023 22:18:09 +0300 Subject: [PATCH 02/29] fixup! golang_str: bstr/ustr pickle support In ebd18f3f the code was ok but there is a thinko in test: it needs to test all pickle protocols from 0 to _including_ HIGHEST_PROTOCOL. --- golang/golang_str_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 384e63e..da6b255 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Nexedi SA and Contributors. +# Copyright (C) 2018-2023 Nexedi SA and Contributors. 
# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -313,7 +313,7 @@ def test_strings_pickle(): us = u("май") #from pickletools import dis - for proto in range(0, pickle.HIGHEST_PROTOCOL): + for proto in range(0, pickle.HIGHEST_PROTOCOL+1): p_bs = pickle.dumps(bs, proto) #dis(p_bs) bs_ = pickle.loads(p_bs) From 9e2dab5002c919fdf7924defe03b3457af70da52 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 30 Apr 2023 22:25:06 +0300 Subject: [PATCH 03/29] golang_str: tests: Adjust test_strings_index2 not to depend on repr(ustr|bstr) repr(ustr|bstr) will change behaviour depending on whether we are running under regular python, or gpython with string types replaced by bstr/ustr. But this test is completely orthogonal to that. -> Let's untie it from particular repr behaviour by emitting verified items in quoted form + asserting their types in the code. --- golang/testprog/golang_test_str_index2.py | 13 +++++++----- golang/testprog/golang_test_str_index2.txt | 24 +++++++++++----------- 2 files changed, 20 insertions(+), 17 deletions(-) diff --git a/golang/testprog/golang_test_str_index2.py b/golang/testprog/golang_test_str_index2.py index f9790ec..4fb4790 100755 --- a/golang/testprog/golang_test_str_index2.py +++ b/golang/testprog/golang_test_str_index2.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2022 Nexedi SA and Contributors. -# Kirill Smelkov +# Copyright (C) 2022-2023 Nexedi SA and Contributors. 
+# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your @@ -29,7 +29,8 @@ from __future__ import print_function, absolute_import -from golang import b, u +from golang import b, u, bstr, ustr +from golang.gcompat import qq def main(): @@ -37,8 +38,10 @@ def main(): bs = b("миру мир") def emit(what, uobj, bobj): - print("u"+what, repr(uobj)) - print("b"+what, repr(bobj)) + assert type(uobj) is ustr + assert type(bobj) is bstr + print("u"+what, qq(uobj)) + print("b"+what, qq(bobj)) emit("s", us, bs) emit("s[:]", us[:], bs[:]) diff --git a/golang/testprog/golang_test_str_index2.txt b/golang/testprog/golang_test_str_index2.txt index 5977e19..c18811a 100644 --- a/golang/testprog/golang_test_str_index2.txt +++ b/golang/testprog/golang_test_str_index2.txt @@ -1,12 +1,12 @@ -us u('миру мир') -bs b('миру мир') -us[:] u('миру мир') -bs[:] b('миру мир') -us[0:1] u('м') -bs[0:1] b(b'\xd0') -us[0:2] u('ми') -bs[0:2] b('м') -us[1:2] u('и') -bs[1:2] b(b'\xbc') -us[0:-1] u('миру ми') -bs[0:-1] b(b'миру ми\xd1') +us "миру мир" +bs "миру мир" +us[:] "миру мир" +bs[:] "миру мир" +us[0:1] "м" +bs[0:1] "\xd0" +us[0:2] "ми" +bs[0:2] "м" +us[1:2] "и" +bs[1:2] "\xbc" +us[0:-1] "миру ми" +bs[0:-1] "миру ми\xd1" From bf16f6851b2a9ffeb01b8dc3027dfadec00550c1 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 10:04:31 +0300 Subject: [PATCH 04/29] golang_str: Fix bstr.decode to handle 'string-escape' codec properly On py2 str.decode('string-escape') returns str, not unicode and this property is actually being used and relied upon by Lib/pickle.py: https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Lib/pickle.py#L967-L977 We promised bstr to be drop-in replacement for str on py2, so let's adjust its behaviour to match the original because if we do not, unpickling strings will break when str is replaced by bstr under gpython. 
Do not add bstr.encode yet until we hit a real case where it is actually used. --- golang/_golang_str.pyx | 3 +++ golang/golang_str_test.py | 11 +++++++++++ 2 files changed, 14 insertions(+) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index c4cdef5..3294c7c 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -422,6 +422,9 @@ class pybstr(bytes): x = _utf8_decode_surrogateescape(self) else: x = bytes.decode(self, encoding, errors) + # on py2 e.g. bytes.decode('string-escape') returns bytes + if PY_MAJOR_VERSION < 3 and isinstance(x, bytes): + return pyb(x) return pyu(x) if PY_MAJOR_VERSION < 3: diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index da6b255..e200546 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -690,6 +690,17 @@ def test_strings_encodedecode(): with raises(UnicodeEncodeError): u_k8mir.encode('ascii') + # on py2 there are encodings for which bytes.decode returns bytes + # e.g. bytes.decode('string-escape') is actually used by pickle + # verify that this exact semantic is preserved + if six.PY3: + with raises(LookupError): bs.decode('hex') + with raises(LookupError): bs.decode('string-escape') + else: + _ = bs.decode('string-escape'); assert type(_) is bstr; assert _ == bs + _ = b(r'x\'y').decode('string-escape'); assert type(_) is bstr; assert _bdata(_) == b"x'y" + _ = b('616263').decode('hex'); assert type(_) is bstr; assert _bdata(_) == b"abc" + # verify string operations like `x * 3` for all cases from bytes, bytearray, unicode, bstr and ustr. 
@mark.parametrize('tx', (bytes, unicode, bytearray, bstr, ustr)) From 302b51c5292fcece7db194d46b3e7a32bece4d1f Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 17:10:24 +0300 Subject: [PATCH 05/29] golang_str: tests: Make test_strings_methods more robust with upcoming unicode=ustr Previously test_strings_methods was testing a method via comparing bstr and ustr results of .method() with similar result of unicode.method(). This works reasonably ok. However under gpython, when unicode will be replaced with ustr, it will no longer compare results of bstr/ustr methods with something good and external - indeed in that case bstr/ustr .method() will be compared to result of ustr.method() which opens the door for bugs to stay unnoticed. -> Adjust the test to explicitly provide expected result for all entries in the test vector. We make sure those results are good and match std python because we also assert that unicode.method() matches it. --- golang/golang_str_test.py | 176 +++++++++++++++++++------------------- 1 file changed, 89 insertions(+), 87 deletions(-) diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index e200546..1c0c574 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -1530,7 +1530,10 @@ def test_strings_methods(): # argv and kw being various combinations of unicode,bstr,ustr, bytes/bytearray. def checkop(s, meth, *argv, **kw): assert type(s) is str - ok = kw.pop('ok', None) + ok = kw.pop('ok') + if six.PY2: + ok = deepReplaceStr(ok, xunicode) + optional = kw.pop('optional', False) bs = b(s) us = u(s) # verify {str,bstr,ustr}.meth with str arguments @@ -1545,13 +1548,11 @@ def checkop(s, meth, *argv, **kw): r = xcall(s, meth, *argv_unicode, **kw_unicode) # we provide fallback implementations on e.g. 
py2 - if ok is not None: - if six.PY2: - ok = xunicode(ok) - if isinstance(r, NotImplementedError): + if isinstance(r, NotImplementedError): + if not optional: r = ok - else: - assert r == ok + else: + assert r == ok assert type(s) is unicode br = xcall(bs, meth, *argv, **kw) @@ -1662,90 +1663,91 @@ def _(*argv, **kw): _ = Verifier - _("миру мир").__contains__("ру") - _("миру мир").__contains__("α") - _("мир").capitalize() - _("МиР").casefold() - _("мир").center(10) - _("мир").center(10, "ж") + _("миру мир").__contains__("ру", ok=True) + _("миру мир").__contains__("α", ok=False) + _("мир").capitalize( ok="Мир") + _("МиР").casefold( ok="мир", optional=True) # py3.3 + _("мир").center(10, ok=" мир ") + _("мир").center(10, "ж", ok="жжжмиржжжж") # count, endswith - tested in test_strings_index - _("миру\tмир").expandtabs() - _("миру\tмир").expandtabs(4) + _("миру\tмир").expandtabs( ok="миру мир") + _("миру\tмир").expandtabs(2, ok="миру мир") # find, index - tested in test_strings_index - _("мир").isalnum() - _("мир!").isalnum() - _("мир").isalpha() - _("мир!").isalpha() - _("мир").isascii() - _("hello").isascii() - _("hellЫ").isascii() - _("123 мир").isdecimal() - _("123 q").isdecimal() - _("123").isdecimal() - _("мир").isdigit() - _("123 мир").isdigit() - _("123 q").isdigit() - _("123").isdigit() - _("٤").isdigit() # arabic 4 - _("мир").isidentifier() - _("мир$").isidentifier() - _("мир").islower() - _("Мир").islower() - _("мир").isnumeric() - _("123").isnumeric() - _("0x123").isnumeric() - _("мир").isprintable() - _("\u2009").isspace() # thin space - _(" ").isspace() - _("мир").isspace() - _("мир").istitle() - _("Мир").istitle() - _(" мир ").join(["да", "май", "труд"]) - _("мир").ljust(10) - _("мир").ljust(10, 'ж') - _("МиР").lower() - _("\u2009 мир").lstrip() - _("\u2009 мир\u2009 ").lstrip() - _("мммир").lstrip('ми') - _("миру мир").partition('ру') - _("миру мир").partition('ж') - _("миру мир").removeprefix("мир") - _("миру мир").removesuffix("мир") - _("миру 
мир").replace("ир", "ж") - _("миру мир").replace("ир", "ж", 1) + _("мир").isalnum( ok=True) + _("мир!").isalnum( ok=False) + _("мир").isalpha( ok=True) + _("мир!").isalpha( ok=False) + _("мир").isascii( ok=False, optional=True) # py3.7 + _("hello").isascii( ok=True, optional=True) # py3.7 + _("hellЫ").isascii( ok=False, optional=True) # py3.7 + _("123 мир").isdecimal( ok=False) + _("123 q").isdecimal( ok=False) + _("123").isdecimal( ok=True) + _("мир").isdigit( ok=False) + _("123 мир").isdigit( ok=False) + _("123 q").isdigit( ok=False) + _("123").isdigit( ok=True) + _("٤").isdigit( ok=True) # arabic 4 + _("мир").isidentifier( ok=True, optional=True) # py3.0 + _("мир$").isidentifier( ok=False, optional=True) # py3.0 + _("мир").islower( ok=True) + _("Мир").islower( ok=False) + _("мир").isnumeric( ok=False) + _("123").isnumeric( ok=True) + _("0x123").isnumeric( ok=False) + _("мир").isprintable( ok=True, optional=True) # py3.0 + _("\u2009").isspace( ok=x32(True,False)) # thin space + _(" ").isspace( ok=True) + _("мир").isspace( ok=False) + _("мир").istitle( ok=False) + _("Мир").istitle( ok=True) + _("МИр").istitle( ok=False) + _(" мир ").join(["да", "май", "труд"], ok="да мир май мир труд") + _("мир").ljust(10, ok="мир ") + _("мир").ljust(10, 'ж', ok="миржжжжжжж") + _("МиР").lower( ok="мир") + _("\u2009 мир").lstrip( ok=x32("мир", "\u2009 мир")) + _("\u2009 мир\u2009 ").lstrip( ok=x32("мир\u2009 ", "\u2009 мир\u2009 ")) + _("мммир").lstrip('ми', ok="р") + _("миру мир").partition('ру', ok=("ми", "ру", " мир")) + _("миру мир").partition('ж', ok=("миру мир", "", "")) + _("миру мир").removeprefix("мир", ok="у мир", optional=True) # py3.9 + _("миру мир").removesuffix("мир", ok="миру ", optional=True) # py3.9 + _("миру мир").replace("ир", "ж", ok="мжу мж") + _("миру мир").replace("ир", "ж", 1, ok="мжу мир") # rfind, rindex - tested in test_strings_index - _("мир").rjust(10) - _("мир").rjust(10, 'ж') - _("миру мир").rpartition('ру') - _("миру мир").rpartition('ж') - 
_("мир").rsplit() - _("привет мир").rsplit() - _("привет\u2009мир").rsplit() - _("привет мир").rsplit("и") - _("привет мир").rsplit("и", 1) - _("мир \u2009").rstrip() - _(" мир \u2009").rstrip() - _("мируу").rstrip('ру') - _("мир").split() - _("привет мир").split() - _("привет\u2009мир").split() - _("привет мир").split("и") - _("привет мир").split("и", 1) - _("мир").splitlines() - _("миру\nмир").splitlines() - _("миру\nмир").splitlines(True) - _("миру\nмир\n").splitlines(True) - _("мир\nтруд\nмай\n").splitlines() - _("мир\nтруд\nмай\n").splitlines(True) + _("мир").rjust(10, ok=" мир") + _("мир").rjust(10, 'ж', ok="жжжжжжжмир") + _("миру мир").rpartition('ру', ok=("ми", "ру", " мир")) + _("миру мир").rpartition('ж', ok=("", "", "миру мир")) + _("мир").rsplit( ok=["мир"]) + _("привет мир").rsplit( ok=["привет", "мир"]) + _("привет\u2009мир").rsplit( ok=x32(["привет", "мир"], ["привет\u2009мир"])) + _("привет мир").rsplit("и", ok=["пр", "вет м", "р"]) + _("привет мир").rsplit("и", 1, ok=["привет м", "р"]) + _("мир \u2009").rstrip( ok=x32("мир", "мир \u2009")) + _(" мир \u2009").rstrip( ok=x32(" мир", " мир \u2009")) + _("мируу").rstrip('ру', ok="ми") + _("мир").split( ok=["мир"]) + _("привет мир").split( ok=["привет", "мир"]) + _("привет\u2009мир").split( ok=x32(['привет', 'мир'], ["привет\u2009мир"])) + _("привет мир").split("и", ok=["пр", "вет м", "р"]) + _("привет мир").split("и", 1, ok=["пр", "вет мир"]) + _("мир").splitlines( ok=["мир"]) + _("миру\nмир").splitlines( ok=["миру", "мир"]) + _("миру\nмир").splitlines(True, ok=["миру\n", "мир"]) + _("миру\nмир\n").splitlines(True, ok=["миру\n", "мир\n"]) + _("мир\nтруд\nмай\n").splitlines( ok=["мир", "труд", "май"]) + _("мир\nтруд\nмай\n").splitlines(True, ok=["мир\n", "труд\n", "май\n"]) # startswith - tested in test_strings_index - _("\u2009 мир \u2009").strip() - _("миру мир").strip('мир') - _("МиР").swapcase() - _("МиР").title() - _("мир").translate({ord(u'м'):ord(u'и'), ord(u'и'):'я', ord(u'р'):None}) - 
_("МиР").upper() - _("мир").zfill(10) - _("123").zfill(10) + _("\u2009 мир \u2009").strip( ok=x32("мир", "\u2009 мир \u2009")) + _("миру мир").strip('мир', ok="у ") + _("МиР").swapcase( ok="мИр") + _("МиР").title( ok="Мир") + _("мир").translate({ord(u'м'):ord(u'и'), ord(u'и'):'я', ord(u'р'):None}, ok="ия") + _("МиР").upper( ok="МИР") + _("мир").zfill(10, ok="0000000мир") + _("123").zfill(10, ok="0000000123") # verify bstr.translate in bytes mode From e75d21fdf22459e40bb2fc99cd4a4920e47dc9e8 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 17:38:18 +0300 Subject: [PATCH 06/29] gpython: tests: Factorize test_Xruntime Factor-out subroutine to run tfunc in subprocess interpreter spawned with `-X xopt=xval`. This helps clarity and later in addition to `-X gpython.runtime` we will also need it to verify `-X gpython.strings`. --- gpython/gpython_test.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/gpython/gpython_test.py b/gpython/gpython_test.py index f38b56a..e7d1336 100644 --- a/gpython/gpython_test.py +++ b/gpython/gpython_test.py @@ -359,20 +359,26 @@ def test_pymain_run_via_relpath(): out2 = pyout(['./__init__.py'] + argv, pyexe=sys._gpy_underlying_executable, cwd=here) assert out1 == out2 + # verify -X gpython.runtime=... @gpython_only def test_Xruntime(runtime): + _xopt_assert_in_subprocess('gpython.runtime', runtime, + assert_gevent_activated if runtime != 'threads' else \ + assert_gevent_not_activated) + +# _xopt_assert_in_subprocess runs tfunc in subprocess interpreter spawned with +# `-X xopt=xval` and checks that there is no error. 
+def _xopt_assert_in_subprocess(xopt, xval, tfunc): + XOPT = xopt.upper().replace('.','_') # gpython.runtime -> GPYTHON_RUNTIME env = os.environ.copy() - env.pop('GPYTHON_RUNTIME', None) # del + env.pop(XOPT, None) # del argv = [] - if runtime != '': - argv += ['-X', 'gpython.runtime='+runtime] + if xval != '': + argv += ['-X', xopt+'='+xval] prog = 'from gpython import gpython_test as t; ' - if runtime != 'threads': - prog += 't.assert_gevent_activated(); ' - else: - prog += 't.assert_gevent_not_activated(); ' + prog += 't.%s(); ' % tfunc.__name__ prog += 'print("ok")' argv += ['-c', prog] From 5716b1303c6caf803d4042116e1748fe10b06208 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 17:48:40 +0300 Subject: [PATCH 07/29] gpython: Fix `gpython -X gpython.runtime=threads` to spawn subinterpreters with threads runtime by default Previously it was not the case and gpython with default being gevent runtime was spawned even if parent gpython was instructed to use threads runtime: (z-dev) kirr@deca:~/src/tools/go/pygolang$ gpython -X gpython.runtime=threads Python 2.7.18 (default, Apr 28 2021, 17:39:59) [GCC 10.2.1 20210110] [GPython 0.1] [threads] on linux2 Type "help", "copyright", "credits" or "license" for more information. (InteractiveConsole) >>> import sys >>> sys.version '2.7.18 (default, Apr 28 2021, 17:39:59) \n[GCC 10.2.1 20210110] [GPython 0.1] [threads]' <-- NOTE threads >>> import subprocess subprocess.call(sys.executable)ble) Python 2.7.18 (default, Apr 28 2021, 17:39:59) [GCC 10.2.1 20210110] [GPython 0.1] [gevent 21.1.2] on linux2 <-- NOTE gevent Type "help", "copyright", "credits" or "license" for more information. 
(InteractiveConsole) >>> --- gpython/__init__.py | 5 +++++ gpython/gpython_test.py | 7 ++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/gpython/__init__.py b/gpython/__init__.py index 4a3a7ff..45b0a16 100755 --- a/gpython/__init__.py +++ b/gpython/__init__.py @@ -408,6 +408,11 @@ def main(): argv = [sys.argv[0]] + argv_ + igetopt.argv + # propagate those settings as defaults to subinterpreters, so that e.g. + # sys.executable spawned from under `gpython -X gpython.runtime=threads` + # also uses "threads" runtime by default. + os.environ['GPYTHON_RUNTIME'] = gpy_runtime + # init initializes according to selected runtime # it is called after options are parsed and sys.path is setup correspondingly. # this way golang and gevent are imported from exactly the same place as diff --git a/gpython/gpython_test.py b/gpython/gpython_test.py index e7d1336..420d0d4 100644 --- a/gpython/gpython_test.py +++ b/gpython/gpython_test.py @@ -369,6 +369,9 @@ def test_Xruntime(runtime): # _xopt_assert_in_subprocess runs tfunc in subprocess interpreter spawned with # `-X xopt=xval` and checks that there is no error. +# +# It is also verified that tfunc runs ok in sub-subprocess interpreter spawned +# _without_ `-X ...`, i.e. once given -X setting is inherited by spawned interpreters. 
def _xopt_assert_in_subprocess(xopt, xval, tfunc): XOPT = xopt.upper().replace('.','_') # gpython.runtime -> GPYTHON_RUNTIME env = os.environ.copy() @@ -377,8 +380,10 @@ def _xopt_assert_in_subprocess(xopt, xval, tfunc): argv = [] if xval != '': argv += ['-X', xopt+'='+xval] - prog = 'from gpython import gpython_test as t; ' + prog = import_t = 'from gpython import gpython_test as t; ' prog += 't.%s(); ' % tfunc.__name__ + prog += import_t # + same in subprocess + prog += "t.pyrun(['-c', '%s t.%s(); ']); " % (import_t, tfunc.__name__) prog += 'print("ok")' argv += ['-c', prog] From 21fab97588e19f6c596f94978e7cb266f5b02608 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 17:59:34 +0300 Subject: [PATCH 08/29] gpython: Fix thinko when rejecting unknown -X option Before: (z-dev) kirr@deca:~/src/tools/go/pygolang$ gpython -X gpython.zzz Traceback (most recent call last): File "/home/kirr/src/wendelin/venv/z-dev/bin/gpython", line 3, in from gpython import main; main() File "/home/kirr/src/tools/go/pygolang/gpython/__init__.py", line 397, in main raise RuntimeError('gpython: unknown -X option %s' % opt) RuntimeError: gpython: unknown -X option -X <-- NOTE After: (z-dev) kirr@deca:~/src/tools/go/pygolang$ gpython -X gpython.zzz Traceback (most recent call last): File "/home/kirr/src/wendelin/venv/z-dev/bin/gpython", line 3, in from gpython import main; main() File "/home/kirr/src/tools/go/pygolang/gpython/__init__.py", line 397, in main raise RuntimeError('gpython: unknown -X option %s' % arg) RuntimeError: gpython: unknown -X option gpython.zzz <-- NOTE --- gpython/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gpython/__init__.py b/gpython/__init__.py index 45b0a16..c6be786 100755 --- a/gpython/__init__.py +++ b/gpython/__init__.py @@ -394,7 +394,7 @@ def main(): sys._xoptions['gpython.runtime'] = gpy_runtime else: - raise RuntimeError('gpython: unknown -X option %s' % opt) + raise RuntimeError('gpython: unknown -X 
option %s' % arg) continue From 4546aaecf80f348b811da94772edf34ba75f922c Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 26 Mar 2023 17:06:30 +0300 Subject: [PATCH 09/29] golang_str: Switch bstr/ustr to cdef classes For gpython to switch builtin str/unicode to bstr/ustr we will need bstr/ustr to have exactly the same C layout as builtin string types. This is possible to achieve only via `cdef class`. It is also good to switch to `cdef class` for RAM savings - from https://github.com/cython/cython/pull/5212#issuecomment-1387659026 : # what Cython does at runtime for `class MyBytes(bytes)` In [3]: MyBytes = type('MyBytes', (bytes,), {'__slots__': ()}) In [4]: MyBytes Out[4]: __main__.MyBytes In [5]: a = bytes(b'123') In [6]: b = MyBytes(b'123') In [7]: a Out[7]: b'123' In [8]: b Out[8]: b'123' In [9]: a == b Out[9]: True In [10]: import sys In [11]: sys.getsizeof(a) Out[11]: 36 In [12]: sys.getsizeof(b) Out[12]: 52 So with `cdef class` we gain more control and optimize memory usage. This was not done before because cython forbids to `cdef class X(bytes)` due to https://github.com/cython/cython/issues/711. We work it around in setup.py with draft for proper patch pre-posted to upstream in https://github.com/cython/cython/pull/5212 . 
--- golang/_golang_str.pyx | 174 +++++++++++++++++++++++++++++--------- golang/golang_str_test.py | 21 ++++- setup.py | 19 +++++ 3 files changed, 173 insertions(+), 41 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 3294c7c..59cfe6d 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -29,6 +29,15 @@ from cpython cimport Py_EQ, Py_NE, Py_LT, Py_GT, Py_LE, Py_GE from cpython.iterobject cimport PySeqIter_New from cpython cimport PyThreadState_GetDict, PyDict_SetItem from cpython cimport PyObject_CheckBuffer + +cdef extern from "Python.h": + ctypedef struct PyBytesObject: + pass + +cdef extern from "Python.h": + ctypedef struct PyUnicodeObject: + pass + cdef extern from "Python.h": """ #if PY_MAJOR_VERSION < 3 @@ -45,6 +54,7 @@ cdef extern from "Python.h": cdef extern from "Python.h": ctypedef int (*initproc)(object, PyObject *, PyObject *) except -1 ctypedef struct _XPyTypeObject "PyTypeObject": + PyObject* tp_new(PyTypeObject*, PyObject*, PyObject*) except NULL initproc tp_init PySequenceMethods *tp_as_sequence @@ -54,6 +64,8 @@ cdef extern from "Python.h": object (*sq_slice) (object, Py_ssize_t, Py_ssize_t) # present only on py2 +from cython cimport no_gc + from libc.stdint cimport uint8_t from libc.stdio cimport FILE @@ -128,7 +140,12 @@ cdef _pyb(bcls, s): # -> ~bstr | None return None assert type(s) is bytes - return bytes.__new__(bcls, s) + # like bytes.__new__(bcls, s) but call bytes.tp_new directly + # else tp_new_wrapper complains because pybstr.tp_new != bytes.tp_new + argv = (s,) + obj = (<_XPyTypeObject*>bytes).tp_new(bcls, argv, NULL) + Py_DECREF(obj) + return obj cdef _pyu(ucls, s): # -> ~ustr | None if type(s) is ucls: @@ -147,7 +164,12 @@ cdef _pyu(ucls, s): # -> ~ustr | None return None assert type(s) is unicode - return unicode.__new__(ucls, s) + # like unicode .__new__(bcls, s) but call unicode.tp_new directly + # else tp_new_wrapper complains because pyustr.tp_new != unicode.tp_new + argv = (s,) + obj 
= (<_XPyTypeObject*>unicode).tp_new(ucls, argv, NULL) + Py_DECREF(obj) + return obj # _ifbuffer_data returns contained data if obj provides buffer interface. cdef _ifbuffer_data(obj): # -> bytes|None @@ -220,8 +242,8 @@ def pyuchr(int i): # -> 1-character ustr return pyu(unichr(i)) -# XXX cannot `cdef class`: github.com/cython/cython/issues/711 -class pybstr(bytes): +@no_gc # note setup.py assist this to compile despite +cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 """bstr is byte-string. It is based on bytes and can automatically convert to/from unicode. @@ -253,11 +275,10 @@ class pybstr(bytes): See also: b, ustr/u. """ - # don't allow to set arbitrary attributes. - # won't be needed after switch to -> `cdef class` - __slots__ = () - - def __new__(cls, object='', encoding=None, errors=None): + # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) + # pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↓ .____new__() . 
+ @staticmethod + def ____new__(cls, object='', encoding=None, errors=None): # encoding or errors -> object must expose buffer interface if not (encoding is None and errors is None): object = _buffer_decode(object, encoding, errors) @@ -360,8 +381,10 @@ class pybstr(bytes): def __add__(a, b): # NOTE Cython < 3 does not automatically support __radd__ for cdef class # https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods - # but pybstr is currently _not_ cdef'ed class # see also https://github.com/cython/cython/issues/4750 + if type(a) is not pybstr: + assert type(b) is pybstr + return b.__radd__(a) return pyb(bytes.__add__(a, _pyb_coerce(b))) def __radd__(b, a): @@ -377,6 +400,9 @@ class pybstr(bytes): # __mul__, __rmul__ (no need to override __imul__) def __mul__(a, b): + if type(a) is not pybstr: + assert type(b) is pybstr + return b.__rmul__(a) return pyb(bytes.__mul__(a, b)) def __rmul__(b, a): return b.__mul__(a) @@ -436,8 +462,7 @@ class pybstr(bytes): # all other string methods def capitalize(self): return pyb(pyu(self).capitalize()) - if _strhas('casefold'): # py3.3 TODO provide py2 implementation - def casefold(self): return pyb(pyu(self).casefold()) + def casefold(self): return pyb(pyu(self).casefold()) def center(self, width, fillchar=' '): return pyb(pyu(self).center(width, fillchar)) def count(self, sub, start=None, end=None): return bytes.count(self, _pyb_coerce(sub), start, end) @@ -463,12 +488,10 @@ class pybstr(bytes): # isascii(self) no need to override def isdecimal(self): return pyu(self).isdecimal() def isdigit(self): return pyu(self).isdigit() - if _strhas('isidentifier'): # py3 TODO provide fallback implementation - def isidentifier(self): return pyu(self).isidentifier() + def isidentifier(self): return pyu(self).isidentifier() def islower(self): return pyu(self).islower() def isnumeric(self): return pyu(self).isnumeric() - if _strhas('isprintable'): # py3 TODO provide fallback implementation - 
def isprintable(self): return pyu(self).isprintable() + def isprintable(self): return pyu(self).isprintable() def isspace(self): return pyu(self).isspace() def istitle(self): return pyu(self).istitle() @@ -477,10 +500,8 @@ class pybstr(bytes): def lower(self): return pyb(pyu(self).lower()) def lstrip(self, chars=None): return pyb(pyu(self).lstrip(chars)) def partition(self, sep): return tuple(pyb(_) for _ in bytes.partition(self, _pyb_coerce(sep))) - if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation - def removeprefix(self, prefix): return pyb(pyu(self).removeprefix(prefix)) - if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation - def removesuffix(self, suffix): return pyb(pyu(self).removesuffix(suffix)) + def removeprefix(self, prefix): return pyb(pyu(self).removeprefix(prefix)) + def removesuffix(self, suffix): return pyb(pyu(self).removesuffix(suffix)) def replace(self, old, new, count=-1): return pyb(bytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count)) # NOTE rfind/rindex & friends should return byte-position, not unicode-position @@ -528,8 +549,35 @@ class pybstr(bytes): return pyustr.maketrans(x, y, z) -# XXX cannot `cdef class` with __new__: https://github.com/cython/cython/issues/799 -class pyustr(unicode): +# hand-made pybstr.__new__ (workaround for https://github.com/cython/cython/issues/799) +cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw) except NULL: + argv = () + if _argv != NULL: + argv = _argv + kw = {} + if _kw != NULL: + kw = _kw + + cdef object x = pybstr.____new__(_cls, *argv, **kw) + Py_INCREF(x) + return x +(<_XPyTypeObject*>pybstr).tp_new = &_pybstr_tp_new + +# bytes uses "optimized" and custom .tp_basicsize and .tp_itemsize: +# https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L26-L32 +# https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L3816-L3820 +(pybstr) .tp_basicsize = 
(bytes).tp_basicsize +(pybstr) .tp_itemsize = (bytes).tp_itemsize + +# make sure pybstr C layout corresponds to bytes C layout exactly +# we patched cython to allow from-bytes cdef class inheritance and we also set +# .tp_basicsize directly above. All this works ok only if C layouts for pybstr +# and bytes are completely the same. +assert sizeof(pybstr) == sizeof(PyBytesObject) + + +@no_gc +cdef class pyustr(unicode): """ustr is unicode-string. It is based on unicode and can automatically convert to/from bytes. @@ -556,11 +604,10 @@ class pyustr(unicode): See also: u, bstr/b. """ - # don't allow to set arbitrary attributes. - # won't be needed after switch to -> `cdef class` - __slots__ = () - - def __new__(cls, object='', encoding=None, errors=None): + # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) + # pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↓ .____new__() . + @staticmethod + def ____new__(cls, object='', encoding=None, errors=None): # encoding or errors -> object must expose buffer interface if not (encoding is None and errors is None): object = _buffer_decode(object, encoding, errors) @@ -652,8 +699,10 @@ class pyustr(unicode): def __add__(a, b): # NOTE Cython < 3 does not automatically support __radd__ for cdef class # https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods - # but pyustr is currently _not_ cdef'ed class # see also https://github.com/cython/cython/issues/4750 + if type(a) is not pyustr: + assert type(b) is pyustr + return b.__radd__(a) return pyu(unicode.__add__(a, _pyu_coerce(b))) def __radd__(b, a): @@ -671,6 +720,9 @@ class pyustr(unicode): # __mul__, __rmul__ (no need to override __imul__) def __mul__(a, b): + if type(a) is not pyustr: + assert type(b) is pyustr + return b.__rmul__(a) return pyu(unicode.__mul__(a, b)) def __rmul__(b, a): return b.__mul__(a) @@ -723,8 +775,7 @@ class pyustr(unicode): # all other string methods 
def capitalize(self): return pyu(unicode.capitalize(self)) - if _strhas('casefold'): # py3.3 TODO provide fallback implementation - def casefold(self): return pyu(unicode.casefold(self)) + def casefold(self): return pyu(unicode.casefold(self)) def center(self, width, fillchar=' '): return pyu(unicode.center(self, width, _pyu_coerce(fillchar))) def count(self, sub, start=None, end=None): # cython optimizes unicode.count to directly call PyUnicode_Count - @@ -768,10 +819,8 @@ class pyustr(unicode): def lower(self): return pyu(unicode.lower(self)) def lstrip(self, chars=None): return pyu(unicode.lstrip(self, _xpyu_coerce(chars))) def partition(self, sep): return tuple(pyu(_) for _ in unicode.partition(self, _pyu_coerce(sep))) - if _strhas('removeprefix'): # py3.9 TODO provide fallback implementation - def removeprefix(self, prefix): return pyu(unicode.removeprefix(self, _pyu_coerce(prefix))) - if _strhas('removesuffix'): # py3.9 TODO provide fallback implementation - def removesuffix(self, suffix): return pyu(unicode.removesuffix(self, _pyu_coerce(suffix))) + def removeprefix(self, prefix): return pyu(unicode.removeprefix(self, _pyu_coerce(prefix))) + def removesuffix(self, suffix): return pyu(unicode.removesuffix(self, _pyu_coerce(suffix))) def replace(self, old, new, count=-1): return pyu(unicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count)) def rfind(self, sub, start=None, end=None): if start is None: start = 0 @@ -864,6 +913,24 @@ class pyustr(unicode): return t +# hand-made pyustr.__new__ (workaround for https://github.com/cython/cython/issues/799) +cdef PyObject* _pyustr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw) except NULL: + argv = () + if _argv != NULL: + argv = _argv + kw = {} + if _kw != NULL: + kw = _kw + + cdef object x = pyustr.____new__(_cls, *argv, **kw) + Py_INCREF(x) + return x +(<_XPyTypeObject*>pyustr).tp_new = &_pyustr_tp_new + +# similarly to bytes - want same C layout for pyustr vs unicode +assert sizeof(pyustr) == 
sizeof(PyUnicodeObject) + + # _pyustrIter wraps unicode iterator to return pyustr for each yielded character. cdef class _pyustrIter: cdef object uiter @@ -941,6 +1008,31 @@ if PY2: (<_XPyTypeObject*>pyustr) .tp_as_sequence.sq_slice = NULL +# ---- adjust bstr/ustr classes after what cython generated ---- + +# remove unsupported bstr/ustr methods. do it outside of `cdef class` to +# workaround https://github.com/cython/cython/issues/4556 (`if ...` during +# `cdef class` is silently handled wrongly) +cdef _bstrustr_remove_unsupported_slots(): + vslot = ( + 'casefold', # py3.3 TODO provide py2 implementation + 'isidentifier', # py3 TODO provide fallback implementation + 'isprintable', # py3 TODO provide fallback implementation + 'removeprefix', # py3.9 TODO provide fallback implementation + 'removesuffix', # py3.9 TODO provide fallback implementation + ) + for slot in vslot: + if not hasattr(unicode, slot): + _patch_slot(pybstr, slot, DEL) + try: + _patch_slot(pyustr, slot, DEL) + except KeyError: # e.g. we do not define ustr.isprintable ourselves + pass +_bstrustr_remove_unsupported_slots() + + +# ---- quoting ---- + # _bpysmartquote_u3b2 quotes bytes/bytearray s the same way python would do for string. # # nonascii_escape indicates whether \xNN with NN >= 0x80 is present in the output. @@ -1321,12 +1413,15 @@ cdef _InBStringify _inbstringify_get(): # # if func_or_descr is descriptor (has __get__), it is installed as is. # otherwise it is wrapped with "unbound method" descriptor. +# +# if func_or_descr is DEL the slot is removed from typ's __dict__. 
+cdef DEL = object() cdef _patch_slot(PyTypeObject* typ, str name, object func_or_descr): typdict = (typ.tp_dict) #print("\npatching %s.%s with %r" % (typ.tp_name, name, func_or_descr)) #print("old: %r" % typdict.get(name)) - if hasattr(func_or_descr, '__get__'): + if hasattr(func_or_descr, '__get__') or func_or_descr is DEL: descr = func_or_descr else: func = func_or_descr @@ -1335,7 +1430,10 @@ cdef _patch_slot(PyTypeObject* typ, str name, object func_or_descr): else: descr = _UnboundMethod(func) - typdict[name] = descr + if descr is DEL: + del typdict[name] + else: + typdict[name] = descr #print("new: %r" % typdict.get(name)) PyType_Modified(typ) @@ -1686,10 +1784,6 @@ class _BFormatter(pystring.Formatter): # ---- misc ---- -# _strhas returns whether unicode string type has specified method. -cdef bint _strhas(str meth) except *: - return hasattr(unicode, meth) - cdef object _xpyu_coerce(obj): return _pyu_coerce(obj) if obj is not None else None diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 1c0c574..71c4cff 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -31,7 +31,7 @@ import six from six import text_type as unicode, unichr from six.moves import range as xrange -import re, pickle, copy, types +import gc, re, pickle, copy, types import array, collections @@ -284,6 +284,25 @@ def _(text, breprok, ureprok): bs.hello = 1 +# verify that bstr/ustr are created with correct refcount. 
+def test_strings_refcount(): + # first verify our logic on std type + obj = xbytes(u'abc'); assert type(obj) is bytes + gc.collect(); assert sys.getrefcount(obj) == 1+1 # +1 due to obj passed to getrefcount call + + # bstr + obj = b('abc'); assert type(obj) is bstr + gc.collect(); assert sys.getrefcount(obj) == 1+1 + obj = bstr('abc'); assert type(obj) is bstr + gc.collect(); assert sys.getrefcount(obj) == 1+1 + + # ustr + obj = u('abc'); assert type(obj) is ustr + gc.collect(); assert sys.getrefcount(obj) == 1+1 + obj = ustr('abc'); assert type(obj) is ustr + gc.collect(); assert sys.getrefcount(obj) == 1+1 + + # verify memoryview(bstr|ustr). def test_strings_memoryview(): bs = b('мир') diff --git a/setup.py b/setup.py index 37e9f9d..bd5148b 100644 --- a/setup.py +++ b/setup.py @@ -18,6 +18,25 @@ # See COPYING file for full licensing terms. # See https://www.nexedi.com/licensing for rationale and options. +# patch cython to allow `cdef class X(bytes)` while building pygolang to +# workaround https://github.com/cython/cython/issues/711 +# see `cdef class pybstr` in golang/_golang_str.pyx for details. 
+# (should become unneeded with cython 3 once https://github.com/cython/cython/pull/5212 is finished) +import inspect +from Cython.Compiler.PyrexTypes import BuiltinObjectType +def pygo_cy_builtin_type_name_set(self, v): + self._pygo_name = v +def pygo_cy_builtin_type_name_get(self): + name = self._pygo_name + if name == 'bytes': + caller = inspect.currentframe().f_back.f_code.co_name + if caller == 'analyse_declarations': + # need anything different from 'bytes' to deactivate check in + # https://github.com/cython/cython/blob/c21b39d4/Cython/Compiler/Nodes.py#L4759-L4762 + name = 'xxx' + return name +BuiltinObjectType.name = property(pygo_cy_builtin_type_name_get, pygo_cy_builtin_type_name_set) + from setuptools import find_packages from setuptools.command.install_scripts import install_scripts as _install_scripts from setuptools.command.develop import develop as _develop From 7fd58d3433ae217218afbf4e890a76cdf0dd70d4 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Sun, 26 Mar 2023 20:25:31 +0300 Subject: [PATCH 10/29] golang_str: Invoke bytes/unicode methods via zbytes/zunicode GPython will patch builtin bytes and unicode types. zbytes and zunicode will refer to original unpatched types. We will use them to invoke original bytes/unicode methods. NOTE we will test against bytes/unicode - not zbytes/zunicode - when inspecting type of objects. In other words we will use original bytes/unicode types only to refer to their original methods and code. 
--- golang/_golang_str.pyx | 180 +++++++++++++++++++++-------------------- 1 file changed, 94 insertions(+), 86 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 59cfe6d..fae05e8 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -31,10 +31,12 @@ from cpython cimport PyThreadState_GetDict, PyDict_SetItem from cpython cimport PyObject_CheckBuffer cdef extern from "Python.h": + PyTypeObject PyBytes_Type ctypedef struct PyBytesObject: pass cdef extern from "Python.h": + PyTypeObject PyUnicode_Type ctypedef struct PyUnicodeObject: pass @@ -80,6 +82,12 @@ else: import copy_reg as pycopyreg +# zbytes/zunicode point to original std bytes/unicode types even if they will be patched. +# we use them to invoke original bytes/unicode methods. +cdef object zbytes = (&PyBytes_Type) +cdef object zunicode = (&PyUnicode_Type) + + def pyb(s): # -> bstr """b converts object to bstr. @@ -140,10 +148,10 @@ cdef _pyb(bcls, s): # -> ~bstr | None return None assert type(s) is bytes - # like bytes.__new__(bcls, s) but call bytes.tp_new directly - # else tp_new_wrapper complains because pybstr.tp_new != bytes.tp_new + # like zbytes.__new__(bcls, s) but call zbytes.tp_new directly + # else tp_new_wrapper complains because pybstr.tp_new != zbytes.tp_new argv = (s,) - obj = (<_XPyTypeObject*>bytes).tp_new(bcls, argv, NULL) + obj = (<_XPyTypeObject*>zbytes).tp_new(bcls, argv, NULL) Py_DECREF(obj) return obj @@ -164,10 +172,10 @@ cdef _pyu(ucls, s): # -> ~ustr | None return None assert type(s) is unicode - # like unicode .__new__(bcls, s) but call unicode.tp_new directly - # else tp_new_wrapper complains because pyustr.tp_new != unicode.tp_new + # like zunicode .__new__(bcls, s) but call zunicode.tp_new directly + # else tp_new_wrapper complains because pyustr.tp_new != zunicode.tp_new argv = (s,) - obj = (<_XPyTypeObject*>unicode).tp_new(ucls, argv, NULL) + obj = (<_XPyTypeObject*>zunicode).tp_new(ucls, argv, NULL) Py_DECREF(obj) return obj @@ 
-317,7 +325,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 # retrieve state, which gives bstr, not bytes. Fix state to be bytes ourselves. def __reduce_ex__(self, protocol): if protocol >= 2: - return bytes.__reduce_ex__(self, protocol) + return zbytes.__reduce_ex__(self, protocol) return ( pycopyreg._reconstructor, (self.__class__, self.__class__, _bdata(self)) @@ -332,7 +340,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 if PY_MAJOR_VERSION >= 3: return hash(pyu(self)) else: - return bytes.__hash__(self) + return zbytes.__hash__(self) # == != < > <= >= # NOTE == and != are special: they must succeed against any type so that @@ -342,18 +350,18 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 b = _pyb_coerce(b) except TypeError: return False - return bytes.__eq__(a, b) + return zbytes.__eq__(a, b) def __ne__(a, b): return not a.__eq__(b) - def __lt__(a, b): return bytes.__lt__(a, _pyb_coerce(b)) - def __gt__(a, b): return bytes.__gt__(a, _pyb_coerce(b)) - def __le__(a, b): return bytes.__le__(a, _pyb_coerce(b)) - def __ge__(a, b): return bytes.__ge__(a, _pyb_coerce(b)) + def __lt__(a, b): return zbytes.__lt__(a, _pyb_coerce(b)) + def __gt__(a, b): return zbytes.__gt__(a, _pyb_coerce(b)) + def __le__(a, b): return zbytes.__le__(a, _pyb_coerce(b)) + def __ge__(a, b): return zbytes.__ge__(a, _pyb_coerce(b)) # len - no need to override # [], [:] def __getitem__(self, idx): - x = bytes.__getitem__(self, idx) + x = zbytes.__getitem__(self, idx) if type(idx) is slice: return pyb(x) else: @@ -374,7 +382,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 def __contains__(self, key): # NOTE on py3 bytes.__contains__ accepts numbers and buffers. 
We don't want to # automatically coerce any of them to bytestrings - return bytes.__contains__(self, _pyb_coerce(key)) + return zbytes.__contains__(self, _pyb_coerce(key)) # __add__, __radd__ (no need to override __iadd__) @@ -385,7 +393,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 if type(a) is not pybstr: assert type(b) is pybstr return b.__radd__(a) - return pyb(bytes.__add__(a, _pyb_coerce(b))) + return pyb(zbytes.__add__(a, _pyb_coerce(b))) def __radd__(b, a): # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) @@ -403,7 +411,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 if type(a) is not pybstr: assert type(b) is pybstr return b.__rmul__(a) - return pyb(bytes.__mul__(a, b)) + return pyb(zbytes.__mul__(a, b)) def __rmul__(b, a): return b.__mul__(a) @@ -447,7 +455,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 if encoding == 'utf-8' and errors == 'surrogateescape': x = _utf8_decode_surrogateescape(self) else: - x = bytes.decode(self, encoding, errors) + x = zbytes.decode(self, encoding, errors) # on py2 e.g. 
bytes.decode('string-escape') returns bytes if PY_MAJOR_VERSION < 3 and isinstance(x, bytes): return pyb(x) @@ -465,7 +473,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 def casefold(self): return pyb(pyu(self).casefold()) def center(self, width, fillchar=' '): return pyb(pyu(self).center(width, fillchar)) - def count(self, sub, start=None, end=None): return bytes.count(self, _pyb_coerce(sub), start, end) + def count(self, sub, start=None, end=None): return zbytes.count(self, _pyb_coerce(sub), start, end) def endswith(self, suffix, start=None, end=None): if isinstance(suffix, tuple): @@ -475,13 +483,13 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 return False if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return bytes.endswith(self, _pyb_coerce(suffix), start, end) + return zbytes.endswith(self, _pyb_coerce(suffix), start, end) def expandtabs(self, tabsize=8): return pyb(pyu(self).expandtabs(tabsize)) # NOTE find/index & friends should return byte-position, not unicode-position - def find(self, sub, start=None, end=None): return bytes.find(self, _pyb_coerce(sub), start, end) - def index(self, sub, start=None, end=None): return bytes.index(self, _pyb_coerce(sub), start, end) + def find(self, sub, start=None, end=None): return zbytes.find(self, _pyb_coerce(sub), start, end) + def index(self, sub, start=None, end=None): return zbytes.index(self, _pyb_coerce(sub), start, end) def isalnum(self): return pyu(self).isalnum() def isalpha(self): return pyu(self).isalpha() @@ -495,21 +503,21 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 def isspace(self): return pyu(self).isspace() def istitle(self): return pyu(self).istitle() - def join(self, iterable): return pyb(bytes.join(self, (_pyb_coerce(_) for _ in iterable))) + def join(self, iterable): return pyb(zbytes.join(self, (_pyb_coerce(_) for _ in iterable))) def ljust(self, width, fillchar=' '): return 
pyb(pyu(self).ljust(width, fillchar)) def lower(self): return pyb(pyu(self).lower()) def lstrip(self, chars=None): return pyb(pyu(self).lstrip(chars)) - def partition(self, sep): return tuple(pyb(_) for _ in bytes.partition(self, _pyb_coerce(sep))) + def partition(self, sep): return tuple(pyb(_) for _ in zbytes.partition(self, _pyb_coerce(sep))) def removeprefix(self, prefix): return pyb(pyu(self).removeprefix(prefix)) def removesuffix(self, suffix): return pyb(pyu(self).removesuffix(suffix)) - def replace(self, old, new, count=-1): return pyb(bytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count)) + def replace(self, old, new, count=-1): return pyb(zbytes.replace(self, _pyb_coerce(old), _pyb_coerce(new), count)) # NOTE rfind/rindex & friends should return byte-position, not unicode-position - def rfind(self, sub, start=None, end=None): return bytes.rfind(self, _pyb_coerce(sub), start, end) - def rindex(self, sub, start=None, end=None): return bytes.rindex(self, _pyb_coerce(sub), start, end) + def rfind(self, sub, start=None, end=None): return zbytes.rfind(self, _pyb_coerce(sub), start, end) + def rindex(self, sub, start=None, end=None): return zbytes.rindex(self, _pyb_coerce(sub), start, end) def rjust(self, width, fillchar=' '): return pyb(pyu(self).rjust(width, fillchar)) - def rpartition(self, sep): return tuple(pyb(_) for _ in bytes.rpartition(self, _pyb_coerce(sep))) + def rpartition(self, sep): return tuple(pyb(_) for _ in zbytes.rpartition(self, _pyb_coerce(sep))) def rsplit(self, sep=None, maxsplit=-1): v = pyu(self).rsplit(sep, maxsplit) return list([pyb(_) for _ in v]) @@ -527,16 +535,16 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 return False if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return bytes.startswith(self, _pyb_coerce(prefix), start, end) + return zbytes.startswith(self, _pyb_coerce(prefix), start, end) def strip(self, chars=None): return pyb(pyu(self).strip(chars)) def 
swapcase(self): return pyb(pyu(self).swapcase()) def title(self): return pyb(pyu(self).title()) def translate(self, table, delete=None): # bytes mode (compatibility with str/py2) - if table is None or isinstance(table, bytes) or delete is not None: + if table is None or isinstance(table, zbytes) or delete is not None: if delete is None: delete = b'' - return pyb(bytes.translate(self, table, delete)) + return pyb(zbytes.translate(self, table, delete)) # unicode mode else: return pyb(pyu(self).translate(table)) @@ -566,8 +574,8 @@ cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw # bytes uses "optimized" and custom .tp_basicsize and .tp_itemsize: # https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L26-L32 # https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L3816-L3820 -(pybstr) .tp_basicsize = (bytes).tp_basicsize -(pybstr) .tp_itemsize = (bytes).tp_itemsize +(pybstr) .tp_basicsize = (zbytes).tp_basicsize +(pybstr) .tp_itemsize = (zbytes).tp_itemsize # make sure pybstr C layout corresponds to bytes C layout exactly # we patched cython to allow from-bytes cdef class inheritance and we also set @@ -646,7 +654,7 @@ cdef class pyustr(unicode): # retrieve state, which gives ustr, not unicode. Fix state to be unicode ourselves. 
def __reduce_ex__(self, protocol): if protocol >= 2: - return unicode.__reduce_ex__(self, protocol) + return zunicode.__reduce_ex__(self, protocol) return ( pycopyreg._reconstructor, (self.__class__, self.__class__, _udata(self)) @@ -656,7 +664,7 @@ cdef class pyustr(unicode): def __hash__(self): # see pybstr.__hash__ for why we stick to hash of current str if PY_MAJOR_VERSION >= 3: - return unicode.__hash__(self) + return zunicode.__hash__(self) else: return hash(pyb(self)) @@ -668,23 +676,23 @@ cdef class pyustr(unicode): b = _pyu_coerce(b) except TypeError: return False - return unicode.__eq__(a, b) + return zunicode.__eq__(a, b) def __ne__(a, b): return not a.__eq__(b) - def __lt__(a, b): return unicode.__lt__(a, _pyu_coerce(b)) - def __gt__(a, b): return unicode.__gt__(a, _pyu_coerce(b)) - def __le__(a, b): return unicode.__le__(a, _pyu_coerce(b)) - def __ge__(a, b): return unicode.__ge__(a, _pyu_coerce(b)) + def __lt__(a, b): return zunicode.__lt__(a, _pyu_coerce(b)) + def __gt__(a, b): return zunicode.__gt__(a, _pyu_coerce(b)) + def __le__(a, b): return zunicode.__le__(a, _pyu_coerce(b)) + def __ge__(a, b): return zunicode.__ge__(a, _pyu_coerce(b)) # len - no need to override # [], [:] def __getitem__(self, idx): - return pyu(unicode.__getitem__(self, idx)) + return pyu(zunicode.__getitem__(self, idx)) # __iter__ def __iter__(self): if PY_MAJOR_VERSION >= 3: - return _pyustrIter(unicode.__iter__(self)) + return _pyustrIter(zunicode.__iter__(self)) else: # on python 2 unicode does not have .__iter__ return PySeqIter_New(self) @@ -692,7 +700,7 @@ cdef class pyustr(unicode): # __contains__ def __contains__(self, key): - return unicode.__contains__(self, _pyu_coerce(key)) + return zunicode.__contains__(self, _pyu_coerce(key)) # __add__, __radd__ (no need to override __iadd__) @@ -703,7 +711,7 @@ cdef class pyustr(unicode): if type(a) is not pyustr: assert type(b) is pyustr return b.__radd__(a) - return pyu(unicode.__add__(a, _pyu_coerce(b))) + return 
pyu(zunicode.__add__(a, _pyu_coerce(b))) def __radd__(b, a): # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) @@ -723,7 +731,7 @@ cdef class pyustr(unicode): if type(a) is not pyustr: assert type(b) is pyustr return b.__rmul__(a) - return pyu(unicode.__mul__(a, b)) + return pyu(zunicode.__mul__(a, b)) def __rmul__(b, a): return b.__mul__(a) @@ -748,7 +756,7 @@ cdef class pyustr(unicode): # NOTE not e.g. `_bvformat(_pyu_coerce(format_spec), (self,))` because # the only format code that string.__format__ should support is # 's', not e.g. 'r'. - return pyu(unicode.__format__(self, format_spec)) + return pyu(zunicode.__format__(self, format_spec)) # encode/decode @@ -763,7 +771,7 @@ cdef class pyustr(unicode): if encoding == 'utf-8' and errors == 'surrogateescape': x = _utf8_encode_surrogateescape(self) else: - x = unicode.encode(self, encoding, errors) + x = zunicode.encode(self, encoding, errors) return pyb(x) if PY_MAJOR_VERSION < 3: @@ -774,15 +782,15 @@ cdef class pyustr(unicode): # all other string methods - def capitalize(self): return pyu(unicode.capitalize(self)) - def casefold(self): return pyu(unicode.casefold(self)) - def center(self, width, fillchar=' '): return pyu(unicode.center(self, width, _pyu_coerce(fillchar))) + def capitalize(self): return pyu(zunicode.capitalize(self)) + def casefold(self): return pyu(zunicode.casefold(self)) + def center(self, width, fillchar=' '): return pyu(zunicode.center(self, width, _pyu_coerce(fillchar))) def count(self, sub, start=None, end=None): # cython optimizes unicode.count to directly call PyUnicode_Count - # - cannot use None for start/stop https://github.com/cython/cython/issues/4737 if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.count(self, _pyu_coerce(sub), start, end) + return zunicode.count(self, _pyu_coerce(sub), start, end) def endswith(self, suffix, start=None, end=None): if isinstance(suffix, tuple): for _ in suffix: @@ -791,16 +799,16 @@ cdef 
class pyustr(unicode): return False if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.endswith(self, _pyu_coerce(suffix), start, end) - def expandtabs(self, tabsize=8): return pyu(unicode.expandtabs(self, tabsize)) + return zunicode.endswith(self, _pyu_coerce(suffix), start, end) + def expandtabs(self, tabsize=8): return pyu(zunicode.expandtabs(self, tabsize)) def find(self, sub, start=None, end=None): if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.find(self, _pyu_coerce(sub), start, end) + return zunicode.find(self, _pyu_coerce(sub), start, end) def index(self, sub, start=None, end=None): if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.index(self, _pyu_coerce(sub), start, end) + return zunicode.index(self, _pyu_coerce(sub), start, end) # isalnum(self) no need to override # isalpha(self) no need to override @@ -814,41 +822,41 @@ cdef class pyustr(unicode): # isspace(self) no need to override # istitle(self) no need to override - def join(self, iterable): return pyu(unicode.join(self, (_pyu_coerce(_) for _ in iterable))) - def ljust(self, width, fillchar=' '): return pyu(unicode.ljust(self, width, _pyu_coerce(fillchar))) - def lower(self): return pyu(unicode.lower(self)) - def lstrip(self, chars=None): return pyu(unicode.lstrip(self, _xpyu_coerce(chars))) - def partition(self, sep): return tuple(pyu(_) for _ in unicode.partition(self, _pyu_coerce(sep))) - def removeprefix(self, prefix): return pyu(unicode.removeprefix(self, _pyu_coerce(prefix))) - def removesuffix(self, suffix): return pyu(unicode.removesuffix(self, _pyu_coerce(suffix))) - def replace(self, old, new, count=-1): return pyu(unicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count)) + def join(self, iterable): return pyu(zunicode.join(self, (_pyu_coerce(_) for _ in iterable))) + def ljust(self, width, fillchar=' '): return pyu(zunicode.ljust(self, width, _pyu_coerce(fillchar))) + def lower(self): 
return pyu(zunicode.lower(self)) + def lstrip(self, chars=None): return pyu(zunicode.lstrip(self, _xpyu_coerce(chars))) + def partition(self, sep): return tuple(pyu(_) for _ in zunicode.partition(self, _pyu_coerce(sep))) + def removeprefix(self, prefix): return pyu(zunicode.removeprefix(self, _pyu_coerce(prefix))) + def removesuffix(self, suffix): return pyu(zunicode.removesuffix(self, _pyu_coerce(suffix))) + def replace(self, old, new, count=-1): return pyu(zunicode.replace(self, _pyu_coerce(old), _pyu_coerce(new), count)) def rfind(self, sub, start=None, end=None): if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.rfind(self, _pyu_coerce(sub), start, end) + return zunicode.rfind(self, _pyu_coerce(sub), start, end) def rindex(self, sub, start=None, end=None): if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.rindex(self, _pyu_coerce(sub), start, end) - def rjust(self, width, fillchar=' '): return pyu(unicode.rjust(self, width, _pyu_coerce(fillchar))) - def rpartition(self, sep): return tuple(pyu(_) for _ in unicode.rpartition(self, _pyu_coerce(sep))) + return zunicode.rindex(self, _pyu_coerce(sub), start, end) + def rjust(self, width, fillchar=' '): return pyu(zunicode.rjust(self, width, _pyu_coerce(fillchar))) + def rpartition(self, sep): return tuple(pyu(_) for _ in zunicode.rpartition(self, _pyu_coerce(sep))) def rsplit(self, sep=None, maxsplit=-1): - v = unicode.rsplit(self, _xpyu_coerce(sep), maxsplit) + v = zunicode.rsplit(self, _xpyu_coerce(sep), maxsplit) return list([pyu(_) for _ in v]) - def rstrip(self, chars=None): return pyu(unicode.rstrip(self, _xpyu_coerce(chars))) + def rstrip(self, chars=None): return pyu(zunicode.rstrip(self, _xpyu_coerce(chars))) def split(self, sep=None, maxsplit=-1): # cython optimizes unicode.split to directly call PyUnicode_Split - cannot use None for sep # and cannot also use object=NULL https://github.com/cython/cython/issues/4737 if sep is None: if 
PY_MAJOR_VERSION >= 3: - v = unicode.split(self, maxsplit=maxsplit) + v = zunicode.split(self, maxsplit=maxsplit) else: # on py2 unicode.split does not accept keyword arguments - v = _udata(self).split(None, maxsplit) + v = zunicode.split(self, None, maxsplit) else: - v = unicode.split(self, _pyu_coerce(sep), maxsplit) + v = zunicode.split(self, _pyu_coerce(sep), maxsplit) return list([pyu(_) for _ in v]) - def splitlines(self, keepends=False): return list(pyu(_) for _ in unicode.splitlines(self, keepends)) + def splitlines(self, keepends=False): return list(pyu(_) for _ in zunicode.splitlines(self, keepends)) def startswith(self, prefix, start=None, end=None): if isinstance(prefix, tuple): for _ in prefix: @@ -857,10 +865,10 @@ cdef class pyustr(unicode): return False if start is None: start = 0 if end is None: end = PY_SSIZE_T_MAX - return unicode.startswith(self, _pyu_coerce(prefix), start, end) - def strip(self, chars=None): return pyu(unicode.strip(self, _xpyu_coerce(chars))) - def swapcase(self): return pyu(unicode.swapcase(self)) - def title(self): return pyu(unicode.title(self)) + return zunicode.startswith(self, _pyu_coerce(prefix), start, end) + def strip(self, chars=None): return pyu(zunicode.strip(self, _xpyu_coerce(chars))) + def swapcase(self): return pyu(zunicode.swapcase(self)) + def title(self): return pyu(zunicode.title(self)) def translate(self, table): # unicode.translate does not accept bstr values @@ -869,10 +877,10 @@ cdef class pyustr(unicode): if not isinstance(v, int): # either unicode ordinal, v = _xpyu_coerce(v) # character or None t[k] = v - return pyu(unicode.translate(self, t)) + return pyu(zunicode.translate(self, t)) - def upper(self): return pyu(unicode.upper(self)) - def zfill(self, width): return pyu(unicode.zfill(self, width)) + def upper(self): return pyu(zunicode.upper(self)) + def zfill(self, width): return pyu(zunicode.zfill(self, width)) @staticmethod def maketrans(x=None, y=None, z=None): @@ -884,11 +892,11 @@ cdef class 
pyustr(unicode): if not isinstance(k, int): k = pyu(k) _[k] = v - return unicode.maketrans(_) + return zunicode.maketrans(_) elif z is None: - return unicode.maketrans(pyu(x), pyu(y)) # std maketrans does not accept b + return zunicode.maketrans(pyu(x), pyu(y)) # std maketrans does not accept b else: - return unicode.maketrans(pyu(x), pyu(y), pyu(z)) # ----//---- + return zunicode.maketrans(pyu(x), pyu(y), pyu(z)) # ----//---- # hand-made on py2 t = {} @@ -994,7 +1002,7 @@ IF PY2: assert isinstance(o, bytes) o = o o = bytes(buffer(o)) # change tp_type to bytes instead of pybstr - return (<_PyTypeObject_Print*>Py_TYPE(o)) .tp_print(o, f, Py_PRINT_RAW) + return (<_PyTypeObject_Print*>zbytes) .tp_print(o, f, Py_PRINT_RAW) (<_PyTypeObject_Print*>Py_TYPE(pybstr())) .tp_print = _pybstr_tp_print @@ -1691,7 +1699,7 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr #print('--> __mod__ ', repr(fmt1), ' % ', repr(arg)) try: - s = bytes.__mod__(fmt1, arg) + s = zbytes.__mod__(fmt1, arg) except ValueError as e: # adjust position in '... 
at index ' from fmt1 to fmt if len(e.args) == 1: @@ -1945,7 +1953,7 @@ def _utf8_decode_surrogateescape(const uint8_t[::1] s): # -> unicode def _utf8_encode_surrogateescape(s): # -> bytes assert isinstance(s, unicode) if PY_MAJOR_VERSION >= 3: - return unicode.encode(s, 'UTF-8', 'surrogateescape') + return zunicode.encode(s, 'UTF-8', 'surrogateescape') # py2 does not have surrogateescape error handler, and even if we # provide one, builtin unicode.encode() does not treat From baf84437527d32325bea3c59f504a41b341daf42 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 1 May 2023 19:46:26 +0300 Subject: [PATCH 11/29] golang_str: pybstr -> _pybstr ; pyustr -> _pyustr And let pybstr/pyustr point to version of bstr/ustr types that is actually in use: - when bytes/unicode are not patched -> to _pybstr/_pyustr - when bytes/unicode will be patched -> to bytes/unicode to where original _pybstr/_pyustr were copied during bytes/unicode patching. at runtime the code uses pybstr/pyustr instead of _pybstr/_pyustr. --- golang/_golang_str.pyx | 52 ++++++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index fae05e8..c52e71b 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -87,6 +87,14 @@ else: cdef object zbytes = (&PyBytes_Type) cdef object zunicode = (&PyUnicode_Type) +# pybstr/pyustr point to version of bstr/ustr types that is actually in use: +# - when bytes/unicode are not patched -> to _pybstr/_pyustr +# - when bytes/unicode will be patched -> to bytes/unicode to where original +# _pybstr/_pyustr were copied during bytes/unicode patching. +# at runtime the code should use pybstr/pyustr instead of _pybstr/_pyustr. +pybstr = _pybstr # initially point to -> _pybstr/_pyustr +pyustr = _pyustr # TODO -> cdef for speed + def pyb(s): # -> bstr """b converts object to bstr. 
@@ -250,8 +258,8 @@ def pyuchr(int i): # -> 1-character ustr return pyu(unichr(i)) -@no_gc # note setup.py assist this to compile despite -cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 +@no_gc # note setup.py assist this to compile despite +cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 """bstr is byte-string. It is based on bytes and can automatically convert to/from unicode. @@ -284,7 +292,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) - # pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↓ .____new__() . + # _pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↓ .____new__() . @staticmethod def ____new__(cls, object='', encoding=None, errors=None): # encoding or errors -> object must expose buffer interface @@ -557,7 +565,7 @@ cdef class pybstr(bytes): # https://github.com/cython/cython/issues/711 return pyustr.maketrans(x, y, z) -# hand-made pybstr.__new__ (workaround for https://github.com/cython/cython/issues/799) +# hand-made _pybstr.__new__ (workaround for https://github.com/cython/cython/issues/799) cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw) except NULL: argv = () if _argv != NULL: @@ -566,26 +574,26 @@ cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw if _kw != NULL: kw = _kw - cdef object x = pybstr.____new__(_cls, *argv, **kw) + cdef object x = _pybstr.____new__(_cls, *argv, **kw) Py_INCREF(x) return x -(<_XPyTypeObject*>pybstr).tp_new = &_pybstr_tp_new +(<_XPyTypeObject*>_pybstr).tp_new = &_pybstr_tp_new # bytes uses "optimized" and custom .tp_basicsize and .tp_itemsize: # https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L26-L32 # https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L3816-L3820 -(pybstr) .tp_basicsize = 
(zbytes).tp_basicsize -(pybstr) .tp_itemsize = (zbytes).tp_itemsize +(_pybstr) .tp_basicsize = (zbytes).tp_basicsize +(_pybstr) .tp_itemsize = (zbytes).tp_itemsize -# make sure pybstr C layout corresponds to bytes C layout exactly +# make sure _pybstr C layout corresponds to bytes C layout exactly # we patched cython to allow from-bytes cdef class inheritance and we also set -# .tp_basicsize directly above. All this works ok only if C layouts for pybstr +# .tp_basicsize directly above. All this works ok only if C layouts for _pybstr # and bytes are completely the same. -assert sizeof(pybstr) == sizeof(PyBytesObject) +assert sizeof(_pybstr) == sizeof(PyBytesObject) @no_gc -cdef class pyustr(unicode): +cdef class _pyustr(unicode): """ustr is unicode-string. It is based on unicode and can automatically convert to/from bytes. @@ -613,7 +621,7 @@ cdef class pyustr(unicode): """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) - # pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↓ .____new__() . + # _pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↓ .____new__() . 
@staticmethod def ____new__(cls, object='', encoding=None, errors=None): # encoding or errors -> object must expose buffer interface @@ -662,7 +670,7 @@ cdef class pyustr(unicode): def __hash__(self): - # see pybstr.__hash__ for why we stick to hash of current str + # see _pybstr.__hash__ for why we stick to hash of current str if PY_MAJOR_VERSION >= 3: return zunicode.__hash__(self) else: @@ -921,7 +929,7 @@ cdef class pyustr(unicode): return t -# hand-made pyustr.__new__ (workaround for https://github.com/cython/cython/issues/799) +# hand-made _pyustr.__new__ (workaround for https://github.com/cython/cython/issues/799) cdef PyObject* _pyustr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw) except NULL: argv = () if _argv != NULL: @@ -930,13 +938,13 @@ cdef PyObject* _pyustr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw if _kw != NULL: kw = _kw - cdef object x = pyustr.____new__(_cls, *argv, **kw) + cdef object x = _pyustr.____new__(_cls, *argv, **kw) Py_INCREF(x) return x -(<_XPyTypeObject*>pyustr).tp_new = &_pyustr_tp_new +(<_XPyTypeObject*>_pyustr).tp_new = &_pyustr_tp_new -# similarly to bytes - want same C layout for pyustr vs unicode -assert sizeof(pyustr) == sizeof(PyUnicodeObject) +# similarly to bytes - want same C layout for _pyustr vs unicode +assert sizeof(_pyustr) == sizeof(PyUnicodeObject) # _pyustrIter wraps unicode iterator to return pyustr for each yielded character. @@ -1004,7 +1012,7 @@ IF PY2: o = bytes(buffer(o)) # change tp_type to bytes instead of pybstr return (<_PyTypeObject_Print*>zbytes) .tp_print(o, f, Py_PRINT_RAW) - (<_PyTypeObject_Print*>Py_TYPE(pybstr())) .tp_print = _pybstr_tp_print + (<_PyTypeObject_Print*>Py_TYPE(_pybstr())) .tp_print = _pybstr_tp_print # whiteout .sq_slice for pybstr/pyustr inherited from str/unicode. @@ -1012,8 +1020,8 @@ IF PY2: # If we don't do this e.g. bstr[:] will be handled by str.__getslice__ instead # of bstr.__getitem__, and will return str instead of bstr. 
if PY2: - (<_XPyTypeObject*>pybstr) .tp_as_sequence.sq_slice = NULL - (<_XPyTypeObject*>pyustr) .tp_as_sequence.sq_slice = NULL + (<_XPyTypeObject*>_pybstr) .tp_as_sequence.sq_slice = NULL + (<_XPyTypeObject*>_pyustr) .tp_as_sequence.sq_slice = NULL # ---- adjust bstr/ustr classes after what cython generated ---- From 90f0e0ff69ef261040e61573efb5bdd1fa91d2e6 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Fri, 23 Jun 2023 17:54:03 +0300 Subject: [PATCH 12/29] strconv: Add benchmarks for quote and unquote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This functions are currently relatively slow. They were initially used in zodbdump and zodbrestore, where their speed did not matter much, but with bstr and ustr, since e.g. quote is used in repr, not having them to perform with speed similar to builtin string escaping starts to be an issue. Tatuya Kamada reports at https://lab.nexedi.com/nexedi/pygolang/merge_requests/21#note_170833 : ### 3. `u` seems slow with large arrays especially when `repr` it I have faced a slowness while testing `u`, `b` with python 2.7, especially with `repr`. ```python >>> timeit.timeit("from golang import b,u; u('あ'*199998)", number=10) 2.02020001411438 >>> timeit.timeit("from golang import b,u; repr(u('あ'*199998))", number=10) 54.60263395309448 ``` `bytes`(str) is very fast. ```python >>> timeit.timeit("from golang import b,u; bytes('あ'*199998)", number=10) 0.000392913818359375 >>> timeit.timeit("from golang import b,u; repr(bytes('あ'*199998))", number=10) 0.4604980945587158 ``` `b` is much faster than `u`, but still the repr seems slow. 
``` >>> timeit.timeit("from golang import b,u; b('あ'*199998)", number=10) 0.0009968280792236328 >>> timeit.timeit("from golang import b,u; repr(b('あ'*199998))", number=10) 25.498882055282593 ``` The "repr" part of this problem is due to that both bstr.__repr__ and ustr.__repr__ use custom quoting routines which currently are implemented in pure python in strconv module: https://lab.nexedi.com/kirr/pygolang/blob/300d7dfa/golang/_golang_str.pyx#L282-291 https://lab.nexedi.com/kirr/pygolang/blob/300d7dfa/golang/_golang_str.pyx#L582-591 https://lab.nexedi.com/kirr/pygolang/blob/300d7dfa/golang/_golang_str.pyx#L941-970 https://lab.nexedi.com/kirr/pygolang/blob/300d7dfa/golang/strconv.py#L31-92 The fix would be to move strconv.py to Cython and to correspondingly rework it to avoid using python-level constructs during quoting internally. Working on that was not a priority, but soon I will need to move strconv to Cython for another reason: to be able to break import cycle in between _golang and strconv. So it makes sense to add strconv benchmark first - since we'll start moving it to Cython anyway - to see where we are and how further changes will help performance-wise. Currently we are at name time/op quote[a] 910µs ± 0% quote[\u03b1] 1.23ms ± 0% quote[\u65e5] 800µs ± 0% quote[\U0001f64f] 1.06ms ± 1% stdquote 1.17µs ± 0% unquote[a] 1.33ms ± 1% unquote[\u03b1] 952µs ± 2% unquote[\u65e5] 613µs ± 2% unquote[\U0001f64f] 3.62ms ± 1% stdunquote 788ns ± 0% i.e. on py2 quoting is ~ 1000x slower than builtin string escaping, and unquoting is even slower. 
on py3 the situation is better, but still not good: name time/op quote[a] 579µs ± 1% quote[\u03b1] 942µs ± 1% quote[\u65e5] 595µs ± 0% quote[\U0001f64f] 274µs ± 1% stdquote 2.70µs ± 0% unquote[a] 696µs ± 1% unquote[\u03b1] 763µs ± 0% unquote[\u65e5] 474µs ± 1% unquote[\U0001f64f] 187µs ± 0% stdunquote 808ns ± 0% δ(py2, py3) for the reference: name py2 time/op py3 time/op delta quote[a] 910µs ± 0% 579µs ± 1% -36.42% (p=0.008 n=5+5) quote[\u03b1] 1.23ms ± 0% 0.94ms ± 1% -23.17% (p=0.008 n=5+5) quote[\u65e5] 800µs ± 0% 595µs ± 0% -25.63% (p=0.016 n=4+5) quote[\U0001f64f] 1.06ms ± 1% 0.27ms ± 1% -74.23% (p=0.008 n=5+5) stdquote 1.17µs ± 0% 2.70µs ± 0% +129.71% (p=0.008 n=5+5) unquote[a] 1.33ms ± 1% 0.70ms ± 1% -47.71% (p=0.008 n=5+5) unquote[\u03b1] 952µs ± 2% 763µs ± 0% -19.82% (p=0.008 n=5+5) unquote[\u65e5] 613µs ± 2% 474µs ± 1% -22.76% (p=0.008 n=5+5) unquote[\U0001f64f] 3.62ms ± 1% 0.19ms ± 0% -94.84% (p=0.016 n=5+4) stdunquote 788ns ± 0% 808ns ± 0% +2.59% (p=0.016 n=4+5) --- golang/strconv_test.py | 56 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/golang/strconv_test.py b/golang/strconv_test.py index 5dc68c1..31fdca0 100644 --- a/golang/strconv_test.py +++ b/golang/strconv_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Nexedi SA and Contributors. +# Copyright (C) 2018-2023 Nexedi SA and Contributors. 
# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -26,7 +26,10 @@ from six import int2byte as bchr from six.moves import range as xrange -from pytest import raises +from pytest import raises, mark + +import codecs + def byterange(start, stop): b = b"" @@ -138,3 +141,52 @@ def test_unquote_bad(): with raises(ValueError) as exc: unquote(tin) assert exc.value.args == (err,) + + +# ---- benchmarks ---- + +# quoting + unquoting +uchar_testv = ['a', # ascii + u'α', # 2-bytes utf8 + u'\u65e5', # 3-bytes utf8 + u'\U0001f64f'] # 4-bytes utf8 + +@mark.parametrize('ch', uchar_testv) +def bench_quote(b, ch): + s = bstr_ch1000(ch) + q = quote + for i in xrange(b.N): + q(s) + +def bench_stdquote(b): + s = b'a'*1000 + q = repr + for i in xrange(b.N): + q(s) + + +@mark.parametrize('ch', uchar_testv) +def bench_unquote(b, ch): + s = bstr_ch1000(ch) + s = quote(s) + unq = unquote + for i in xrange(b.N): + unq(s) + +def bench_stdunquote(b): + s = b'"' + b'a'*1000 + b'"' + escape_decode = codecs.escape_decode + def unq(s): return escape_decode(s[1:-1])[0] + for i in xrange(b.N): + unq(s) + + +# bstr_ch1000 returns bstr with many repetitions of character ch occupying ~ 1000 bytes. +def bstr_ch1000(ch): # -> bstr + assert len(ch) == 1 + s = bstr(ch) + s = s * (1000 // len(s)) + if len(s) % 3 == 0: + s += 'x' + assert len(s) == 1000 + return s From 83a1da997a752e136c09992fb2f9e5398aa003f3 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 20:55:06 +0300 Subject: [PATCH 13/29] golang, libgolang: Add byte / rune types Those types are the base when working with byte- and unicode strings. It will be clearer to use them explicitly instead of uint8_t and int32_t when processing string. 
--- golang/_golang.pxd | 5 ++++- golang/libgolang.h | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/golang/_golang.pxd b/golang/_golang.pxd index 41a2953..2e24025 100644 --- a/golang/_golang.pxd +++ b/golang/_golang.pxd @@ -1,5 +1,5 @@ # cython: language_level=2 -# Copyright (C) 2019-2022 Nexedi SA and Contributors. +# Copyright (C) 2019-2023 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -65,6 +65,9 @@ cdef extern from *: # on the edge of Python/nogil world. from libcpp.string cimport string # golang::string = std::string cdef extern from "golang/libgolang.h" namespace "golang" nogil: + ctypedef unsigned char byte + ctypedef signed int rune # = int32 + void panic(const char *) const char *recover() diff --git a/golang/libgolang.h b/golang/libgolang.h index 2cd8abe..0d4c153 100644 --- a/golang/libgolang.h +++ b/golang/libgolang.h @@ -433,6 +433,10 @@ constexpr Nil nil = nullptr; // string is alias for std::string. using string = std::string; +// byte/rune types related to string. +using byte = uint8_t; +using rune = int32_t; + // func is alias for std::function. template using func = std::function; From b9d72051a4d4913aa7d064bf95e58fb5793dec77 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 21:01:11 +0300 Subject: [PATCH 14/29] *: uint8_t -> byte, unicode-codepint -> rune We added byte and rune types in the previous patch. Let's use them now throughout whole codebase where appropriate. Currently the only place where unicode-codepoint is used is _utf8_decode_rune. uint8_t was used in many places. 
--- golang/_golang_str.pyx | 13 ++++++------- golang/runtime/_runtime_gevent.pyx | 8 ++++---- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index c52e71b..3b72ca5 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -68,7 +68,6 @@ cdef extern from "Python.h": from cython cimport no_gc -from libc.stdint cimport uint8_t from libc.stdio cimport FILE pystrconv = None # = golang.strconv imported at runtime (see __init__.py) @@ -1056,7 +1055,7 @@ _bstrustr_remove_unsupported_slots() # NOTE the return type is str type of current python, so that quoted result # could be directly used in __repr__ or __str__ implementation. cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape) - # TODO change to `const uint8_t[::1] s` after strconv._quote is moved to pyx + # TODO change to `const byte[::1] s` after strconv._quote is moved to pyx if isinstance(s, bytearray): s = _bytearray_data(s) assert isinstance(s, bytes), s @@ -1498,7 +1497,7 @@ cdef class _UnboundMethod(object): # they removed unbound methods on py3 # See also overview of patching bytes.{__repr__,__str__} near _bstringify. cdef object _missing = object() cdef object _atidx_re = pyre.compile('.* at index ([0-9]+)$') -cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr +cdef _bprintf(const byte[::1] fmt, xarg): # -> pybstr cdef bytearray out = bytearray() cdef tuple argv = None # if xarg is tuple @@ -1570,7 +1569,7 @@ cdef _bprintf(const uint8_t[::1] fmt, xarg): # -> pybstr # differently - on b %r is aliased to %a. cdef int i = 0 cdef int l = len(fmt) - cdef uint8_t c + cdef byte c while i < l: c = fmt[i] i += 1 @@ -1883,9 +1882,9 @@ assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4 # _utf8_decode_rune decodes next UTF8-character from byte string s. 
# # _utf8_decode_rune(s) -> (r, size) -def _py_utf8_decode_rune(const uint8_t[::1] s): +def _py_utf8_decode_rune(const byte[::1] s): return _utf8_decode_rune(s) -cdef (int, int) _utf8_decode_rune(const uint8_t[::1] s): +cdef (rune, int) _utf8_decode_rune(const byte[::1] s): if len(s) == 0: return _rune_error, 0 @@ -1918,7 +1917,7 @@ cdef (int, int) _utf8_decode_rune(const uint8_t[::1] s): # _utf8_decode_surrogateescape mimics s.decode('utf-8', 'surrogateescape') from py3. -def _utf8_decode_surrogateescape(const uint8_t[::1] s): # -> unicode +def _utf8_decode_surrogateescape(const byte[::1] s): # -> unicode if PY_MAJOR_VERSION >= 3: if len(s) == 0: return u'' # avoid out-of-bounds slice access on &s[0] diff --git a/golang/runtime/_runtime_gevent.pyx b/golang/runtime/_runtime_gevent.pyx index 8c4751d..dcf4f33 100644 --- a/golang/runtime/_runtime_gevent.pyx +++ b/golang/runtime/_runtime_gevent.pyx @@ -40,7 +40,7 @@ ELSE: from gevent import sleep as pygsleep -from libc.stdint cimport uint8_t, uint64_t +from libc.stdint cimport uint64_t from cpython cimport PyObject, Py_INCREF, Py_DECREF from cython cimport final @@ -49,7 +49,7 @@ from golang.runtime._libgolang cimport _libgolang_runtime_ops, _libgolang_sema, from golang.runtime.internal cimport syscall from golang.runtime cimport _runtime_thread from golang.runtime._runtime_pymisc cimport PyExc, pyexc_fetch, pyexc_restore -from golang cimport topyexc +from golang cimport byte, topyexc from libc.stdlib cimport calloc, free from libc.errno cimport EBADF @@ -343,7 +343,7 @@ cdef nogil: cdef: bint _io_read(IOH* ioh, int* out_n, void *buf, size_t count): pygfobj = ioh.pygfobj - cdef uint8_t[::1] mem = buf + cdef byte[::1] mem = buf xmem = memoryview(mem) # to avoid https://github.com/cython/cython/issues/3900 on mem[:0]=b'' try: # NOTE buf might be on stack, so it must not be accessed, e.g. 
from @@ -380,7 +380,7 @@ cdef nogil: cdef: bint _io_write(IOH* ioh, int* out_n, const void *buf, size_t count): pygfobj = ioh.pygfobj - cdef const uint8_t[::1] mem = buf + cdef const byte[::1] mem = buf # NOTE buf might be on stack, so it must not be accessed, e.g. from # FileObjectThread, while our greenlet is parked (see STACK_DEAD_WHILE_PARKED From 4a022e69403b041aa6eee38ac1244bf354ba9519 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 21:09:34 +0300 Subject: [PATCH 15/29] unicode/utf8: Start of the package (stub) We will soon need to use error rune codepoint from both golang_str.pyx and strconv.pyx - so we need to move that definition into shared place. What fits best is unicode/utf8, so start that package and move the constant there. --- golang/_golang.pyx | 2 +- golang/_golang_str.pyx | 11 ++++++----- golang/pyx/build.py | 3 +++ golang/unicode/__init__.py | 0 golang/unicode/_utf8.pxd | 28 ++++++++++++++++++++++++++++ golang/unicode/utf8.h | 36 ++++++++++++++++++++++++++++++++++++ golang/unicode/utf8.pxd | 26 ++++++++++++++++++++++++++ 7 files changed, 100 insertions(+), 6 deletions(-) create mode 100644 golang/unicode/__init__.py create mode 100644 golang/unicode/_utf8.pxd create mode 100644 golang/unicode/utf8.h create mode 100644 golang/unicode/utf8.pxd diff --git a/golang/_golang.pyx b/golang/_golang.pyx index 5cca599..24f7f23 100644 --- a/golang/_golang.pyx +++ b/golang/_golang.pyx @@ -3,7 +3,7 @@ # cython: binding=False # cython: c_string_type=str, c_string_encoding=utf8 # distutils: language = c++ -# distutils: depends = libgolang.h os/signal.h _golang_str.pyx +# distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx # # Copyright (C) 2018-2023 Nexedi SA and Contributors. # Kirill Smelkov diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 3b72ca5..ba127b6 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -22,6 +22,8 @@ It is included from _golang.pyx . 
""" +from golang.unicode cimport utf8 + from cpython cimport PyUnicode_AsUnicode, PyUnicode_GetSize, PyUnicode_FromUnicode from cpython cimport PyUnicode_DecodeUTF8 from cpython cimport PyTypeObject, Py_TYPE, reprfunc, richcmpfunc, binaryfunc @@ -1873,8 +1875,7 @@ cdef extern from "Python.h": from six import unichr # py2: unichr py3: chr from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,)) -cdef int _rune_error = 0xFFFD # unicode replacement character -_py_rune_error = _rune_error +_py_rune_error = utf8.RuneError cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2 assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4 @@ -1886,7 +1887,7 @@ def _py_utf8_decode_rune(const byte[::1] s): return _utf8_decode_rune(s) cdef (rune, int) _utf8_decode_rune(const byte[::1] s): if len(s) == 0: - return _rune_error, 0 + return utf8.RuneError, 0 cdef int l = min(len(s), 4) # max size of an UTF-8 encoded character while l > 0: @@ -1913,7 +1914,7 @@ cdef (rune, int) _utf8_decode_rune(const byte[::1] s): continue # invalid UTF-8 - return _rune_error, 1 + return utf8.RuneError, 1 # _utf8_decode_surrogateescape mimics s.decode('utf-8', 'surrogateescape') from py3. 
@@ -1932,7 +1933,7 @@ def _utf8_decode_surrogateescape(const byte[::1] s): # -> unicode while len(s) > 0: r, width = _utf8_decode_rune(s) - if r == _rune_error and width == 1: + if r == utf8.RuneError and width == 1: b = s[0] assert 0x80 <= b <= 0xff, b emit(unichr(0xdc00 + b)) diff --git a/golang/pyx/build.py b/golang/pyx/build.py index 48f40ab..95e0b17 100644 --- a/golang/pyx/build.py +++ b/golang/pyx/build.py @@ -226,6 +226,7 @@ def _with_build_defaults(name, kw): # -> (pygo, kw') 'os.h', 'os/signal.h', 'pyx/runtime.h', + 'unicode/utf8.h', '_testing.h', '_compat/windows/strings.h', '_compat/windows/unistd.h', @@ -274,6 +275,8 @@ def Extension(name, sources, **kw): 'os/signal.pxd', 'os/_signal.pxd', 'pyx/runtime.pxd', + 'unicode/utf8.pxd', + 'unicode/_utf8.pxd', ]]) kw['depends'] = dependv diff --git a/golang/unicode/__init__.py b/golang/unicode/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/golang/unicode/_utf8.pxd b/golang/unicode/_utf8.pxd new file mode 100644 index 0000000..523db10 --- /dev/null +++ b/golang/unicode/_utf8.pxd @@ -0,0 +1,28 @@ +# cython: language_level=2 +# Copyright (C) 2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. 
+# See https://www.nexedi.com/licensing for rationale and options. +"""Package utf8 mirrors Go package utf8. + +See https://golang.org/pkg/unicode/utf8 for Go utf8 package documentation. +""" + +from golang cimport rune + +cdef extern from "golang/unicode/utf8.h" namespace "golang::unicode::utf8" nogil: + rune RuneError diff --git a/golang/unicode/utf8.h b/golang/unicode/utf8.h new file mode 100644 index 0000000..c43af31 --- /dev/null +++ b/golang/unicode/utf8.h @@ -0,0 +1,36 @@ +#ifndef _NXD_LIBGOLANG_UNICODE_UTF8_H +#define _NXD_LIBGOLANG_UNICODE_UTF8_H + +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. +// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. + +// Package utf8 mirrors Go package utf8. 
+ +#include + +// golang::unicode::utf8:: +namespace golang { +namespace unicode { +namespace utf8 { + +constexpr rune RuneError = 0xFFFD; // unicode replacement character + +}}} // golang::os::utf8:: + +#endif // _NXD_LIBGOLANG_UNICODE_UTF8_H diff --git a/golang/unicode/utf8.pxd b/golang/unicode/utf8.pxd new file mode 100644 index 0000000..6ba154a --- /dev/null +++ b/golang/unicode/utf8.pxd @@ -0,0 +1,26 @@ +# cython: language_level=2 +# Copyright (C) 2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. +"""Package utf8 mirrors Go package utf8. + +See _utf8.pxd for package documentation. +""" + +# redirect cimport: golang.unicode.utf8 -> golang.unicode._utf8 (see __init__.pxd for rationale) +from golang.unicode._utf8 cimport * From ca559325f0d213209b2db509bf65f94770617b6c Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 21:30:06 +0300 Subject: [PATCH 16/29] strconv: Move it to pyx MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit So far this is plain code movement with no type annotations added and internal from-strconv imports still being done via py level. 
As expected this does not help practically for performance yet: name old time/op new time/op delta quote[a] 910µs ± 0% 805µs ± 0% -11.54% (p=0.008 n=5+5) quote[\u03b1] 1.23ms ± 0% 1.21ms ± 0% -1.24% (p=0.008 n=5+5) quote[\u65e5] 800µs ± 0% 785µs ± 0% -1.86% (p=0.016 n=4+5) quote[\U0001f64f] 1.06ms ± 1% 1.04ms ± 0% -1.92% (p=0.008 n=5+5) stdquote 1.17µs ± 0% 1.18µs ± 0% +0.80% (p=0.008 n=5+5) unquote[a] 1.33ms ± 1% 1.26ms ± 0% -5.13% (p=0.008 n=5+5) unquote[\u03b1] 952µs ± 2% 911µs ± 1% -4.25% (p=0.008 n=5+5) unquote[\u65e5] 613µs ± 2% 592µs ± 0% -3.48% (p=0.008 n=5+5) unquote[\U0001f64f] 3.62ms ± 1% 3.46ms ± 0% -4.32% (p=0.008 n=5+5) stdunquote 788ns ± 0% 812ns ± 1% +3.07% (p=0.016 n=4+5) --- golang/.gitignore | 1 + golang/_strconv.pxd | 21 +++++ golang/_strconv.pyx | 183 ++++++++++++++++++++++++++++++++++++++++++++ golang/pyx/build.py | 2 + golang/strconv.pxd | 26 +++++++ golang/strconv.py | 166 ++-------------------------------------- setup.py | 3 + 7 files changed, 242 insertions(+), 160 deletions(-) create mode 100644 golang/_strconv.pxd create mode 100644 golang/_strconv.pyx create mode 100644 golang/strconv.pxd diff --git a/golang/.gitignore b/golang/.gitignore index 892afba..7492664 100644 --- a/golang/.gitignore +++ b/golang/.gitignore @@ -9,6 +9,7 @@ /_io.cpp /_os.cpp /_os_test.cpp +/_strconv.cpp /_strings_test.cpp /_sync.cpp /_sync_test.cpp diff --git a/golang/_strconv.pxd b/golang/_strconv.pxd new file mode 100644 index 0000000..5df4aef --- /dev/null +++ b/golang/_strconv.pxd @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- +# cython: language_level=2 +# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. 
+# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. +"""Package strconv provides Go-compatible string conversions.""" diff --git a/golang/_strconv.pyx b/golang/_strconv.pyx new file mode 100644 index 0000000..deb2d78 --- /dev/null +++ b/golang/_strconv.pyx @@ -0,0 +1,183 @@ +# -*- coding: utf-8 -*- +# cython: language_level=2 +# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. 
+"""_strconv.pyx implements strconv.pyx - see _strconv.pxd for package overview.""" + +from __future__ import print_function, absolute_import + +import unicodedata, codecs +from six.moves import range as xrange + +from golang import b +from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr + + +# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ". +def quote(s): # -> bstr + q, _ = _quote(b(s), b'"') + return b(q) + +def _quote(s, quote): # -> (quoted, nonascii_escape) + assert isinstance(s, bytes), type(s) + assert isinstance(quote, bytes), type(quote) + assert len(quote) == 1, repr(quote) + + outv = [] + emit = outv.append + nonascii_escape = False + i = 0 + while i < len(s): + c = s[i:i+1] + # fast path - ASCII only + if ord(c) < 0x80: + if c in (b'\\', quote): + emit(b'\\'+c) + + # printable ASCII + elif b' ' <= c <= b'\x7e': + emit(c) + + # non-printable ASCII + elif c == b'\t': + emit(br'\t') + elif c == b'\n': + emit(br'\n') + elif c == b'\r': + emit(br'\r') + + # everything else is non-printable + else: + emit(br'\x%02x' % ord(c)) + + i += 1 + + # slow path - full UTF-8 decoding + unicodedata + else: + r, size = _utf8_decode_rune(s[i:]) + isize = i + size + + # decode error - just emit raw byte as escaped + if r == _rune_error and size == 1: + nonascii_escape = True + emit(br'\x%02x' % ord(c)) + + # printable utf-8 characters go as is + elif unicodedata.category(_xunichr(r))[0] in _printable_cat0: + emit(s[i:isize]) + + # everything else goes in numeric byte escapes + else: + nonascii_escape = True + for j in xrange(i, isize): + emit(br'\x%02x' % ord(s[j:j+1])) + + i = isize + + return (quote + b''.join(outv) + quote, nonascii_escape) + + +# unquote decodes "-quoted unicode|byte string. +# +# ValueError is raised if there are quoting syntax errors. 
+def unquote(s): # -> bstr + us, tail = unquote_next(s) + if len(tail) != 0: + raise ValueError('non-empty tail after closing "') + return us + +# unquote_next decodes next "-quoted unicode|byte string. +# +# it returns -> (unquoted(s), tail-after-") +# +# ValueError is raised if there are quoting syntax errors. +def unquote_next(s): # -> (bstr, bstr) + us, tail = _unquote_next(b(s)) + return b(us), b(tail) + +def _unquote_next(s): + assert isinstance(s, bytes) + + if len(s) == 0 or s[0:0+1] != b'"': + raise ValueError('no starting "') + + outv = [] + emit= outv.append + + s = s[1:] + while 1: + r, width = _utf8_decode_rune(s) + if width == 0: + raise ValueError('no closing "') + + if r == ord('"'): + s = s[1:] + break + + # regular UTF-8 character + if r != ord('\\'): + emit(s[:width]) + s = s[width:] + continue + + if len(s) < 2: + raise ValueError('unexpected EOL after \\') + + c = s[1:1+1] + + # \ -> ; c = \ " + if c in b'\\"': + emit(c) + s = s[2:] + continue + + # \t \n \r + uc = None + if c == b't': uc = b'\t' + elif c == b'n': uc = b'\n' + elif c == b'r': uc = b'\r' + # accept also \a \b \v \f that Go might produce + # Python also decodes those escapes even though it does not produce them: + # https://github.com/python/cpython/blob/2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L677-L688 + elif c == b'a': uc = b'\x07' + elif c == b'b': uc = b'\x08' + elif c == b'v': uc = b'\x0b' + elif c == b'f': uc = b'\x0c' + + if uc is not None: + emit(uc) + s = s[2:] + continue + + # \x?? hex + if c == b'x': # XXX also handle octals? 
+ if len(s) < 2+2: + raise ValueError('unexpected EOL after \\x') + + b = codecs.decode(s[2:2+2], 'hex') + emit(b) + s = s[2+2:] + continue + + raise ValueError('invalid escape \\%s' % chr(ord(c[0:0+1]))) + + return b''.join(outv), s + + +_printable_cat0 = frozenset(['L', 'N', 'P', 'S']) # letters, numbers, punctuation, symbols diff --git a/golang/pyx/build.py b/golang/pyx/build.py index 95e0b17..1cb5f3f 100644 --- a/golang/pyx/build.py +++ b/golang/pyx/build.py @@ -265,6 +265,8 @@ def Extension(name, sources, **kw): '_fmt.pxd', 'io.pxd', '_io.pxd', + 'strconv.pxd', + '_strconv.pxd', 'strings.pxd', 'sync.pxd', '_sync.pxd', diff --git a/golang/strconv.pxd b/golang/strconv.pxd new file mode 100644 index 0000000..dd1d2b9 --- /dev/null +++ b/golang/strconv.pxd @@ -0,0 +1,26 @@ +# cython: language_level=2 +# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. +"""Package strconv provides Go-compatible string conversions. + +See _strconv.pxd for package documentation. 
+""" + +# redirect cimport: golang.strconv -> golang._strconv (see __init__.pxd for rationale) +from golang._strconv cimport * diff --git a/golang/strconv.py b/golang/strconv.py index 0408a0c..fec0ac9 100644 --- a/golang/strconv.py +++ b/golang/strconv.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2022 Nexedi SA and Contributors. +# Copyright (C) 2018-2023 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -21,162 +21,8 @@ from __future__ import print_function, absolute_import -import unicodedata, codecs -from six.moves import range as xrange - -from golang import b -from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr - - -# quote quotes unicode|bytes string into valid "..." bytestring always quoted with ". -def quote(s): # -> bstr - q, _ = _quote(b(s), b'"') - return b(q) - -def _quote(s, quote): # -> (quoted, nonascii_escape) - assert isinstance(s, bytes), type(s) - assert isinstance(quote, bytes), type(quote) - assert len(quote) == 1, repr(quote) - - outv = [] - emit = outv.append - nonascii_escape = False - i = 0 - while i < len(s): - c = s[i:i+1] - # fast path - ASCII only - if ord(c) < 0x80: - if c in (b'\\', quote): - emit(b'\\'+c) - - # printable ASCII - elif b' ' <= c <= b'\x7e': - emit(c) - - # non-printable ASCII - elif c == b'\t': - emit(br'\t') - elif c == b'\n': - emit(br'\n') - elif c == b'\r': - emit(br'\r') - - # everything else is non-printable - else: - emit(br'\x%02x' % ord(c)) - - i += 1 - - # slow path - full UTF-8 decoding + unicodedata - else: - r, size = _utf8_decode_rune(s[i:]) - isize = i + size - - # decode error - just emit raw byte as escaped - if r == _rune_error and size == 1: - nonascii_escape = True - emit(br'\x%02x' % ord(c)) - - # printable utf-8 characters go as is - elif unicodedata.category(_xunichr(r))[0] in _printable_cat0: - emit(s[i:isize]) - - # everything else goes in 
numeric byte escapes - else: - nonascii_escape = True - for j in xrange(i, isize): - emit(br'\x%02x' % ord(s[j:j+1])) - - i = isize - - return (quote + b''.join(outv) + quote, nonascii_escape) - - -# unquote decodes "-quoted unicode|byte string. -# -# ValueError is raised if there are quoting syntax errors. -def unquote(s): # -> bstr - us, tail = unquote_next(s) - if len(tail) != 0: - raise ValueError('non-empty tail after closing "') - return us - -# unquote_next decodes next "-quoted unicode|byte string. -# -# it returns -> (unquoted(s), tail-after-") -# -# ValueError is raised if there are quoting syntax errors. -def unquote_next(s): # -> (bstr, bstr) - us, tail = _unquote_next(b(s)) - return b(us), b(tail) - -def _unquote_next(s): - assert isinstance(s, bytes) - - if len(s) == 0 or s[0:0+1] != b'"': - raise ValueError('no starting "') - - outv = [] - emit= outv.append - - s = s[1:] - while 1: - r, width = _utf8_decode_rune(s) - if width == 0: - raise ValueError('no closing "') - - if r == ord('"'): - s = s[1:] - break - - # regular UTF-8 character - if r != ord('\\'): - emit(s[:width]) - s = s[width:] - continue - - if len(s) < 2: - raise ValueError('unexpected EOL after \\') - - c = s[1:1+1] - - # \ -> ; c = \ " - if c in b'\\"': - emit(c) - s = s[2:] - continue - - # \t \n \r - uc = None - if c == b't': uc = b'\t' - elif c == b'n': uc = b'\n' - elif c == b'r': uc = b'\r' - # accept also \a \b \v \f that Go might produce - # Python also decodes those escapes even though it does not produce them: - # https://github.com/python/cpython/blob/2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L677-L688 - elif c == b'a': uc = b'\x07' - elif c == b'b': uc = b'\x08' - elif c == b'v': uc = b'\x0b' - elif c == b'f': uc = b'\x0c' - - if uc is not None: - emit(uc) - s = s[2:] - continue - - # \x?? hex - if c == b'x': # XXX also handle octals? 
- if len(s) < 2+2: - raise ValueError('unexpected EOL after \\x') - - b = codecs.decode(s[2:2+2], 'hex') - emit(b) - s = s[2+2:] - continue - - raise ValueError('invalid escape \\%s' % chr(ord(c[0:0+1]))) - - return b''.join(outv), s - - -_printable_cat0 = frozenset(['L', 'N', 'P', 'S']) # letters, numbers, punctuation, symbols +from golang._strconv import \ + quote, \ + _quote, \ + unquote, \ + unquote_next diff --git a/setup.py b/setup.py index bd5148b..f3ef37f 100644 --- a/setup.py +++ b/setup.py @@ -316,6 +316,9 @@ def get_python_libdir(): Ext('golang.os._signal', ['golang/os/_signal.pyx']), + Ext('golang._strconv', + ['golang/_strconv.pyx']), + Ext('golang._strings_test', ['golang/_strings_test.pyx', 'golang/strings_test.cpp']), From 533bd30acf001ffbc3658d6603ef3e0f5b49d3a9 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 22:09:40 +0300 Subject: [PATCH 17/29] golang, strconv: Switch them to cimport each other at pyx level MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 50b8cb7e (strconv: Move functionality related to UTF8 encode/decode into _golang_str) both golang_str and strconv import each other. Before this patch that import was done at py level at runtime from outside to workaround the import cycle. This results in that strconv functionality is not available while golang is only being imported. So far it was not a problem, but when builtin string types will become patched with bstr and ustr, that will become a problem because string repr starts to be used at import time, which for pybstr is implemented via strconv.quote . -> Fix this by switching golang and strconv to cimport each other at pyx level. There, similarly to C, the cycle works just ok out of the box. 
This also automatically helps performance a bit: name old time/op new time/op delta quote[a] 805µs ± 0% 786µs ± 1% -2.40% (p=0.016 n=5+4) quote[\u03b1] 1.21ms ± 0% 1.12ms ± 0% -7.47% (p=0.008 n=5+5) quote[\u65e5] 785µs ± 0% 738µs ± 2% -5.97% (p=0.016 n=5+4) quote[\U0001f64f] 1.04ms ± 0% 0.92ms ± 1% -11.73% (p=0.008 n=5+5) stdquote 1.18µs ± 0% 1.19µs ± 0% +0.54% (p=0.008 n=5+5) unquote[a] 1.26ms ± 0% 1.08ms ± 0% -14.66% (p=0.008 n=5+5) unquote[\u03b1] 911µs ± 1% 797µs ± 0% -12.55% (p=0.008 n=5+5) unquote[\u65e5] 592µs ± 0% 522µs ± 0% -11.81% (p=0.008 n=5+5) unquote[\U0001f64f] 3.46ms ± 0% 3.21ms ± 0% -7.34% (p=0.008 n=5+5) stdunquote 812ns ± 1% 815ns ± 0% ~ (p=0.183 n=5+5) --- golang/__init__.py | 8 -------- golang/_golang.pxd | 9 ++++++++- golang/_golang_str.pyx | 22 +++++++++------------- golang/_strconv.pxd | 5 +++++ golang/_strconv.pyx | 27 ++++++++++++++------------- golang/strconv.py | 7 +++---- 6 files changed, 39 insertions(+), 39 deletions(-) diff --git a/golang/__init__.py b/golang/__init__.py index 49163d0..e773775 100644 --- a/golang/__init__.py +++ b/golang/__init__.py @@ -324,11 +324,3 @@ def _emit_exc_context(exc, emitf, recursef): pyu as u, \ pyustr as ustr, \ pyuchr as uchr - -# import golang.strconv into _golang from here to workaround cyclic golang ↔ strconv dependency -def _(): - from . import _golang - from . import strconv - _golang.pystrconv = strconv -_() -del _ diff --git a/golang/_golang.pxd b/golang/_golang.pxd index 2e24025..664389f 100644 --- a/golang/_golang.pxd +++ b/golang/_golang.pxd @@ -43,7 +43,7 @@ In addition to Cython/nogil API, golang.pyx provides runtime for golang.py: - Python-level channels are represented by pychan + pyselect. - Python-level error is represented by pyerror. - Python-level panic is represented by pypanic. -- Python-level strings are represented by pybstr and pyustr. +- Python-level strings are represented by pybstr/pyustr and pyb/pyu. 
""" @@ -269,4 +269,11 @@ cdef class pyerror(Exception): cdef object from_error (error err) # -> pyerror | None +# strings +cpdef pyb(s) # -> bstr +cpdef pyu(s) # -> ustr cdef __pystr(object obj) + + +cdef (rune, int) _utf8_decode_rune(const byte[::1] s) +cdef unicode _xunichr(rune i) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index ba127b6..563e0f9 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -72,7 +72,7 @@ from cython cimport no_gc from libc.stdio cimport FILE -pystrconv = None # = golang.strconv imported at runtime (see __init__.py) +from golang cimport strconv import string as pystring import types as pytypes import functools as pyfunctools @@ -97,7 +97,7 @@ pybstr = _pybstr # initially point to -> _pybstr/_pyustr pyustr = _pyustr # TODO -> cdef for speed -def pyb(s): # -> bstr +cpdef pyb(s): # -> bstr """b converts object to bstr. - For bstr the same object is returned. @@ -118,7 +118,7 @@ def pyb(s): # -> bstr raise TypeError("b: invalid type %s" % type(s)) return bs -def pyu(s): # -> ustr +cpdef pyu(s): # -> ustr """u converts object to ustr. - For ustr the same object is returned. 
@@ -1068,7 +1068,7 @@ cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape) if (quote in s) and (b'"' not in s): quote = b'"' - x, nonascii_escape = pystrconv._quote(s, quote) # raw bytes + x, nonascii_escape = strconv._quote(s, quote) # raw bytes if PY_MAJOR_VERSION < 3: return x, nonascii_escape else: @@ -1093,7 +1093,7 @@ def pyqq(obj): # py3: str | bytes if not isinstance(obj, (unicode, bytes)): obj = _bstringify(obj) - return pystrconv.quote(obj) + return strconv.pyquote(obj) @@ -1875,16 +1875,12 @@ cdef extern from "Python.h": from six import unichr # py2: unichr py3: chr from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,)) -_py_rune_error = utf8.RuneError - cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2 assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4 # _utf8_decode_rune decodes next UTF8-character from byte string s. # # _utf8_decode_rune(s) -> (r, size) -def _py_utf8_decode_rune(const byte[::1] s): - return _utf8_decode_rune(s) cdef (rune, int) _utf8_decode_rune(const byte[::1] s): if len(s) == 0: return utf8.RuneError, 0 @@ -2029,10 +2025,10 @@ else: # # it works correctly even on ucs2 python builds, where ordinals >= 0x10000 are # represented as 2 unicode points. -if not _ucs2_build: - _xunichr = unichr -else: - def _xunichr(i): +cdef unicode _xunichr(rune i): + if not _ucs2_build: + return unichr(i) + else: if i < 0x10000: return unichr(i) diff --git a/golang/_strconv.pxd b/golang/_strconv.pxd index 5df4aef..69af360 100644 --- a/golang/_strconv.pxd +++ b/golang/_strconv.pxd @@ -19,3 +19,8 @@ # See COPYING file for full licensing terms. # See https://www.nexedi.com/licensing for rationale and options. 
"""Package strconv provides Go-compatible string conversions.""" + +from golang cimport byte + +cpdef pyquote(s) +cdef _quote(s, quote) # -> (quoted, nonascii_escape) diff --git a/golang/_strconv.pyx b/golang/_strconv.pyx index deb2d78..fac6735 100644 --- a/golang/_strconv.pyx +++ b/golang/_strconv.pyx @@ -25,16 +25,17 @@ from __future__ import print_function, absolute_import import unicodedata, codecs from six.moves import range as xrange -from golang import b -from golang._golang import _py_utf8_decode_rune as _utf8_decode_rune, _py_rune_error as _rune_error, _xunichr +from golang cimport pyb +from golang cimport _utf8_decode_rune, _xunichr +from golang.unicode cimport utf8 # quote quotes unicode|bytes string into valid "..." bytestring always quoted with ". -def quote(s): # -> bstr - q, _ = _quote(b(s), b'"') - return b(q) +cpdef pyquote(s): # -> bstr + q, _ = _quote(pyb(s), b'"') + return pyb(q) -def _quote(s, quote): # -> (quoted, nonascii_escape) +cdef _quote(s, quote): # -> (quoted, nonascii_escape) assert isinstance(s, bytes), type(s) assert isinstance(quote, bytes), type(quote) assert len(quote) == 1, repr(quote) @@ -74,7 +75,7 @@ def _quote(s, quote): # -> (quoted, nonascii_escape) isize = i + size # decode error - just emit raw byte as escaped - if r == _rune_error and size == 1: + if r == utf8.RuneError and size == 1: nonascii_escape = True emit(br'\x%02x' % ord(c)) @@ -96,8 +97,8 @@ def _quote(s, quote): # -> (quoted, nonascii_escape) # unquote decodes "-quoted unicode|byte string. # # ValueError is raised if there are quoting syntax errors. -def unquote(s): # -> bstr - us, tail = unquote_next(s) +def pyunquote(s): # -> bstr + us, tail = pyunquote_next(s) if len(tail) != 0: raise ValueError('non-empty tail after closing "') return us @@ -107,11 +108,11 @@ def unquote(s): # -> bstr # it returns -> (unquoted(s), tail-after-") # # ValueError is raised if there are quoting syntax errors. 
-def unquote_next(s): # -> (bstr, bstr) - us, tail = _unquote_next(b(s)) - return b(us), b(tail) +def pyunquote_next(s): # -> (bstr, bstr) + us, tail = _unquote_next(pyb(s)) + return pyb(us), pyb(tail) -def _unquote_next(s): +cdef _unquote_next(s): assert isinstance(s, bytes) if len(s) == 0 or s[0:0+1] != b'"': diff --git a/golang/strconv.py b/golang/strconv.py index fec0ac9..6cbae96 100644 --- a/golang/strconv.py +++ b/golang/strconv.py @@ -22,7 +22,6 @@ from __future__ import print_function, absolute_import from golang._strconv import \ - quote, \ - _quote, \ - unquote, \ - unquote_next + pyquote as quote, \ + pyunquote as unquote, \ + pyunquote_next as unquote_next From ac751a5623b36481624c79a2947fa22089cbecbf Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 26 Jun 2023 22:43:05 +0300 Subject: [PATCH 18/29] strconv: Optimize quoting lightly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add type annotations and use C-level objects instead of py-ones where it is easy to do. 
We are not all-good yet, but this already brings some noticable speedup: name old time/op new time/op delta quote[a] 786µs ± 1% 10µs ± 0% -98.76% (p=0.016 n=4+5) quote[\u03b1] 1.12ms ± 0% 0.41ms ± 0% -63.37% (p=0.008 n=5+5) quote[\u65e5] 738µs ± 2% 258µs ± 0% -65.07% (p=0.016 n=4+5) quote[\U0001f64f] 920µs ± 1% 78µs ± 0% -91.46% (p=0.016 n=5+4) stdquote 1.19µs ± 0% 1.19µs ± 0% ~ (p=0.794 n=5+5) unquote[a] 1.08ms ± 0% 1.08ms ± 1% ~ (p=0.548 n=5+5) unquote[\u03b1] 797µs ± 0% 807µs ± 1% +1.23% (p=0.008 n=5+5) unquote[\u65e5] 522µs ± 0% 520µs ± 1% ~ (p=0.056 n=5+5) unquote[\U0001f64f] 3.21ms ± 0% 3.14ms ± 0% -2.13% (p=0.008 n=5+5) stdunquote 815ns ± 0% 836ns ± 0% +2.63% (p=0.008 n=5+5) --- golang/_golang_str.pyx | 16 +++--- golang/_strconv.pxd | 2 +- golang/_strconv.pyx | 115 ++++++++++++++++++++++++++++++----------- 3 files changed, 91 insertions(+), 42 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 563e0f9..e6fc8f8 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -1056,19 +1056,15 @@ _bstrustr_remove_unsupported_slots() # # NOTE the return type is str type of current python, so that quoted result # could be directly used in __repr__ or __str__ implementation. 
-cdef _bpysmartquote_u3b2(s): # -> (unicode(py3)|bytes(py2), nonascii_escape) - # TODO change to `const byte[::1] s` after strconv._quote is moved to pyx - if isinstance(s, bytearray): - s = _bytearray_data(s) - assert isinstance(s, bytes), s - +cdef _bpysmartquote_u3b2(const byte[::1] s): # -> (unicode(py3)|bytes(py2), nonascii_escape) # smartquotes: choose ' or " as quoting character exactly the same way python does # https://github.com/python/cpython/blob/v2.7.18-0-g8d21aa21f2c/Objects/stringobject.c#L905-L909 - quote = b"'" - if (quote in s) and (b'"' not in s): - quote = b'"' + cdef byte quote = ord("'") + if (quote in s) and (ord('"') not in s): + quote = ord('"') - x, nonascii_escape = strconv._quote(s, quote) # raw bytes + cdef bint nonascii_escape + x = strconv._quote(s, quote, &nonascii_escape) # raw bytes if PY_MAJOR_VERSION < 3: return x, nonascii_escape else: diff --git a/golang/_strconv.pxd b/golang/_strconv.pxd index 69af360..0107aad 100644 --- a/golang/_strconv.pxd +++ b/golang/_strconv.pxd @@ -23,4 +23,4 @@ from golang cimport byte cpdef pyquote(s) -cdef _quote(s, quote) # -> (quoted, nonascii_escape) +cdef bytes _quote(const byte[::1] s, char quote, bint* out_nonascii_escape) # -> (quoted, nonascii_escape) diff --git a/golang/_strconv.pyx b/golang/_strconv.pyx index fac6735..8ffd6f5 100644 --- a/golang/_strconv.pyx +++ b/golang/_strconv.pyx @@ -23,49 +23,82 @@ from __future__ import print_function, absolute_import import unicodedata, codecs -from six.moves import range as xrange -from golang cimport pyb +from golang cimport pyb, byte, rune from golang cimport _utf8_decode_rune, _xunichr from golang.unicode cimport utf8 +from cpython cimport PyObject + +cdef extern from "Python.h": + PyObject* PyBytes_FromStringAndSize(char*, Py_ssize_t) except NULL + char* PyBytes_AS_STRING(PyObject*) + int _PyBytes_Resize(PyObject**, Py_ssize_t) except -1 + void Py_DECREF(PyObject*) + # quote quotes unicode|bytes string into valid "..." 
bytestring always quoted with ". cpdef pyquote(s): # -> bstr - q, _ = _quote(pyb(s), b'"') + cdef bint _ + q = _quote(pyb(s), '"', &_) return pyb(q) -cdef _quote(s, quote): # -> (quoted, nonascii_escape) - assert isinstance(s, bytes), type(s) - assert isinstance(quote, bytes), type(quote) - assert len(quote) == 1, repr(quote) - outv = [] - emit = outv.append - nonascii_escape = False - i = 0 +cdef char[16] hexdigit # = '0123456789abcdef' +for i, c in enumerate('0123456789abcdef'): + hexdigit[i] = ord(c) + + +# XXX not possible to use `except (NULL, False)` +# (https://stackoverflow.com/a/66335433/9456786) +cdef bytes _quote(const byte[::1] s, char quote, bint* out_nonascii_escape): # -> (quoted, nonascii_escape) + # 2*" + max(4)*each byte (+ 1 for tail \0 implicitly by PyBytesObject) + cdef Py_ssize_t qmaxsize = 1 + 4*len(s) + 1 + cdef PyObject* qout = PyBytes_FromStringAndSize(NULL, qmaxsize) + cdef byte* q = PyBytes_AS_STRING(qout) + + cdef bint nonascii_escape = False + cdef Py_ssize_t i = 0, j + cdef Py_ssize_t isize + cdef int size + cdef rune r + cdef byte c + q[0] = quote; q += 1 while i < len(s): - c = s[i:i+1] + c = s[i] # fast path - ASCII only - if ord(c) < 0x80: - if c in (b'\\', quote): - emit(b'\\'+c) + if c < 0x80: + if c in (ord('\\'), quote): + q[0] = ord('\\') + q[1] = c + q += 2 # printable ASCII - elif b' ' <= c <= b'\x7e': - emit(c) + elif 0x20 <= c <= 0x7e: + q[0] = c + q += 1 # non-printable ASCII - elif c == b'\t': - emit(br'\t') - elif c == b'\n': - emit(br'\n') - elif c == b'\r': - emit(br'\r') + elif c == ord('\t'): + q[0] = ord('\\') + q[1] = ord('t') + q += 2 + elif c == ord('\n'): + q[0] = ord('\\') + q[1] = ord('n') + q += 2 + elif c == ord('\r'): + q[0] = ord('\\') + q[1] = ord('r') + q += 2 # everything else is non-printable else: - emit(br'\x%02x' % ord(c)) + q[0] = ord('\\') + q[1] = ord('x') + q[2] = hexdigit[c >> 4] + q[3] = hexdigit[c & 0xf] + q += 4 i += 1 @@ -77,21 +110,41 @@ cdef _quote(s, quote): # -> (quoted, 
nonascii_escape) # decode error - just emit raw byte as escaped if r == utf8.RuneError and size == 1: nonascii_escape = True - emit(br'\x%02x' % ord(c)) + q[0] = ord('\\') + q[1] = ord('x') + q[2] = hexdigit[c >> 4] + q[3] = hexdigit[c & 0xf] + q += 4 # printable utf-8 characters go as is - elif unicodedata.category(_xunichr(r))[0] in _printable_cat0: - emit(s[i:isize]) + elif _unicodedata_category(_xunichr(r))[0] in 'LNPS': # letters, numbers, punctuation, symbols + for j in range(i, isize): + q[0] = s[j] + q += 1 # everything else goes in numeric byte escapes else: nonascii_escape = True - for j in xrange(i, isize): - emit(br'\x%02x' % ord(s[j:j+1])) + for j in range(i, isize): + c = s[j] + q[0] = ord('\\') + q[1] = ord('x') + q[2] = hexdigit[c >> 4] + q[3] = hexdigit[c & 0xf] + q += 4 i = isize - return (quote + b''.join(outv) + quote, nonascii_escape) + q[0] = quote; q += 1 + q[0] = 0; # don't q++ at last because size does not include tail \0 + cdef Py_ssize_t qsize = (q - PyBytes_AS_STRING(qout)) + assert qsize <= qmaxsize + _PyBytes_Resize(&qout, qsize) + + bqout = qout + Py_DECREF(qout) + out_nonascii_escape[0] = nonascii_escape + return bqout # unquote decodes "-quoted unicode|byte string. 
@@ -181,4 +234,4 @@ cdef _unquote_next(s): return b''.join(outv), s -_printable_cat0 = frozenset(['L', 'N', 'P', 'S']) # letters, numbers, punctuation, symbols +cdef _unicodedata_category = unicodedata.category From 17dbfbac88a1adba8da0c733e24b2a8317139468 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Thu, 5 Oct 2023 11:22:19 +0300 Subject: [PATCH 19/29] X My draft state of x/gpystr work; py2/py3 pickle problem should be essentially solved --- .gitmodules | 6 + 3rdparty/capstone | 1 + 3rdparty/funchook | 1 + MANIFEST.in | 10 +- README.rst | 17 +- conftest.py | 10 + golang/_golang.pyx | 4 +- golang/_golang_str.pyx | 550 +++++++++-- golang/_golang_str_pickle.S | 371 ++++++++ golang/_golang_str_pickle.pyx | 1325 +++++++++++++++++++++++++++ golang/_golang_str_pickle_test.pyx | 181 ++++ golang/_strconv.pyx | 11 +- golang/fmt.h | 2 +- golang/golang_str_pickle_test.py | 512 +++++++++++ golang/golang_str_test.py | 379 +++++--- golang/libgolang.h | 11 +- golang/os.cpp | 4 +- golang/os.h | 2 +- golang/os/signal.cpp | 6 +- golang/pyx/build.py | 55 +- golang/runtime.cpp | 69 ++ golang/runtime.h | 50 + golang/runtime/internal/atomic.cpp | 4 +- golang/runtime/internal/syscall.cpp | 14 +- golang/runtime/internal/syscall.h | 4 +- golang/runtime/libgolang.cpp | 2 +- golang/runtime/platform.h | 65 ++ gpython/.gitignore | 1 + gpython/__init__.py | 56 +- gpython/_gpython.pyx | 31 + gpython/_gpython_c.cpp | 76 ++ gpython/gpython_test.py | 76 +- pyproject.toml | 2 +- setup.py | 254 ++++- 34 files changed, 3858 insertions(+), 304 deletions(-) create mode 100644 .gitmodules create mode 160000 3rdparty/capstone create mode 160000 3rdparty/funchook create mode 100644 conftest.py create mode 100644 golang/_golang_str_pickle.S create mode 100644 golang/_golang_str_pickle.pyx create mode 100644 golang/_golang_str_pickle_test.pyx create mode 100644 golang/golang_str_pickle_test.py create mode 100644 golang/runtime.cpp create mode 100644 golang/runtime.h create mode 100644 
golang/runtime/platform.h create mode 100644 gpython/.gitignore create mode 100644 gpython/_gpython.pyx create mode 100644 gpython/_gpython_c.cpp diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000..c279e31 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,6 @@ +[submodule "3rdparty/funchook"] + path = 3rdparty/funchook + url = https://github.com/kubo/funchook.git +[submodule "3rdparty/capstone"] + path = 3rdparty/capstone + url = https://github.com/capstone-engine/capstone.git diff --git a/3rdparty/capstone b/3rdparty/capstone new file mode 160000 index 0000000..097c04d --- /dev/null +++ b/3rdparty/capstone @@ -0,0 +1 @@ +Subproject commit 097c04d9413c59a58b00d4d1c8d5dc0ac158ffaa diff --git a/3rdparty/funchook b/3rdparty/funchook new file mode 160000 index 0000000..88388db --- /dev/null +++ b/3rdparty/funchook @@ -0,0 +1 @@ +Subproject commit 88388db3c69e16c1560fee65c6857d75f5ce6fd5 diff --git a/MANIFEST.in b/MANIFEST.in index d5bebb2..e2cae70 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,6 +2,9 @@ include COPYING README.rst CHANGELOG.rst tox.ini pyproject.toml trun .nxdtest include golang/libgolang.h include golang/runtime/libgolang.cpp include golang/runtime/libpyxruntime.cpp +include golang/runtime/platform.h +include golang/runtime.h +include golang/runtime.cpp include golang/pyx/runtime.h include golang/pyx/testprog/golang_dso_user/dsouser/dso.h include golang/pyx/testprog/golang_dso_user/dsouser/dso.cpp @@ -36,7 +39,10 @@ include golang/time.cpp include golang/_testing.h include golang/_compat/windows/strings.h include golang/_compat/windows/unistd.h +include gpython/_gpython_c.cpp recursive-include golang *.py *.pxd *.pyx *.toml *.txt* -recursive-include gpython *.py -recursive-include 3rdparty *.h +recursive-include gpython *.py *.pyx +recursive-include 3rdparty *.h *.c *.cpp *.S *.py *.cmake *.cs *.java +recursive-include 3rdparty LICENSE README.md README COPYING Makefile CMakeLists.txt recursive-exclude golang *_dsoinfo.py +include 
conftest.py diff --git a/README.rst b/README.rst index 06ec430..80846e7 100644 --- a/README.rst +++ b/README.rst @@ -4,7 +4,7 @@ Package `golang` provides Go-like features for Python: -- `gpython` is Python interpreter with support for lightweight threads. +- `gpython` is Python interpreter with support for lightweight threads and uniform UTF8-based approach to strings. - `go` spawns lightweight thread. - `chan` and `select` provide channels with Go semantic. - `func` allows to define methods separate from class. @@ -46,15 +46,16 @@ __ http://libuv.org/ __ http://software.schmorp.de/pkg/libev.html -Additionally GPython sets UTF-8 to be default encoding always, and puts `go`, -`chan`, `select` etc into builtin namespace. +Additionally GPython sets UTF-8 to be default encoding always, puts `go`, +`chan`, `select` etc into builtin namespace, and makes `bstr`/`ustr` to be used +instead of builtin string types. .. note:: GPython is optional and the rest of Pygolang can be used from under standard Python too. However without gevent integration `go` spawns full - not lightweight - OS thread. - GPython can be also used with threads - not gevent - runtime. Please see - `GPython options`_ for details. + GPython can be also used with threads - not gevent - runtime and with builtin string types. + Please see `GPython options`_ for details. Goroutines and channels @@ -571,3 +572,9 @@ GPython-specific options and environment variables are listed below: coroutines, while with `threads` `go` spawns full OS thread. `gevent` is default. The runtime to use can be also specified via `$GPYTHON_RUNTIME` environment variable. + +`-X gpython.strings=(bstr+ustr|pystd)` + Specify which string types GPython should use. `bstr+ustr` provide + uniform UTF8-based approach to strings, while `pystd` selects regular + `str` and `unicode`. `bstr+ustr` is default. String types to use can be + also specified via `$GPYTHON_STRINGS` environment variable. 
diff --git a/conftest.py b/conftest.py new file mode 100644 index 0000000..1ca5c1b --- /dev/null +++ b/conftest.py @@ -0,0 +1,10 @@ +# ignore tests in distorm - else it breaks as e.g. +# +# 3rdparty/funchook/distorm/python/test_distorm3.py:15: in +# import distorm3 +# 3rdparty/funchook/distorm/python/distorm3/__init__.py:57: in +# _distorm = _load_distorm() +# 3rdparty/funchook/distorm/python/distorm3/__init__.py:55: in _load_distorm +# raise ImportError("Error loading the diStorm dynamic library (or cannot load library into process).") +# E ImportError: Error loading the diStorm dynamic library (or cannot load library into process). +collect_ignore = ["3rdparty"] diff --git a/golang/_golang.pyx b/golang/_golang.pyx index 24f7f23..b857197 100644 --- a/golang/_golang.pyx +++ b/golang/_golang.pyx @@ -3,7 +3,7 @@ # cython: binding=False # cython: c_string_type=str, c_string_encoding=utf8 # distutils: language = c++ -# distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx +# distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx _golang_str_pickle.pyx # # Copyright (C) 2018-2023 Nexedi SA and Contributors. # Kirill Smelkov @@ -34,7 +34,7 @@ from __future__ import print_function, absolute_import _init_libgolang() _init_libpyxruntime() -from cpython cimport PyObject, Py_INCREF, Py_DECREF, PY_MAJOR_VERSION +from cpython cimport PyObject, Py_INCREF, Py_DECREF, Py_CLEAR, PY_MAJOR_VERSION ctypedef PyObject *pPyObject # https://github.com/cython/cython/issues/534 cdef extern from "Python.h": ctypedef struct PyTupleObject: diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index e6fc8f8..d7282a0 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -22,6 +22,8 @@ It is included from _golang.pyx . 
""" +from libc.stdio cimport fprintf, stderr # XXX kill + from golang.unicode cimport utf8 from cpython cimport PyUnicode_AsUnicode, PyUnicode_GetSize, PyUnicode_FromUnicode @@ -31,11 +33,13 @@ from cpython cimport Py_EQ, Py_NE, Py_LT, Py_GT, Py_LE, Py_GE from cpython.iterobject cimport PySeqIter_New from cpython cimport PyThreadState_GetDict, PyDict_SetItem from cpython cimport PyObject_CheckBuffer +from cpython cimport Py_TPFLAGS_HAVE_GC, Py_TPFLAGS_HEAPTYPE, Py_TPFLAGS_READY, PyType_Ready +from cpython cimport PyBytes_Format, PyUnicode_Format, PyObject_Str cdef extern from "Python.h": PyTypeObject PyBytes_Type ctypedef struct PyBytesObject: - pass + char *ob_sval cdef extern from "Python.h": PyTypeObject PyUnicode_Type @@ -60,13 +64,40 @@ cdef extern from "Python.h": ctypedef struct _XPyTypeObject "PyTypeObject": PyObject* tp_new(PyTypeObject*, PyObject*, PyObject*) except NULL initproc tp_init + + Py_ssize_t tp_vectorcall_offset + Py_ssize_t tp_weaklistoffset + + PyObject *tp_bases + PyObject *tp_mro + PyObject *tp_cache + PyObject *tp_weaklist + PyObject *tp_subclasses + PySequenceMethods *tp_as_sequence + PyMethodDef *tp_methods + PyMemberDef *tp_members ctypedef struct PySequenceMethods: binaryfunc sq_concat binaryfunc sq_inplace_concat object (*sq_slice) (object, Py_ssize_t, Py_ssize_t) # present only on py2 +cdef extern from "Python.h": + ctypedef struct PyVarObject: + Py_ssize_t ob_size + + +cdef extern from "funchook.h" nogil: + ctypedef struct funchook_t + funchook_t* funchook_create() + int funchook_prepare(funchook_t* h, void** target_func, void* hook_func) + int funchook_install(funchook_t* h, int flags) + int funchook_uninstall(funchook_t* h, int flags) + int funchook_destroy(funchook_t*) + const char* funchook_error_message(const funchook_t*) + int funchook_set_debug_file(const char* name) + from cython cimport no_gc @@ -77,10 +108,6 @@ import string as pystring import types as pytypes import functools as pyfunctools import re as pyre -if 
PY_MAJOR_VERSION >= 3: - import copyreg as pycopyreg -else: - import copy_reg as pycopyreg # zbytes/zunicode point to original std bytes/unicode types even if they will be patched. @@ -250,6 +277,8 @@ cdef __pystr(object obj): # -> ~str return pyb(obj) +# XXX -> bchr ? (not good as "character" means "unicode character") +# -> bstr.chr ? def pybbyte(int i): # -> 1-byte bstr """bbyte(i) returns 1-byte bstr with ordinal i.""" return pyb(bytearray([i])) @@ -259,6 +288,22 @@ def pyuchr(int i): # -> 1-character ustr return pyu(unichr(i)) +# XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) XXX review text +# _pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↓ ._pybstr__new__() . +# we keep it out of class instead of cdef @staticmethod due to https://github.com/cython/cython/issues/5337 +# XXX def instead of cdef due to ""Non-trivial keyword arguments and starred arguments not allowed in cdef functions +def _pybstr__new__(cls, object='', encoding=None, errors=None): + # encoding or errors -> object must expose buffer interface + if not (encoding is None and errors is None): + object = _buffer_decode(object, encoding, errors) + + # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented + object = _bstringify(object) + assert isinstance(object, (unicode, bytes)), object + bobj = _pyb(cls, object) + assert bobj is not None + return bobj + @no_gc # note setup.py assist this to compile despite cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 """bstr is byte-string. @@ -293,34 +338,26 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) - # _pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↓ .____new__() . 
- @staticmethod - def ____new__(cls, object='', encoding=None, errors=None): - # encoding or errors -> object must expose buffer interface - if not (encoding is None and errors is None): - object = _buffer_decode(object, encoding, errors) + # _pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↑ _pybstr__new__() . - # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented - object = _bstringify(object) - assert isinstance(object, (unicode, bytes)), object - bobj = _pyb(cls, object) - assert bobj is not None - return bobj - - def __bytes__(self): return self + def __bytes__(self): return pyb(self) # see __str__ def __unicode__(self): return pyu(self) def __str__(self): if PY_MAJOR_VERSION >= 3: return pyu(self) else: - return self + return pyb(self) # self or pybstr if it was subclass def __repr__(self): qself, nonascii_escape = _bpysmartquote_u3b2(self) bs = _inbstringify_get() if bs.inbstringify == 0 or bs.inrepr: + if pybstr is bytes: # don't wrap with b(...) when bstr replaces builtin str + if PY_MAJOR_VERSION >= 3: + qself = 'b' + qself + return qself if nonascii_escape: # so that e.g. b(u'\x80') is represented as qself = 'b' + qself # b(b'\xc2\x80'), not as b('\xc2\x80') return "b(" + qself + ")" @@ -328,18 +365,8 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 # [b('β')] goes as ['β'] when under _bstringify for %s return qself - - # override reduce for protocols < 2. Builtin handler for that goes through - # copyreg._reduce_ex which eventually calls bytes(bstr-instance) to - # retrieve state, which gives bstr, not bytes. Fix state to be bytes ourselves. 
def __reduce_ex__(self, protocol): - if protocol >= 2: - return zbytes.__reduce_ex__(self, protocol) - return ( - pycopyreg._reconstructor, - (self.__class__, self.__class__, _bdata(self)) - ) - + return _bstr__reduce_ex__(self, protocol) def __hash__(self): # hash of the same unicode and UTF-8 encoded bytes is generally different @@ -381,6 +408,7 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 else: return pyb(x) + # XXX temp disabled # __iter__ - yields unicode characters def __iter__(self): # TODO iterate without converting self to u @@ -575,7 +603,7 @@ cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw if _kw != NULL: kw = _kw - cdef object x = _pybstr.____new__(_cls, *argv, **kw) + cdef object x = _pybstr__new__(_cls, *argv, **kw) Py_INCREF(x) return x (<_XPyTypeObject*>_pybstr).tp_new = &_pybstr_tp_new @@ -592,6 +620,18 @@ cdef PyObject* _pybstr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw # and bytes are completely the same. assert sizeof(_pybstr) == sizeof(PyBytesObject) +# XXX text +def _pyustr__new__(cls, object='', encoding=None, errors=None): + # encoding or errors -> object must expose buffer interface + if not (encoding is None and errors is None): + object = _buffer_decode(object, encoding, errors) + + # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented + object = _bstringify(object) + assert isinstance(object, (unicode, bytes)), object + uobj = _pyu(cls, object) + assert uobj is not None + return uobj @no_gc cdef class _pyustr(unicode): @@ -622,27 +662,15 @@ cdef class _pyustr(unicode): """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) - # _pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↓ .____new__() . 
- @staticmethod - def ____new__(cls, object='', encoding=None, errors=None): - # encoding or errors -> object must expose buffer interface - if not (encoding is None and errors is None): - object = _buffer_decode(object, encoding, errors) - - # _bstringify. Note: it handles bstr/ustr / unicode/bytes/bytearray as documented - object = _bstringify(object) - assert isinstance(object, (unicode, bytes)), object - uobj = _pyu(cls, object) - assert uobj is not None - return uobj + # _pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↑ _pyustr__new__() . def __bytes__(self): return pyb(self) - def __unicode__(self): return self + def __unicode__(self): return pyu(self) # see __str__ def __str__(self): if PY_MAJOR_VERSION >= 3: - return self + return pyu(self) # = self or pyustr if it was subclass else: return pyb(self) @@ -650,6 +678,11 @@ cdef class _pyustr(unicode): qself, nonascii_escape = _upysmartquote_u3b2(self) bs = _inbstringify_get() if bs.inbstringify == 0 or bs.inrepr: + if pyustr is unicode: # don't wrap with u(...) when ustr replaces builtin str/unicode + if not nonascii_escape: # but only if the string is valid utf-8 + if PY_MAJOR_VERSION < 3: + qself = 'u'+qself + return qself if nonascii_escape: qself = 'b'+qself # see bstr.__repr__ return "u(" + qself + ")" @@ -657,18 +690,8 @@ cdef class _pyustr(unicode): # [u('β')] goes as ['β'] when under _bstringify for %s return qself - - # override reduce for protocols < 2. Builtin handler for that goes through - # copyreg._reduce_ex which eventually calls unicode(ustr-instance) to - # retrieve state, which gives ustr, not unicode. Fix state to be unicode ourselves. 
def __reduce_ex__(self, protocol): - if protocol >= 2: - return zunicode.__reduce_ex__(self, protocol) - return ( - pycopyreg._reconstructor, - (self.__class__, self.__class__, _udata(self)) - ) - + return _ustr__reduce_ex__(self, protocol) def __hash__(self): # see _pybstr.__hash__ for why we stick to hash of current str @@ -718,7 +741,7 @@ cdef class _pyustr(unicode): # https://cython.readthedocs.io/en/latest/src/userguide/migrating_to_cy30.html#arithmetic-special-methods # see also https://github.com/cython/cython/issues/4750 if type(a) is not pyustr: - assert type(b) is pyustr + assert type(b) is pyustr, type(b) return b.__radd__(a) return pyu(zunicode.__add__(a, _pyu_coerce(b))) @@ -738,7 +761,7 @@ cdef class _pyustr(unicode): # __mul__, __rmul__ (no need to override __imul__) def __mul__(a, b): if type(a) is not pyustr: - assert type(b) is pyustr + assert type(b) is pyustr, type(b) return b.__rmul__(a) return pyu(zunicode.__mul__(a, b)) def __rmul__(b, a): @@ -939,7 +962,7 @@ cdef PyObject* _pyustr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw if _kw != NULL: kw = _kw - cdef object x = _pyustr.____new__(_cls, *argv, **kw) + cdef object x = _pyustr__new__(_cls, *argv, **kw) Py_INCREF(x) return x (<_XPyTypeObject*>_pyustr).tp_new = &_pyustr_tp_new @@ -963,9 +986,10 @@ cdef class _pyustrIter: # _bdata/_udata retrieve raw data from bytes/unicode. 
def _bdata(obj): # -> bytes assert isinstance(obj, bytes) - _ = obj.__getnewargs__()[0] # (`bytes-data`,) - assert type(_) is bytes - return _ + if type(obj) is not bytes: + obj = obj.__getnewargs__()[0] # (`bytes-data`,) + assert type(obj) is bytes + return obj """ bcopy = bytes(memoryview(obj)) assert type(bcopy) is bytes @@ -973,9 +997,10 @@ def _bdata(obj): # -> bytes """ def _udata(obj): # -> unicode assert isinstance(obj, unicode) - _ = obj.__getnewargs__()[0] # (`unicode-data`,) - assert type(_) is unicode - return _ + if type(obj) is not unicode: + obj = obj.__getnewargs__()[0] # (`unicode-data`,) + assert type(obj) is unicode + return obj """ cdef Py_UNICODE* u = PyUnicode_AsUnicode(obj) cdef Py_ssize_t size = PyUnicode_GetSize(obj) @@ -1027,6 +1052,22 @@ if PY2: # ---- adjust bstr/ustr classes after what cython generated ---- +# for pybstr/pyustr cython generates .tp_dealloc that refer to bytes/unicode types directly. +# override that to refer to zbytes/zunicode to avoid infinite recursion on free. +cdef void _pybstr_tp_dealloc(PyObject *self): (zbytes) .tp_dealloc(self) +cdef void _pyustr_tp_dealloc(PyObject *self): (zunicode) .tp_dealloc(self) +(pybstr).tp_dealloc = &_pybstr_tp_dealloc +(pyustr).tp_dealloc = &_pyustr_tp_dealloc + +# change names of bstr/ustr to be e.g. "golang.bstr" instead of "golang._golang._bstr" XXX adjust after .name=str +# this makes sure that unpickling saved bstr does not load via unpatched origin +# class, and is also generally good for saving pickle size and for reducing _golang exposure. +# XXX -> _golang_str_pickle.pyx ? +(pybstr).tp_name = "golang.bstr" +(pyustr).tp_name = "golang.ustr" +assert pybstr.__module__ == "golang"; assert pybstr.__name__ == "bstr" +assert pyustr.__module__ == "golang"; assert pyustr.__name__ == "ustr" + # remove unsupported bstr/ustr methods. 
do it outside of `cdef class` to # workaround https://github.com/cython/cython/issues/4556 (`if ...` during # `cdef class` is silently handled wrongly) @@ -1039,12 +1080,11 @@ cdef _bstrustr_remove_unsupported_slots(): 'removesuffix', # py3.9 TODO provide fallback implementation ) for slot in vslot: - if not hasattr(unicode, slot): - _patch_slot(pybstr, slot, DEL) - try: + if not hasattr(zunicode, slot): + if hasattr(pybstr, slot): # we might have already removed it on previous call + _patch_slot(pybstr, slot, DEL) + if hasattr(pyustr, slot): # e.g. we do not define ustr.isprintable ourselves _patch_slot(pyustr, slot, DEL) - except KeyError: # e.g. we do not define ustr.isprintable ourselves - pass _bstrustr_remove_unsupported_slots() @@ -1105,7 +1145,7 @@ cdef _bstringify(object obj): # -> unicode|bytes _bstringify_enter() try: - if PY_MAJOR_VERSION >= 3: + if False: # PY_MAJOR_VERSION >= 3: # NOTE this depends on patches to bytes.{__repr__,__str__} below return unicode(obj) @@ -1118,10 +1158,12 @@ cdef _bstringify(object obj): # -> unicode|bytes # # NOTE this depends on patches to bytes.{__repr__,__str__} and # unicode.{__repr__,__str__} below. - if hasattr(obj, '__unicode__'): - return obj.__unicode__() - elif hasattr(obj, '__str__'): - return obj.__str__() + if False: # PY_MAJOR_VERSION < 3 and hasattr(obj, '__unicode__'): + return obj.__unicode__() # XXX needed ? + elif Py_TYPE(obj).tp_str != NULL: + return Py_TYPE(obj).tp_str(obj) + #elif hasattr(obj, '__str__'): + # return obj.__str__() else: return repr(obj) @@ -1422,19 +1464,24 @@ cdef _InBStringify _inbstringify_get(): return ts_inbstringify +# XXX text +cdef _get_slot(PyTypeObject* typ, str name): + typdict = (typ.tp_dict) + return typdict[name] + # _patch_slot installs func_or_descr into typ's __dict__ as name. # -# if func_or_descr is descriptor (has __get__), it is installed as is. +# if func_or_descr is descriptor (has __get__), or asis=True, it is installed as is. 
# otherwise it is wrapped with "unbound method" descriptor. # # if func_or_descr is DEL the slot is removed from typ's __dict__. cdef DEL = object() -cdef _patch_slot(PyTypeObject* typ, str name, object func_or_descr): +cdef _patch_slot(PyTypeObject* typ, str name, object func_or_descr, asis=False): typdict = (typ.tp_dict) #print("\npatching %s.%s with %r" % (typ.tp_name, name, func_or_descr)) #print("old: %r" % typdict.get(name)) - if hasattr(func_or_descr, '__get__') or func_or_descr is DEL: + if hasattr(func_or_descr, '__get__') or func_or_descr is DEL or asis: descr = func_or_descr else: func = func_or_descr @@ -1498,7 +1545,7 @@ cdef object _atidx_re = pyre.compile('.* at index ([0-9]+)$') cdef _bprintf(const byte[::1] fmt, xarg): # -> pybstr cdef bytearray out = bytearray() - cdef tuple argv = None # if xarg is tuple + cdef object argv = None # if xarg is tuple or subclass cdef object argm = None # if xarg is mapping # https://github.com/python/cpython/blob/2.7-0-g8d21aa21f2c/Objects/stringobject.c#L4298-L4300 @@ -1704,7 +1751,11 @@ cdef _bprintf(const byte[::1] fmt, xarg): # -> pybstr #print('--> __mod__ ', repr(fmt1), ' % ', repr(arg)) try: - s = zbytes.__mod__(fmt1, arg) + IF PY2: + # NOTE not zbytes.__mod__ because underlying PyBytes_Format is patched + s = _pbytes_Format(fmt1, arg) + ELSE: + s = zbytes.__mod__(fmt1, arg) except ValueError as e: # adjust position in '... at index ' from fmt1 to fmt if len(e.args) == 1: @@ -1795,6 +1846,50 @@ class _BFormatter(pystring.Formatter): return super(_BFormatter, self).get_field(field_name, args, kwargs) +# XXX place, comments +# str % ... : ceval on py2 and py3 < 3.11 invokes PyString_Format / PyUnicode_Format +# directly upon seeing BINARY_MODULO. This leads to bstr.__mod__ not being called. 
+ctypedef unicode uformatfunc(object, object) +ctypedef bytes bformatfunc(object, object) +cdef uformatfunc* _punicode_Format = PyUnicode_Format +cdef unicode _unicode_xFormat(object s, object args): + return pyustr.__mod__(s, args) + +IF PY2: + cdef bformatfunc* _pbytes_Format = PyBytes_Format + cdef _bytes_xFormat(object s, object args): + return pybstr.__mod__(s, args) + +cdef _patch_capi_str_format(): + cpatch(&_punicode_Format, _unicode_xFormat) + IF PY2: + cpatch(&_pbytes_Format, _bytes_xFormat) + + +# XXX place, comments, test +#py3.11: specializes instructions. e.g. ustr(obj) will specialize (after +# executing 8 times) to directly invoke +# +# PyObject_Str(obj) +# +# which, if obj is e.g. b'123' will return "b'123'" instead of "123". +# +# -> if we patch str=ustr, we need to patch PyObject_Str as well. +# -> XXX and check all other specializations. +# +# NOTE also good to just do +cdef _object_xStr(object s): + IF PY2: + return pybstr(s) + ELSE: + return pyustr(s) +ctypedef object objstrfunc(object) +cdef objstrfunc* _pobject_Str = PyObject_Str +cdef _patch_capi_object_str(): + cpatch(&_pobject_Str, _object_xStr) + + + # ---- misc ---- cdef object _xpyu_coerce(obj): @@ -1871,6 +1966,7 @@ cdef extern from "Python.h": from six import unichr # py2: unichr py3: chr from six import int2byte as bchr # py2: chr py3: lambda x: bytes((x,)) +# XXX turn vvv into compile-time constant cdef bint _ucs2_build = (sys.maxunicode == 0xffff) # ucs2 assert _ucs2_build or sys.maxunicode >= 0x0010ffff # or ucs4 @@ -1910,7 +2006,7 @@ cdef (rune, int) _utf8_decode_rune(const byte[::1] s): # _utf8_decode_surrogateescape mimics s.decode('utf-8', 'surrogateescape') from py3. 
-def _utf8_decode_surrogateescape(const byte[::1] s): # -> unicode +cdef _utf8_decode_surrogateescape(const byte[::1] s): # -> unicode if PY_MAJOR_VERSION >= 3: if len(s) == 0: return u'' # avoid out-of-bounds slice access on &s[0] @@ -1950,7 +2046,7 @@ def _utf8_decode_surrogateescape(const byte[::1] s): # -> unicode # _utf8_encode_surrogateescape mimics s.encode('utf-8', 'surrogateescape') from py3. -def _utf8_encode_surrogateescape(s): # -> bytes +cdef _utf8_encode_surrogateescape(s): # -> bytes assert isinstance(s, unicode) if PY_MAJOR_VERSION >= 3: return zunicode.encode(s, 'UTF-8', 'surrogateescape') @@ -2032,3 +2128,289 @@ cdef unicode _xunichr(rune i): uh = i - 0x10000 return unichr(0xd800 + (uh >> 10)) + \ unichr(0xdc00 + (uh & 0x3ff)) + + +# ---- funchook wrappers ----- + +cdef funchook_t* xfunchook_create() except NULL: + h = funchook_create() + if h == NULL: + raise MemoryError() + return h + +cdef xfunchook_destroy(funchook_t* h): + err = funchook_destroy(h) + if err != 0: + raise RuntimeError(funchook_error_message(h)) + +cdef xfunchook_prepare(funchook_t* h, void** target_func, void* hook_func): + err = funchook_prepare(h, target_func, hook_func) + if err != 0: + raise RuntimeError(funchook_error_message(h)) + +cdef xfunchook_install(funchook_t* h, int flags): + err = funchook_install(h, flags) + if err != 0: + raise RuntimeError(funchook_error_message(h)) + +cdef xfunchook_uninstall(funchook_t* h, int flags): + err = funchook_uninstall(h, flags) + if err != 0: + raise RuntimeError(funchook_error_message(h)) + +# cpatch = xfunchook_prepare on _patch_capi_hook +cdef cpatch(void** target_func, void* hook_func): + assert target_func[0] != NULL + xfunchook_prepare(_patch_capi_hook, target_func, hook_func) + + +# ---- patch unicode/str types to be ustr/bstr under gpython ---- +# XXX make sure original _pybstr/_pyustr cannot be used after patching XXX right ? 
+# XXX and make sure golang._golang._pybstr cannot be imported as well (ex pickle) +# XXX ._pyustr.__module__ = 'builtins' after patch - why? + +def _(): + gpy_strings = getattr(sys, '_gpy_strings', None) + if gpy_strings == 'bstr+ustr': + _patch_str() + elif gpy_strings in ('pystd', None): + pass + else: + raise AssertionError("invalid sys._gpy_strings: %r" % (gpy_strings,)) +_() + +# _patch_str is invoked when gpython imports golang and instructs to replace +# builtin str/unicode types with bstr/ustr. +# +# After the patch is applied all existing objects that have e.g. unicode type +# will switch to having ustr type. +cdef PyTypeObject _unicode_orig +cdef PyTypeObject _bytes_orig +cdef funchook_t* _patch_capi_hook +cdef _patch_str(): + global zbytes, _bytes_orig, pybstr + global zunicode, _unicode_orig, pyustr + global _patch_capi_hook + + #print('\n\nPATCH\n\n') + + # XXX explain + bpreserve_slots = upreserve_slots = ("maketrans",) + if PY_MAJOR_VERSION < 3: + bpreserve_slots += ("encode",) # @property'ies + upreserve_slots += ("decode",) + + # patch unicode to be pyustr. This patches + # - unicode (py2) + # - str (py3) + _pytype_clone(unicode, &_unicode_orig, "unicode(pystd)") + Py_INCREF(unicode) # XXX needed? + zunicode = &_unicode_orig + + _pytype_replace_by_child( + unicode, &_unicode_orig, + pyustr, "ustr(origin)", + upreserve_slots) + pyustr = unicode # retarget pyustr -> unicode to where it was copied + # XXX vvv needed so that patched unicode could be saved by py2:cPickle at all + (pyustr).tp_name = ("unicode" if PY_MAJOR_VERSION < 3 else "str") + + # py2: patch str to be pybstr + if PY_MAJOR_VERSION < 3: + _pytype_clone(bytes, &_bytes_orig, "bytes(pystd)") + Py_INCREF(bytes) # XXX needed? 
+ zbytes = &_bytes_orig + + _pytype_replace_by_child( + bytes, &_bytes_orig, + _pybstr, "bstr(origin)", + bpreserve_slots) + pybstr = bytes # retarget pybstr -> bytes to where it was copied + (pybstr).tp_name = ("str" if PY_MAJOR_VERSION < 3 else "bytes") + + # need to remove unsupported slots in cloned bstr/ustr again since PyType_Ready might have recreated them + _bstrustr_remove_unsupported_slots() + + + # also patch UserString to have methods that bstr/ustr have + # else e.g. IPython's guarded_eval.py fails in `_list_methods(collections.UserString, dir(str))` + from six.moves import UserString + def userstr__bytes__(s): return bytes(s.data) + def userstr__unicode__(s): return unicode(s.data) + assert not hasattr(UserString, '__bytes__') # XXX test + assert not hasattr(UserString, '__unicode__') + UserString.__bytes__ = userstr__bytes__ + UserString.__unicode__ = userstr__unicode__ + + # XXX also patch CAPI functions ... XXX explain + #funchook_set_debug_file("/dev/stderr") + _patch_capi_hook = xfunchook_create() + + _patch_capi_str_format() + _patch_capi_object_str() + _patch_capi_unicode_decode_as_bstr() + _patch_str_pickle() + # ... + + xfunchook_install(_patch_capi_hook, 0) + + +# XXX place ok ? +include '_golang_str_pickle.pyx' + +# _pytype_clone clones PyTypeObject src into dst. +# dst must not be previously initialized. +# +# dst will have reference-count = 1 meaning new reference to it is returned. +cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): + assert (src.tp_flags & Py_TPFLAGS_READY) != 0 + assert (src.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # src is not allocated on heap + #assert not PyType_IS_GC((src).ob_type) # XXX not true as unicode.ob_type is PyType_Type + # which generally has GC support, but + # GC is deactivated for non-heap types. 
+ # copy the struct XXX + .ob_next / .ob_prev (Py_TRACE_REFS) + dst[0] = src[0] + (dst).ob_refcnt = 1 + + if new_name != NULL: + dst.tp_name = new_name + + # now reinitialize things like .tp_dict etc, where PyType_Ready built slots that point to src. + # we want all those slots to be rebuilt and point to dst instead. + _dst = <_XPyTypeObject*>dst + dst .tp_flags &= ~Py_TPFLAGS_READY + dst .tp_dict = NULL + _dst.tp_bases = NULL + _dst.tp_mro = NULL + _dst.tp_cache = NULL + _dst.tp_weaklist = NULL + + # dst.__subclasses__ will be empty because existing children inherit from src, not from dst. + _dst.tp_subclasses = NULL + + PyType_Ready(dst) + assert (dst.tp_flags & Py_TPFLAGS_READY) != 0 + +# _pytype_replace_by_child replaces typ by its child egg. +# +# All existing objects that have type typ will switch to having type egg' . +# The instance/inheritance diagram for existing objects and types will switch +# as depicted below: +# +# base base +# ↑ ↖ +# typ ------> egg' → typ_clone +# ↗ ↑ ↖ ↗ ↑ ↗ +# objects X egg objects X egg +# ↑ ↑ +# Y Y +# +# typ_clone must be initialized via _pytype_clone(typ, typ_clone). +# egg' is egg clone put inplace of typ +# +# XXX preserve_slots - describe +cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, + PyTypeObject *egg, const char* egg_old_name, + preserve_slots): + otyp = typ ; oegg = egg + vtyp = typ ; vegg = egg + _typ = <_XPyTypeObject*>typ ; _egg = <_XPyTypeObject*>egg + + assert egg.tp_base == typ + assert _egg.tp_subclasses == NULL + + assert (typ.tp_flags & Py_TPFLAGS_READY) != 0 + assert (egg.tp_flags & Py_TPFLAGS_READY) != 0 + + assert (typ.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 + assert (egg.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # XXX will be not true + # -> ! Py_TPFLAGS_HAVE_GC + # -> ? set Py_TPFLAGS_HEAPTYPE back on typ' ? 
+ + # (generally not required) + assert (typ.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 + assert (egg.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 + # XXX also check PyObject_IS_GC (verifies .tp_is_gc() = n) ? + + + assert vtyp.ob_size == vegg.ob_size + assert typ .tp_basicsize == egg .tp_basicsize + assert typ .tp_itemsize == egg .tp_itemsize + IF PY3: + assert _typ.tp_vectorcall_offset == _egg.tp_vectorcall_offset + assert _typ.tp_weaklistoffset == _egg.tp_weaklistoffset + assert typ .tp_dictoffset == egg .tp_dictoffset + + # since egg will change .tp_base it will also need to reinitialize + # .tp_bases, .tp_mro and friends. Retrieve egg slots to preserve before we + # clear egg.__dict__ . This covers e.g. @staticmethod and @property. + keep_slots = {} # name -> slot + for name in preserve_slots: + keep_slots[name] = _get_slot(egg, name) + + # egg: clear what PyType_Ready will recompute + Py_CLEAR(egg .tp_dict) + Py_CLEAR(_egg.tp_bases) + Py_CLEAR(_egg.tp_mro) + Py_CLEAR(_egg.tp_cache) + + # typ <- egg preserving original typ's refcnt, weak references and subclasses\egg. + # typ will be now playing the role of egg + typ_refcnt = otyp.ob_refcnt + typ_weaklist = _typ.tp_weaklist + typ_subclasses = _typ.tp_subclasses + typ[0] = egg[0] + otyp.ob_refcnt = typ_refcnt + _typ.tp_weaklist = typ_weaklist + _typ.tp_subclasses = typ_subclasses # XXX need to remove egg from here + + # adjust .tp_base + typ.tp_base = typ_clone + egg.tp_base = typ_clone + + # adjust egg.tp_name + if egg_old_name != NULL: + egg.tp_name = egg_old_name + + # reinitialize .tp_bases, .tp_mro. .tp_cache, and recompute slots that + # live in .tp_dict and point to their type. Do it for both typ (new egg) + # and origin egg for generality, even though original egg won't be used + # anymore. 
+ typ.tp_flags &= ~Py_TPFLAGS_READY + egg.tp_flags &= ~Py_TPFLAGS_READY + PyType_Ready(typ) + PyType_Ready(egg) + assert (typ.tp_flags & Py_TPFLAGS_READY) != 0 + assert (egg.tp_flags & Py_TPFLAGS_READY) != 0 + + # restore slots we were asked to preserve as is + # since those slots are e.g. @staticmethods they go to both egg' and egg. + for name, slot in keep_slots.items(): + _patch_slot(typ, name, slot, asis=True) + _patch_slot(egg, name, slot, asis=True) + + # XXX remove egg from typ.tp_subclasses (also possible via setting .__bases__) + # XXX remove typ from base.tp_subclasses + # else e.g. ustr(origin) is reported to be subclass of ustr by help() + # (pyustr.__subclasses__() give it) + + # rebuild .tp_mro of all other typ's children + # initially X.__mro__ = (X, typ, base) and without rebuilding it would + # remain (X, egg', base) instead of correct (X, egg' typ_clone, base) + # XXX py3 does this automatically? XXX -> no, it can invalidate .__mro__, but not .tp_mro + def refresh(x): + assert isinstance(x, type) + xtyp = x + _xtyp = <_XPyTypeObject*>x + fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) + assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 + xtyp.tp_flags &= ~Py_TPFLAGS_READY + Py_CLEAR(_xtyp.tp_mro) + PyType_Ready(x) + assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 + for _ in x.__subclasses__(): + refresh(_) + for _ in (typ).__subclasses__(): + refresh(_) + + # XXX also preserve ._ob_next + ._ob_prev (present in Py_TRACE_REFS builds) diff --git a/golang/_golang_str_pickle.S b/golang/_golang_str_pickle.S new file mode 100644 index 0000000..3b954bc --- /dev/null +++ b/golang/_golang_str_pickle.S @@ -0,0 +1,371 @@ +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. 
+// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. + +// _golang_str_pickle.S complements _golang_str_pickle.pyx with assembly routines. + +#include "golang/runtime/platform.h" + + .text + .p2align 4 + +// CSYM returns assembler symbol for C-symbol name +#if defined(LIBGOLANG_OS_darwin) || \ + (defined(LIBGOLANG_OS_windows) && defined(LIBGOLANG_ARCH_386)) +# define CSYM(name) _ ## name +#else +# define CSYM(name) name +#endif + +// _TYPE emits `.type sym, symtype` on systems where .type directive is supported +// _SIZE emits `.size sym, symsize` on systems where .size directive is supported +#ifdef LIBGOLANG_OS_linux +# define _TYPE(sym, symtype) .type sym, symtype +# define _SIZE(sym, symsize) .size sym, symsize +#else +# define _TYPE(sym, type) +# define _SIZE(sym, size) +#endif + +// inside_counted provides trampoline to call *inside_counted_func with +// counting how many times that function entered inside and exited. +// +// Each enter increments inside_counter, while each exit decrements it. +// Recursion is supported up to STK_SIZE times with counter stopping to be +// adjusted at deeper recursion levels. +// +// inside_counted can be used on functions with arbitrary signatures because +// all registers and stack arguments are preserved exactly as is on the call(*). +// +// (*) NOTE₁ on-stack return address / link-register is adjusted during the call. +// this prevents inside_counted to be used with e.g. 
x86.get_pc_thunk.ax . +// NOTE₂ on ARM64 xip0 (x16) is clobbered. +#define inside_counted CSYM(inside_counted) +#define inside_counted_func CSYM(inside_counted_func) +#define inside_counter CSYM(inside_counter) +#define inside_counted_stk CSYM(inside_counted_stk) + .globl inside_counted + _TYPE( inside_counted, @function ) +inside_counted: +#define STK_SIZE 8 + +// ---- X86_64 / i386 ---- + +#if defined(LIBGOLANG_ARCH_amd64) || defined(LIBGOLANG_ARCH_386) +#if defined(LIBGOLANG_ARCH_amd64) +# define REGSIZE 8 +# define rAX rax +# define rPCNT rbx +# define rCNT rcx +# define rPSTK rdx +# define rSP rsp +# ifndef LIBGOLANG_OS_windows + .macro LEAGOT sym, reg + movq \sym@GOTPCREL(%rip), %\reg + .endm +# else + // windows does not use PIC and relocates DLLs when loading them + // there is no GOT and we need to access in-DLL symbols directly + // see e.g. https://stackoverflow.com/q/13309662/9456786 for details. + .macro LEAGOT sym, reg + leaq \sym(%rip), %\reg // NOTE pc-relative addressing used to avoid LNK2017: + .endm // 'ADDR32' relocation ... 
invalid without /LARGEADDRESSAWARE:NO +# endif +#else +# define REGSIZE 4 +# define rAX eax +# define rPCNT ebx +# define rCNT ecx +# define rPSTK edx +# define rSP esp +# ifndef LIBGOLANG_OS_windows + .macro LEAGOT sym, reg + call .Lget_pc_\reg + addl $_GLOBAL_OFFSET_TABLE_, %\reg + movl \sym@GOT(%\reg), %\reg + .endm +# else + // windows does not use PIC - see details in ^^^ amd64 case + .macro LEAGOT sym, reg + leal \sym, %\reg + .endm +# endif +#endif + + sub $REGSIZE, %rSP // make place for jmp-via-ret to *inside_counted_func + + // TODO consider adding cfi_* annotations, but probably it won't be simple + // since we manipulate retaddr on the stack + + push %rAX // save registers we'll use + push %rPCNT + push %rCNT + push %rPSTK +#define SP_JMPVIARET (4*REGSIZE) +#define SP_RETORIG (5*REGSIZE) + + // jmp-via-ret = *inside_counted_func + LEAGOT inside_counted_func, rAX // &inside_counted_func + mov (%rAX), %rAX // inside_counted_func + mov %rAX, SP_JMPVIARET(%rSP) + + // check whether altstk is overflowed + // if it is - invoke the func without counting + LEAGOT inside_counter, rPCNT // &inside_counter + mov (%rPCNT), %rCNT // inside_counter + cmp $STK_SIZE, %rCNT + jge .Lcall + + // altstk is not overflowed + // push original ret to altstk and replace the ret to return to us after the call + LEAGOT inside_counted_stk, rPSTK // &inside_counted_stk + mov SP_RETORIG(%rSP), %rAX // original ret address + mov %rAX, (%rPSTK,%rCNT,REGSIZE) // inside_counted_stk[inside_counter] = retorig + add $1, %rCNT // inside_counter++ + mov %rCNT, (%rPCNT) + +#if defined(LIBGOLANG_ARCH_amd64) + lea .Laftercall(%rip), %rAX +#else + call .Lget_pc_eax + add $(.Laftercall-.), %rAX +#endif + mov %rAX, SP_RETORIG(%rSP) // replace ret addr on stack to .Laftercall + +.Lcall: + // restore registers and invoke the func through jmp-via-ret + pop %rPSTK + pop %rCNT + pop %rPCNT + pop %rAX + ret + +.Laftercall: + // we get here after invoked func returns if altstk was not overflowed + // 
decrement inside_counter and return to original ret address + sub $REGSIZE, %rSP // make place for original ret + push %rAX // save registers + push %rPCNT + push %rCNT + push %rPSTK +#undef SP_RETORIG +#define SP_RETORIG (4*REGSIZE) + + LEAGOT inside_counter, rPCNT // &inside_counter + mov (%rPCNT), %rCNT // inside_counter + sub $1, %rCNT + mov %rCNT, (%rPCNT) // inside_counter-- + LEAGOT inside_counted_stk, rPSTK // &inside_counted_stk + mov (%rPSTK,%rCNT,REGSIZE), %rAX // retorig = inside_counted_stk[inside_counter] + mov %rAX, SP_RETORIG(%rSP) + + // restore registers and return to original caller + pop %rPSTK + pop %rCNT + pop %rPCNT + pop %rAX + ret + +#if defined(LIBGOLANG_ARCH_386) +.macro DEF_get_pc reg + .Lget_pc_\reg: + mov (%esp), %\reg + ret +.endm +DEF_get_pc eax +DEF_get_pc ebx +DEF_get_pc ecx +DEF_get_pc edx +#endif + +// ---- ARM64 ---- + +#elif defined(LIBGOLANG_ARCH_arm64) +#define REGSIZE 8 +#define rPCNT x0 +#define rCNT x1 +#define rPSTK x2 +#define rXIP0 x16 + stp rPCNT, rCNT, [sp, -16]! // save registers we'll use + stp rPSTK, xzr, [sp, -16]! 
// NOTE xip0 is clobbered + + // xip0 = *inside_counted_func + adrp rXIP0, :got:inside_counted_func + ldr rXIP0, [rXIP0, :got_lo12:inside_counted_func] // &inside_counted_func + ldr rXIP0, [rXIP0] // inside_counted_func + + // check whether altstk is overflowed + // if it is - invoke the func without counting + adrp rPCNT, :got:inside_counter + ldr rPCNT, [rPCNT, :got_lo12:inside_counter] // &inside_counter + ldr rCNT, [rPCNT] // inside_counter + cmp rCNT, STK_SIZE + bge .Lcall + + // altstk is not overflowed + // push original ret to altstk and replace the ret to return to us after the call + adrp rPSTK, :got:inside_counted_stk + ldr rPSTK, [rPSTK, :got_lo12:inside_counted_stk] // &inside_counted_stk + str lr, [rPSTK, rCNT, lsl 3] // inside_counted_stk[inside_counter] = retorig + add rCNT, rCNT, 1 // inside_counter++ + str rCNT, [rPCNT] + + adr lr, .Laftercall // replace ret addr to .Laftercall + +.Lcall: + // restore registers and invoke the func via xip0 + ldp rPSTK, xzr, [sp], 16 + ldp rPCNT, rCNT, [sp], 16 + br rXIP0 + +.Laftercall: + // we get here after invoked func returns if altstk was not overflowed + // decrement inside_counter and return to original ret address + stp rPCNT, rCNT, [sp, -16]! // save registers + stp rPSTK, xzr, [sp, -16]! 
+ + adrp rPCNT, :got:inside_counter + ldr rPCNT, [rPCNT, :got_lo12:inside_counter] // &inside_counter + ldr rCNT, [rPCNT] // inside_counter + sub rCNT, rCNT, 1 + str rCNT, [rPCNT] // inside_counter-- + adrp rPSTK, :got:inside_counted_stk + ldr rPSTK, [rPSTK, :got_lo12:inside_counted_stk] // &inside_counted_stk + ldr lr, [rPSTK, rCNT, lsl 3] // lr = inside_counted_stk[inside_counter] + + // restore registers and return to original caller + ldp rPSTK, xzr, [sp], 16 + ldp rPCNT, rCNT, [sp], 16 + ret + +#else +# error "unsupported architecture" +#endif + + _SIZE( inside_counted, .-inside_counted ) + +// ---- data --- + .bss + +// void* inside_counted_func + .globl inside_counted_func + .p2align 3 // 8 + _TYPE( inside_counted_func, @object ) + _SIZE( inside_counted_func, REGSIZE ) +inside_counted_func: + .zero REGSIZE + +// long inside_counter + .globl inside_counter + .p2align 3 // 8 + _TYPE( inside_counter, @object ) + _SIZE( inside_counter, REGSIZE ) +inside_counter: + .zero REGSIZE + +// void* inside_counted_stk[STK_SIZE] + .globl inside_counted_stk + .p2align 5 // 32 + _TYPE( inside_counted_stk, @object ) + _SIZE( inside_counted_stk, STK_SIZE*REGSIZE ) +inside_counted_stk: + .zero STK_SIZE*REGSIZE + + +// disable executable stack +#ifndef LIBGOLANG_OS_windows + .section .note.GNU-stack,"",@progbits +#endif + + +// ---- custom callconv proxies ---- + .text + .p2align 4 + +// saveprobe_ (self, obj, pers_save) input callconv, proxy to saveprobe +// _pickle_Pickler_xsave_(self, obj, pers_save) input callconv, proxy to _pickle_Pickler_xsave +// save_invoke_as_ (save, self, obj, pers_save) input std, proxy to save invoked via callconv + + +#if defined(LIBGOLANG_ARCH_386) + +#ifdef LIBGOLANG_CC_msc +# define CSYM_FASTCALL3(name) @name@12 // MSVC mangles __fastcall +# define CSYM_FASTCALL4(name) @name@16 +#else +# define CSYM_FASTCALL3(name) CSYM(name) +# define CSYM_FASTCALL4(name) CSYM(name) +#endif + +// python-3.11.5.exe has _pickle.save accepting arguments in 
ecx,edx,stack but +// contrary to fastcall the callee does not cleanup the stack. +// Handle this as fastcall_nostkclean + +.macro FUNC_fastcall_nostkclean name + .globl CSYM(\name\()_fastcall_nostkclean) + _TYPE( CSYM(\name\()_fastcall_nostkclean), @function ) +CSYM(\name\()_fastcall_nostkclean): + // we are proxying to fastcall - ecx and edx are already setup and we + // need to only duplicate the 3rd argument on the stack. Do this without + // clobbering any register. + sub $4, %esp // place to copy on-stack argument to + push %eax + mov 12(%esp), %eax // original on-stack arg + mov %eax, 4(%esp) // dup to copy + pop %eax + + call CSYM_FASTCALL3(\name\()_ifastcall) + // ^^^ cleaned up the stack from our copy + // nothing to do anymore + ret + _SIZE( CSYM(\name\()_fastcall_nostkclean), .-CSYM(\name\()_fastcall_nostkclean) ) +.endm +FUNC_fastcall_nostkclean saveprobe +FUNC_fastcall_nostkclean _pickle_Pickler_xsave +FUNC_fastcall_nostkclean _zpickle_Pickler_xsave + +#define save_invoke_as_fastcall_nostkclean CSYM_FASTCALL4(save_invoke_as_fastcall_nostkclean) + .globl save_invoke_as_fastcall_nostkclean + _TYPE( save_invoke_as_fastcall_nostkclean, @function ) +save_invoke_as_fastcall_nostkclean: + // input: + // ecx: save + // edx: self + // stk[1]: obj + // stk[2]: pers_save + // + // invoke save as: + // ecx: self + // edx: obj + // stk*[1]: pers_save + + mov 8(%esp), %eax // pers_save + push %eax // stk*[1] <- per_save + + mov %ecx, %eax // eax <- save + mov %edx, %ecx // ecx <- self + mov (4+4)(%esp), %edx // edx <- obj + + call *%eax + + // return with cleaning up stack + add $4, %esp // pers_save copy we created + ret $8 // original arguments + _SIZE( save_invoke_as_fastcall_nostkclean, .-save_invoke_as_fastcall_nostkclean) + +#endif // 386 diff --git a/golang/_golang_str_pickle.pyx b/golang/_golang_str_pickle.pyx new file mode 100644 index 0000000..ec091c2 --- /dev/null +++ b/golang/_golang_str_pickle.pyx @@ -0,0 +1,1325 @@ +# -*- coding: utf-8 -*- +# 
Copyright (C) 2023 Nexedi SA and Contributors.
+#                          Kirill Smelkov <kirr@nexedi.com>
+#
+# This program is free software: you can Use, Study, Modify and Redistribute
+# it under the terms of the GNU General Public License version 3, or (at your
+# option) any later version, as published by the Free Software Foundation.
+#
+# You can also Link and Combine this program with other software covered by
+# the terms of any of the Free Software licenses or any of the Open Source
+# Initiative approved licenses and Convey the resulting work. Corresponding
+# source of such a combination shall include the source code for all other
+# software used.
+#
+# This program is distributed WITHOUT ANY WARRANTY; without even the implied
+# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# See COPYING file for full licensing terms.
+# See https://www.nexedi.com/licensing for rationale and options.
+"""_golang_str_pickle.pyx complements _golang_str.pyx and keeps everything
+related to pickling strings.
+
+It is included from _golang_str.pyx .
+
+The main entry-points are _patch_str_pickle and _patch_capi_unicode_decode_as_bstr.
+""" + +from cpython cimport PyUnicode_Decode +from cpython cimport PyBytes_FromStringAndSize, _PyBytes_Resize + +cdef extern from "Python.h": + char* PyBytes_AS_STRING(PyObject*) + Py_ssize_t PyBytes_GET_SIZE(PyObject*) + +cdef extern from "Python.h": + ctypedef PyObject* (*PyCFunction)(PyObject*, PyObject*) + ctypedef struct PyMethodDef: + const char* ml_name + PyCFunction ml_meth + ctypedef struct PyCFunctionObject: + PyMethodDef *m_ml + PyObject* m_self + PyObject* m_module + +cdef extern from "structmember.h": + ctypedef struct PyMemberDef: + const char* name + int type + Py_ssize_t offset + + enum: + T_INT + +from libc.stdlib cimport malloc, free +from libc.string cimport memcpy, memcmp + +if PY_MAJOR_VERSION >= 3: + import copyreg as pycopyreg +else: + import copy_reg as pycopyreg + +cdef object zbinary # = zodbpickle.binary | None +try: + import zodbpickle +except ImportError: + zbinary = None +else: + zbinary = zodbpickle.binary + + +# support for pickling bstr/ustr as standalone types. +# +# pickling is organized in such a way that +# - what is saved by py2 can be loaded correctly on both py2/py3, and similarly +# - what is saved by py3 can be loaded correctly on both py2/py3 as well. +# +# XXX place +cdef _bstr__reduce_ex__(self, protocol): + # Ideally we want to emit bstr(BYTES), but BYTES is not available for + # protocol < 3. And for protocol < 3 emitting bstr(STRING) is not an + # option because plain py3 raises UnicodeDecodeError on loading arbitrary + # STRING data. However emitting bstr(UNICODE) works universally because + # pickle supports arbitrary unicode - including invalid unicode - out of + # the box and in exactly the same way on both py2 and py3. For the + # reference upstream py3 uses surrogatepass on encode/decode UNICODE data + # to achieve that. 
+ if protocol < 3: + # use UNICODE for data + udata = _udata(pyu(self)) + if protocol < 2: + return (self.__class__, (udata,)) # bstr UNICODE REDUCE + else: + return (pycopyreg.__newobj__, + (self.__class__, udata)) # bstr UNICODE NEWOBJ + else: + # use BYTES for data + bdata = _bdata(self) + if PY_MAJOR_VERSION < 3: + # the only way we can get here on py2 and protocol >= 3 is zodbpickle + # -> similarly to py3 save bdata as BYTES + assert zbinary is not None + bdata = zbinary(bdata) + return ( + pycopyreg.__newobj__, # bstr BYTES NEWOBJ + (self.__class__, bdata)) + +cdef _ustr__reduce_ex__(self, protocol): + # emit ustr(UNICODE). + # TODO later we might want to switch to emitting ustr(BYTES) + # even if we do this, it should be backward compatible + if protocol < 2: + return (self.__class__, (_udata(self),))# ustr UNICODE REDUCE + else: + return (pycopyreg.__newobj__, # ustr UNICODE NEWOBJ + (self.__class__, _udata(self))) + + + +# types used while patching +cdef extern from *: + """ + struct PicklerObject; + """ + struct PicklerObject: + pass + +cdef struct PicklerTypeInfo: + Py_ssize_t size # sizeof(PicklerObject) + Py_ssize_t off_bin # offsetof `int bin` + Py_ssize_t off_poutput_buffer # offsetof `PyObject *output_buffer` + Py_ssize_t off_output_len # offsetof `Py_ssize_t output_len` + Py_ssize_t off_max_output_len # offsetof `Py_ssize_t max_output_len` + + +# XXX place ? +cdef extern from * nogil: + r""" + // CALLCONV instructs compiler to use specified builtin calling convention. + // it should be used like this: + // + // int CALLCONV(stdcall) myfunc(...) + #ifndef LIBGOLANG_CC_msc + # define CALLCONV(callconv) __attribute__((callconv)) + #else // MSC + # define CALLCONV(callconv) __##callconv + #endif + + + // FOR_EACH_CALLCONV invokes macro X(ccname, callconv, cckind) for every supported calling convention. + // cckind is one of `builtin` or `custom`. 
+ #ifdef LIBGOLANG_ARCH_386 + # ifndef LIBGOLANG_CC_msc + # define FOR_EACH_CALLCONV(X) \ + X(default,, builtin) \ + X(cdecl, CALLCONV(cdecl), builtin) \ + X(stdcall, CALLCONV(stdcall), builtin) \ + X(fastcall, CALLCONV(fastcall), builtin) \ + X(thiscall, CALLCONV(thiscall), builtin) \ + X(regparm1, CALLCONV(regparm(1)), builtin) \ + X(regparm2, CALLCONV(regparm(2)), builtin) \ + X(regparm3, CALLCONV(regparm(3)), builtin) \ + X(fastcall_nostkclean, na, custom ) + # else // MSC + # define FOR_EACH_CALLCONV(X) \ + X(default,, builtin) \ + X(cdecl, CALLCONV(cdecl), builtin) \ + X(stdcall, CALLCONV(stdcall), builtin) \ + X(fastcall, CALLCONV(fastcall), builtin) \ + /* X(CALLCONV(thiscall), thiscall) MSVC emits "C3865: '__thiscall': can only be used on native member functions" */ \ + /* in theory we can emulate thiscall via fastcall https://tresp4sser.wordpress.com/2012/10/06/how-to-hook-thiscall-functions/ */ \ + X(vectorcall, CALLCONV(vectorcall), builtin) \ + X(fastcall_nostkclean, na, custom ) + # endif + #elif defined(LIBGOLANG_ARCH_amd64) + # define FOR_EACH_CALLCONV(X) \ + X(default,, builtin) + #elif defined(LIBGOLANG_ARCH_arm64) + # define FOR_EACH_CALLCONV(X) \ + X(default,, builtin) + #else + # error "unsupported architecture" + #endif + + // Callconv denotes calling convention of a function. + enum Callconv { + #define CC_ENUM1(ccname, _, __) \ + CALLCONV_##ccname, + FOR_EACH_CALLCONV(CC_ENUM1) + }; + + const char* callconv_str(Callconv cconv) { + using namespace golang; + switch(cconv) { + #define CC_STR1(ccname, _, __) \ + case CALLCONV_##ccname: \ + return #ccname; + FOR_EACH_CALLCONV(CC_STR1) + default: + panic("bug"); + } + } + + // SaveFunc represents a save function - its address and calling convention. 
+ struct SaveFunc { + void* addr; + Callconv cconv; + }; + """ + enum Callconv: pass + const char* callconv_str(Callconv) + struct SaveFunc: + void* addr + Callconv cconv + +# XXX doc +cdef struct _pickle_PatchCtx: + initproc Unpickler_tp_xinit # func to replace Unpickler.tp_init + initproc Unpickler_tp_init_orig # what was there before + + vector[SaveFunc] Pickler_xsave_ccv # func to replace _Pickler_save (all callconv variants) + SaveFunc Pickler_save_orig # what was there before + + PicklerTypeInfo iPickler # information detected about PicklerObject type + + +# patch contexts for _pickle and _zodbpickle modules +cdef _pickle_PatchCtx _pickle_patchctx +cdef _pickle_PatchCtx _zpickle_patchctx + + +# _patch_str_pickle patches *pickle modules to support bstr/ustr and UTF-8 properly. +# +# STRING opcodes are handled in backward-compatible way: +# +# - *STRING are loaded as bstr +# - bstr is saved as *STRING +# - pickletools decodes *STRING as UTF-8 +cdef _patch_str_pickle(): + try: + import zodbpickle + except ImportError: + zodbpickle = None + + # py3: pickletools.dis raises UnicodeDecodeError on non-ascii STRING and treats *BINSTRING as latin1 + # -> decode as UTF8b instead + if PY_MAJOR_VERSION >= 3: + import pickletools, codecs + _codecs_escape_decode = codecs.escape_decode + def xread_stringnl(f): + data = _codecs_escape_decode(pickletools.read_stringnl(f, decode=False))[0] + return pybstr(data) + def xread_string1(f): + data = pickletools.read_string1(f).encode('latin1') + return pybstr(data) + def xread_string4(f): + data = pickletools.read_string4(f).encode('latin1') + return pybstr(data) + + pickletools.stringnl.reader = xread_stringnl + pickletools.string1.reader = xread_string1 + pickletools.string4.reader = xread_string4 + + if zodbpickle: + from zodbpickle import pickletools_3 as zpickletools + zpickletools.stringnl.reader = xread_stringnl # was same logic as in std pickletools + zpickletools.string1.reader = xread_string1 + zpickletools.string4.reader = 
xread_string4 + + # py3: pickle.load wants to treat *STRING as bytes and decode it as ASCII + # -> adjust to decode to bstr instead + # -> also save bstr via *STRING opcodes so that load/save is identity + import pickle, _pickle + # TODO _pickle not available (pypy) + _pickle_patchctx.Unpickler_tp_xinit = _pickle_Unpickler_xinit + _pickle_patchctx.Pickler_xsave_ccv = _pickle_Pickler_xsave_ccv + _patch_pickle(pickle, _pickle, &_pickle_patchctx) + + if zodbpickle: + from zodbpickle import pickle as zpickle, _pickle as _zpickle + from zodbpickle import slowpickle as zslowPickle, fastpickle as zfastPickle + # TODO _pickle / fastpickle not available (pypy) + for x in 'load', 'loads', 'Unpickler', 'dump', 'dumps', 'Pickler': + assert getattr(_zpickle, x) is getattr(zfastPickle, x) + assert getattr(zpickle, x) is getattr(_zpickle, x) + _patch_pickle(zslowPickle, None, NULL) + _zpickle_patchctx.Unpickler_tp_xinit = _zpickle_Unpickler_xinit + _zpickle_patchctx.Pickler_xsave_ccv = _zpickle_Pickler_xsave_ccv + _patch_pickle(None, zfastPickle, &_zpickle_patchctx) + # propagate changes from fastpickle -> _zpickle -> zpickle + _zpickle.load = zfastPickle.load + _zpickle.loads = zfastPickle.loads + _zpickle.dump = zfastPickle.dump + _zpickle.dumps = zfastPickle.dumps + assert _zpickle.Unpickler is zfastPickle.Unpickler + assert _zpickle.Pickler is zfastPickle.Pickler + zpickle.load = zfastPickle.load + zpickle.loads = zfastPickle.loads + zpickle.dump = zfastPickle.dump + zpickle.dumps = zfastPickle.dumps + assert zpickle.Unpickler is zfastPickle.Unpickler + assert zpickle.Pickler is zfastPickle.Pickler + +# _patch_pickle serves _patch_str_pickle by patching pair of py-by-default and +# C implementations of a pickle module. +# +# pickle or _pickle being None indicates that corresponding module version is not available. 
+cdef _patch_pickle(pickle, _pickle, _pickle_PatchCtx* _pctx): + # if C module is available - it should shadow default py implementation + if _pickle is not None and pickle is not None: + assert pickle.load is _pickle.load + assert pickle.loads is _pickle.loads + assert pickle.Unpickler is _pickle.Unpickler + assert pickle.dump is _pickle.dump + assert pickle.dumps is _pickle.dumps + assert pickle.Pickler is _pickle.Pickler + + # patch C + if _pickle is not None: + _patch_cpickle(_pickle, _pctx) + # propagate C updates to py + if pickle is not None: + pickle.load = _pickle.load + pickle.loads = _pickle.loads + pickle.Unpickler = _pickle.Unpickler + pickle.dump = _pickle.dump + pickle.dumps = _pickle.dumps # XXX needed? + pickle.Pickler = _pickle.Pickler + + # patch py + if pickle is not None: + _patch_pypickle(pickle, shadowed = (_pickle is not None)) + +# _patch_pypickle serves _patch_pickle for py version. +cdef _patch_pypickle(pickle, shadowed): + def pyattr(name): + if shadowed: + name = '_'+name + return getattr(pickle, name) + + # adjust load / loads / Unpickler to use 'bstr' encoding by default + Unpickler = pyattr('Unpickler') + for f in pyattr('load'), pyattr('loads'), Unpickler.__init__: + f.__kwdefaults__['encoding'] = 'bstr' + + # patch Unpickler._decode_string to handle 'bstr' encoding + # zodbpickle uses .decode_string from first version of patch from bugs.python.org/issue6784 + has__decode = hasattr(Unpickler, '_decode_string') + has_decode = hasattr(Unpickler, 'decode_string') + assert has__decode or has_decode + assert not (has__decode and has_decode) + _decode_string = '_decode_string' if has__decode else 'decode_string' + + Unpickler_decode_string = getattr(Unpickler, _decode_string) + def _xdecode_string(self, value): + if self.encoding == 'bstr': + return pyb(value) + else: + return Unpickler_decode_string(self, value) + setattr(Unpickler, _decode_string, _xdecode_string) + + # adjust Pickler to save bstr as STRING + from struct import pack + 
Pickler = pyattr('Pickler') + def save_bstr(self, obj): + cdef bint nonascii_escape # unused + if self.proto >= 1: + n = len(obj) + if n < 256: + op = b'U' + bytes((n,)) + _bdata(obj) # SHORT_BINSTRING + else: + op = b'T' + pack(' wrap functions + _pickle_load = _pickle.load + _pickle_loads = _pickle.loads + def load (file, *, **kw): + kw.setdefault('encoding', 'bstr') + return _pickle_load (file, **kw) + def loads(data, *, **kw): + kw.setdefault('encoding', 'bstr') + return _pickle_loads(data, **kw) + _pickle.load = load + _pickle.loads = loads + + # adjust Unpickler to use 'bstr' encoding by default + assert isinstance(_pickle.Unpickler, type) + cdef _XPyTypeObject* Unpickler = <_XPyTypeObject*>(_pickle.Unpickler) + + pctx.Unpickler_tp_init_orig = Unpickler.tp_init + Unpickler.tp_init = pctx.Unpickler_tp_xinit + + def Unpickler_x__init__(self, *argv, **kw): + # NOTE don't return - just call: __init__ should return None + pctx.Unpickler_tp_xinit(self, argv, kw) + + _patch_slot(Unpickler, '__init__', Unpickler_x__init__) + # decoding to bstr relies on _patch_capi_unicode_decode_as_bstr + + # adjust Pickler to save bstr as *STRING + # it is a bit involved because: + # - save function, that we need to patch, is not exported. + # - _Pickle_Write, that we need to use from patched save, is not exported neither. + pctx.iPickler = _detect_Pickler_typeinfo(_pickle.Pickler) + pctx.Pickler_save_orig = save = _find_Pickler_save(_pickle.Pickler) + xsave = pctx.Pickler_xsave_ccv[save.cconv] + assert xsave.cconv == save.cconv, (callconv_str(xsave.cconv), callconv_str(save.cconv)) + cpatch(&pctx.Pickler_save_orig.addr, xsave.addr) + + # XXX test at runtime that we hooked save correctly + + +# ---- adjusted C bits for loading ---- + +# adjust Unpickler to use 'bstr' encoding by default and handle that encoding +# in PyUnicode_Decode by returning bstr instead of unicode. This mirrors +# corresponding py loading adjustments. 
+ +cdef int _pickle_Unpickler_xinit(object self, PyObject* args, PyObject* kw) except -1: + xkw = {'encoding': 'bstr'} + if kw != NULL: + xkw.update(kw) + return _pickle_patchctx.Unpickler_tp_init_orig(self, args, xkw) + +cdef int _zpickle_Unpickler_xinit(object self, PyObject* args, PyObject* kw) except -1: + xkw = {'encoding': 'bstr'} + if kw != NULL: + xkw.update(kw) + return _zpickle_patchctx.Unpickler_tp_init_orig(self, args, xkw) + +ctypedef object unicode_decodefunc(const char*, Py_ssize_t, const char* encoding, const char* errors) +cdef unicode_decodefunc* _punicode_Decode +cdef object _unicode_xDecode(const char *s, Py_ssize_t size, const char* encoding, const char* errors): + if encoding != NULL and strcmp(encoding, 'bstr') == 0: + bobj = PyBytes_FromStringAndSize(s, size) # TODO -> PyBSTR_FromStringAndSize directly + return pyb(bobj) + return _punicode_Decode(s, size, encoding, errors) + +cdef _patch_capi_unicode_decode_as_bstr(): + global _punicode_Decode + _punicode_Decode = PyUnicode_Decode + cpatch(&_punicode_Decode, _unicode_xDecode) + + +# ---- adjusted C bits for saving ---- + +# adjust Pickler save to save bstr via *STRING opcodes. +# This mirrors corresponding py saving adjustments, but is more involved to implement. 
+ +cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: + return __Pickler_xsave(&_pickle_patchctx, self, obj, pers_save) + +cdef int _zpickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: + return __Pickler_xsave(&_zpickle_patchctx, self, obj, pers_save) + +# callconv wrappers XXX place +cdef extern from *: + r""" + static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave(PicklerObject*, PyObject*, int); + static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(PicklerObject*, PyObject*, int); + + #define DEF_PICKLE_XSAVE_builtin(ccname, callconv) \ + static int callconv \ + _pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj, int pers_save) { \ + return __pyx_f_6golang_7_golang__pickle_Pickler_xsave(self, obj, pers_save); \ + } + #define DEF_ZPICKLE_XSAVE_builtin(ccname, callconv) \ + static int callconv \ + _zpickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj, int pers_save) { \ + return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(self, obj, pers_save); \ + } + + #define DEF_PICKLE_XSAVE_custom(ccname, _) \ + extern "C" char _pickle_Pickler_xsave_##ccname; + #define DEF_ZPICKLE_XSAVE_custom(ccname, _) \ + extern "C" char _zpickle_Pickler_xsave_##ccname; + + #define DEF_PICKLE_XSAVE(ccname, callconv, cckind) DEF_PICKLE_XSAVE_##cckind(ccname, callconv) + #define DEF_ZPICKLE_XSAVE(ccname, callconv, cckind) DEF_ZPICKLE_XSAVE_##cckind(ccname, callconv) + + FOR_EACH_CALLCONV(DEF_PICKLE_XSAVE) + FOR_EACH_CALLCONV(DEF_ZPICKLE_XSAVE) + + static std::vector _pickle_Pickler_xsave_ccv = { + #define PICKLE_CC_XSAVE(ccname, _, __) \ + SaveFunc{(void*)&_pickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, + FOR_EACH_CALLCONV(PICKLE_CC_XSAVE) + }; + + static std::vector _zpickle_Pickler_xsave_ccv = { + #define ZPICKLE_CC_XSAVE(ccname, _, __) \ + SaveFunc{(void*)&_zpickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, + FOR_EACH_CALLCONV(ZPICKLE_CC_XSAVE) + }; + + // proxy for 
asm routines to invoke _pickle_Pickler_xsave and _zpickle_Pickler_xsave + #ifdef LIBGOLANG_ARCH_386 + extern "C" int CALLCONV(fastcall) + _pickle_Pickler_xsave_ifastcall(PicklerObject* self, PyObject* obj, int pers_save) { + return __pyx_f_6golang_7_golang__pickle_Pickler_xsave(self, obj, pers_save); + } + extern "C" int CALLCONV(fastcall) + _zpickle_Pickler_xsave_ifastcall(PicklerObject* self, PyObject* obj, int pers_save) { + return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(self, obj, pers_save); + } + #endif + """ + vector[SaveFunc] _pickle_Pickler_xsave_ccv + vector[SaveFunc] _zpickle_Pickler_xsave_ccv + + +cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, int pers_save) except -1: + # !bstr -> use builtin pickle code + if obj.ob_type != pybstr: + return save_invoke(pctx.Pickler_save_orig.addr, pctx.Pickler_save_orig.cconv, + self, obj, pers_save) + + # bstr -> pickle it as *STRING + cdef const char* s + cdef Py_ssize_t l + cdef byte[5] h + cdef Py_ssize_t lh = 1; + cdef bint nonascii_escape + + cdef int bin = (((self) + pctx.iPickler.off_bin))[0] + if bin == 0: + esc = strconv._quote(obj, "'", &nonascii_escape) + assert type(esc) is bytes + s = PyBytes_AS_STRING(esc) + l = PyBytes_GET_SIZE(esc) + __Pickler_xWrite(pctx, self, b'S', 1) # STRING + __Pickler_xWrite(pctx, self, s, l) + __Pickler_xWrite(pctx, self, b'\n', 1) + + else: + s = PyBytes_AS_STRING(obj) + l = PyBytes_GET_SIZE(obj) + if l < 0x100: + h[0] = b'U' # SHORT_BINSTRING + h[1] = l + lh += 1 + elif l < 0x7fffffff: + h[0] = b'T' # BINSTRING + h[1] = (l >> 0) + h[2] = (l >> 8) + h[3] = (l >> 16) + h[4] = (l >> 24) + lh += 4 + else: + raise OverflowError("cannot serialize a string larger than 2 GiB") + + __Pickler_xWrite(pctx, self, h, lh) + __Pickler_xWrite(pctx, self, s, l) + + return 0 + + +# __Pickler_xWrite mimics original _Pickler_Write. 
+# +# we have to implement it ourselves because there is no way to discover +# original _Pickler_Write address: contrary to `save` function _Pickler_Write +# is small and is not recursive. A compiler is thus free to create many +# versions of it with e.g. constant propagation and to inline it freely. The +# latter actually happens for real on LLVM which for py3.11 inlines +# _Pickler_Write fully without leaving any single freestanding instance of it. +# +# XXX explain why we can skip flush in zpickle case +# XXX explain that we do not emit FRAME +cdef int __Pickler_xWrite(_pickle_PatchCtx* pctx, PicklerObject* self, const char* s, Py_ssize_t l) except -1: + ppoutput_buffer = (self + pctx.iPickler.off_poutput_buffer) + poutput_len = (self + pctx.iPickler.off_output_len) + pmax_output_len = (self + pctx.iPickler.off_max_output_len) + + assert ppoutput_buffer[0].ob_type == &PyBytes_Type + assert l >= 0 + assert poutput_len[0] >= 0 + + if l > PY_SSIZE_T_MAX - poutput_len[0]: + raise MemoryError() # overflow + + need = poutput_len[0] + l + if need > pmax_output_len[0]: + if need >= PY_SSIZE_T_MAX // 2: + raise MemoryError() + pmax_output_len[0] = need // 2 * 3 + _PyBytes_Resize(ppoutput_buffer, pmax_output_len[0]) + + buf = PyBytes_AS_STRING(ppoutput_buffer[0]) + memcpy(buf + poutput_len[0], s, l) + poutput_len[0] += l + + return 0 + + +# ---- infrastructure to assist patching C saving codepath ---- + +# _detect_Pickler_typeinfo detects information about PicklerObject type +# through runtime introspection. +# +# This information is used mainly by __Pickler_xWrite. 
+cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *: + cdef PicklerTypeInfo t + + cdef bint debug = False + def trace(*argv): + if debug: + print(*argv) + trace() + + assert isinstance(pyPickler, type) + cdef PyTypeObject* Pickler = pyPickler + cdef _XPyTypeObject* xPickler = <_XPyTypeObject*> pyPickler + + # sizeof + assert Pickler.tp_basicsize > 0 + assert Pickler.tp_itemsize == 0 + t.size = Pickler.tp_basicsize + trace('size:\t', t.size) + + # busy keeps offsets of all bytes for already detected fields + busy = set() + def markbusy(off, size): + for _ in range(off, off+size): + assert _ not in busy, (_, busy) + assert 0 < off <= t.size + busy.add(_) + + # .bin + cdef PyMemberDef* mbin = tp_members_lookup(xPickler.tp_members, 'bin') + assert mbin.type == T_INT, (mbin.type,) + t.off_bin = mbin.offset + markbusy(t.off_bin, sizeof(int)) + trace('.bin:\t', t.off_bin) + + # .output_buffer + # + # 1) new Pickler + # 2) .memo = {} - the only pointer that changes is .memo (PyMemoTable* - not pyobject) + # 3) .tp_clear() - all changed words are changed to 0 and cover non-optional PyObject* and memo + # 4) .__init__() + # 5) go through offsets of all pyobjects and find the one with .ob_type = PyBytes_Type + # -> that is .output_buffer + + # 1) + class Null: + def write(self, data): pass + pyobj = pyPickler(Null()) + cdef PyObject* obj = pyobj + assert obj.ob_type == Pickler + + cdef byte* bobj = obj + cdef byte* bobj2 = malloc(t.size) + # obj_copy copies obj to obj2. + def obj_copy(): + memcpy(bobj2, bobj, t.size) + # obj_diff finds difference in between obj2 and obj. 
+ def obj_diff(Py_ssize_t elemsize): # -> []offset + assert (elemsize & (elemsize - 1)) == 0, elemsize # elemsize is 2^x + cdef Py_ssize_t off + + # skip PyObject_HEAD + off = sizeof(PyObject) + off = (off + elemsize - 1) & (~(elemsize - 1)) + assert off % elemsize == 0 + + # find out offsets of different elements + vdelta = [] + while off + elemsize <= t.size: + if memcmp(bobj + off, bobj2 + off, elemsize): + vdelta.append(off) + off += elemsize + + return vdelta + + # 2) + obj_copy() + pyobj.memo = {} + dmemo = obj_diff(sizeof(void*)) + assert len(dmemo) == 1, dmemo + off_memo = dmemo[0] + markbusy(off_memo, sizeof(void*)) + trace('.memo:\t', off_memo) + + # 3) + assert Pickler.tp_clear != NULL + obj_copy() + Pickler.tp_clear(pyobj) + pointers = obj_diff(sizeof(void*)) + for poff in pointers: + assert ((bobj + poff))[0] == NULL + assert off_memo in pointers + pyobjects = pointers[:] + pyobjects.remove(off_memo) + trace('pyobjects:\t', pyobjects) + + # 4) + pyobj.__init__(Null()) + + # 5) + cdef PyObject* bout = NULL + t.off_poutput_buffer = 0 + for poff in pyobjects: + x = ((bobj + poff))[0] + if x.ob_type == &PyBytes_Type: + if t.off_poutput_buffer == 0: + t.off_poutput_buffer = poff + else: + raise AssertionError("found several inside Pickler") + assert t.off_poutput_buffer != 0 + markbusy(t.off_poutput_buffer, sizeof(PyObject*)) + trace(".output_buffer:\t", t.off_poutput_buffer) + + # .output_len + .max_output_len + # dump something small and expected -> find out which field changes correspondingly + import io + output_len = None + max_output_len = None + for n in range(1,10): + f = io.BytesIO() + pyobj.__init__(f, 0) + o = (None,)*n + pyobj.dump(o) + p = f.getvalue() + phok = b'(' + b'N'*n + b't' # full trails with "p0\n." 
but "p0\n" is optional + assert p.startswith(phok), p + + # InspectWhilePickling observes obj while the pickling is going on: + # - sees which fields have changes + # - sees which fields are candidates for max_output_len + class InspectWhilePickling: + def __init__(self): + self.diff = None # what changes + self.doff2val = {} # off from .diff -> Py_ssize_t read from it + self.max_output_len = set() # offsets that are candidates for .max_output_len + + def __reduce__(self): + self.diff = obj_diff(sizeof(Py_ssize_t)) + for off in self.diff: + self.doff2val[off] = ((bobj + off))[0] + + cdef PyObject* output_buffer = \ + ((bobj + t.off_poutput_buffer))[0] + assert output_buffer.ob_type == &PyBytes_Type + off = sizeof(PyObject) + off = (off + sizeof(Py_ssize_t) - 1) & (~(sizeof(Py_ssize_t) - 1)) + assert off % sizeof(Py_ssize_t) == 0 + while off + sizeof(Py_ssize_t) <= t.size: + v = ((bobj + off))[0] + if v == PyBytes_GET_SIZE(output_buffer): + self.max_output_len.add(off) + off += sizeof(Py_ssize_t) + + return (int, ()) # arbitrary + + pyobj.__init__(Null(), 0) + i = InspectWhilePickling() + o += (i,) + obj_copy() + pyobj.dump(o) + assert i.diff is not None + #trace('n%d diff: %r\toff2val: %r' % (n, i.diff, i.doff2val)) + #trace(' ', busy) + + noutput_len = set() + for off in i.diff: + if off not in busy: + if i.doff2val[off] == (len(phok)-1): # (NNNN without t yet + noutput_len.add(off) + assert len(noutput_len) >= 1, noutput_len + if output_len is None: + output_len = noutput_len + else: + output_len.intersection_update(noutput_len) + + nmax_output_len = set() + for off in i.max_output_len: + if off not in busy: + nmax_output_len.add(off) + assert len(nmax_output_len) >= 1, nmax_output_len + if max_output_len is None: + max_output_len = nmax_output_len + else: + max_output_len.intersection_update(nmax_output_len) + + if len(output_len) != 1: + raise AssertionError("cannot find .output_len") + if len(max_output_len) != 1: + raise AssertionError("cannot find 
.max_output_len") + + t.off_output_len = output_len.pop() + markbusy(t.off_output_len, sizeof(Py_ssize_t)) + trace(".output_len:\t", t.off_output_len) + + t.off_max_output_len = max_output_len.pop() + markbusy(t.off_max_output_len, sizeof(Py_ssize_t)) + trace(".max_output_len:\t", t.off_max_output_len) + + free(bobj2) + return t + + +# _find_Pickler_save determines address and calling convention of `save` C +# function associated with specified Pickler. +# +# Address and calling convention of `save` are needed to be able to patch it. +cdef SaveFunc _find_Pickler_save(pyPickler) except *: + cdef SaveFunc save + save.addr = __find_Pickler_save(pyPickler) + save.cconv = __detect_save_callconv(pyPickler, save.addr) + #fprintf(stderr, "save.addr: %p\n", save.addr) + #fprintf(stderr, "save.cconv: %s\n", callconv_str(save.cconv)) + return save + +cdef void* __find_Pickler_save(pyPickler) except NULL: + assert isinstance(pyPickler, type) + + # start from _pickle_Pickler_dump as root and analyze how called functions + # behave wrt pickling deep chain of objects. We know whether a callee leads + # to save if, upon receiving control in our __reduce__, we see that the + # callee was entered and did not exited yet. If we find such a callee, we + # recourse the process and start to analyze functions that the callee invokes + # itself. We detect reaching save when we see that a callee was entered + # many times recursively. That happens because we feed deep recursive + # structure to the pickle, and because save itself is organized to invoke + # itself recursively - e.g. (obj,) is pickled via save -> save_tuple -> save. 
+ cdef _XPyTypeObject* Pickler = <_XPyTypeObject*>(pyPickler) + cdef PyMethodDef* mdump = tp_methods_lookup(Pickler.tp_methods, 'dump') + #print("%s _pickle_Pickler_dump:" % pyPickler) + addr = mdump.ml_meth # = _pickle_Pickler_dump + while 1: + vcallee = cfunc_direct_callees(addr) + ok = False + for i in range(vcallee.size()): + callee = vcallee[i] + #fprintf(stderr, "checking %p ...\n", callee) + nentry = _nentry_on_deep_save(pyPickler, callee) + #fprintf(stderr, "%p - %ld\n", callee, nentry) + assert nentry in (0, 1) or nentry > 5, nentry + if nentry > 5: + return callee # found save + if nentry == 1: + addr = callee # found path that will lead to save + ok = True + break + + if not ok: + raise AssertionError('cannot find path leading to save') + +# _nentry_on_deep_save tests how addr is related to `save` via inspecting +# addr entry count when Pickler is feed deep recursive structure. +# +# if #entry is 0 - addr is unrelated to save +# if #entry is 1 - addr is related to save and calls it +# if #entry is big - addr is save +cdef long _nentry_on_deep_save(pyPickler, void* addr) except -1: # -> nentry + # below we rely on inside_counted which alters return address during the + # call to wrapped func. In practice this does not create problems on x86_64 + # and arm64, but on i386 there are many calls to functions like + # x86.get_pc_thunk.ax which are used to implement PC-relative addressing. + # If we let inside_counted to hook such a func it will result in a crash + # because returned address will be different from real PC of the caller. + # Try to protect us from entering into such situation by detecting leaf + # functions and not hooking them. For the reference x86.get_pc_thunk.ax is: + # + # movl (%esp), %eax + # ret + vcallee = cfunc_direct_callees(addr) + if vcallee.size() == 0: + return 0 + + # InspectWhilePickling observes how many times currently considered + # function was entered at the point of deep recursion inside save. 
+ class InspectWhilePickling: + def __init__(self): + self.inside_counter = None + def __reduce__(self): + self.inside_counter = inside_counter + return (int, ()) # arbitrary + + class Null: + def write(self, data): pass + + i = InspectWhilePickling() + obj = (i,) + for _ in range(20): + obj = (obj,) + + p = pyPickler(Null(), 0) + + h = xfunchook_create() + global inside_counted_func + inside_counted_func = addr + xfunchook_prepare(h, &inside_counted_func, inside_counted) + xfunchook_install(h, 0) + p.dump(obj) + xfunchook_uninstall(h, 0) + xfunchook_destroy(h) + + assert i.inside_counter is not None + return i.inside_counter + + +# inside_counted is used to patch a function to count how many times that +# function is entered/leaved. +cdef extern from * nogil: # see _golang_str_pickle.S for details + """ + extern "C" { + extern void inside_counted(); + extern void* inside_counted_func; + extern long inside_counter; + } + """ + void inside_counted() + void* inside_counted_func + long inside_counter + + +# __detect_save_callconv determines calling convention that compiler used for save. +# +# On architectures with many registers - e.g. x86_64 and arm64 - the calling +# convention is usually the same as default, but on e.g. i386 - where the +# default cdecl means to put arguments on the stack, the compiler usually +# changes calling convention to use registers instead. 
+cdef Callconv __detect_save_callconv(pyPickler, void* save) except *: + for p in saveprobe_test_ccv: + #print("save: probing %s" % callconv_str(p.cconv)) + good = __save_probe1(pyPickler, save, p.addr) + #print(" ->", good) + if good: + return p.cconv + bad = "cannot determine save calling convention\n\n" + bad += "probed:\n" + for p in saveprobe_test_ccv: + bad += " - %s\t; callee_stkcleanup: %d\n" % (callconv_str(p.cconv), cfunc_is_callee_cleanup(p.addr)) + bad += "\n" + bad += "save callee_stkcleanup: %d\n" % cfunc_is_callee_cleanup(save) + bad += "save disassembly:\n%s" % cfunc_disasm(save) + raise AssertionError(bad) + +cdef bint __save_probe1(pyPickler, void* save, void* cfunc) except *: + # first see whether stack is cleaned up by caller or callee and how much. + # we need to do this first to avoid segfault if we patch save with cfunc + # with different stack cleanup as the probe. + save_stkclean = cfunc_is_callee_cleanup(save) + cfunc_stkclean = cfunc_is_callee_cleanup(cfunc) + if save_stkclean != cfunc_stkclean: + return False + + # now when we know that save and cfunc have the same stack cleanup protocol, we can start probing + global saveprobe_ncall, saveprobe_self, saveprobe_obj, saveprobe_pers_save + saveprobe_ncall = 0 + saveprobe_self = NULL + saveprobe_obj = NULL + saveprobe_pers_save = 0xdeafbeaf + + class Null: + def write(self, data): pass + p = pyPickler(Null(), 0) + obj = object() + + h = xfunchook_create() + xfunchook_prepare(h, &save, cfunc) + xfunchook_install(h, 0) + p.dump(obj) + xfunchook_uninstall(h, 0) + xfunchook_destroy(h) + + assert saveprobe_ncall == 1, saveprobe_ncall + good = (saveprobe_self == p and \ + saveprobe_obj == obj and \ + saveprobe_pers_save == 0) + return good + +cdef extern from * nogil: + r""" + static int saveprobe_ncall; + static void* saveprobe_self; + static void* saveprobe_obj; + static int saveprobe_pers_save; + + static int saveprobe(void* self, PyObject* obj, int pers_save) { + saveprobe_ncall++; + 
saveprobe_self = self; + saveprobe_obj = obj; + saveprobe_pers_save = pers_save; + return 0; // do nothing + } + + #define DEF_SAVEPROBE_builtin(ccname, callconv) \ + static int callconv \ + saveprobe_##ccname(void* self, PyObject* obj, int pers_save) { \ + return saveprobe(self, obj, pers_save); \ + } + #define DEF_SAVEPROBE_custom(ccname, _) \ + extern "C" char saveprobe_##ccname; + #define DEF_SAVEPROBE(ccname, callconv, cckind) DEF_SAVEPROBE_##cckind(ccname, callconv) + FOR_EACH_CALLCONV(DEF_SAVEPROBE) + + static std::vector saveprobe_test_ccv = { + #define CC_SAVEPROBE(ccname, _, __) \ + SaveFunc{(void*)&saveprobe_##ccname, CALLCONV_##ccname}, + FOR_EACH_CALLCONV(CC_SAVEPROBE) + }; + + // proxy for asm routines to invoke saveprobe + #ifdef LIBGOLANG_ARCH_386 + extern "C" int CALLCONV(fastcall) + saveprobe_ifastcall(void* self, PyObject* obj, int pers_save) { \ + return saveprobe(self, obj, pers_save); \ + } + #endif + """ + int saveprobe_ncall + void* saveprobe_self + void* saveprobe_obj + int saveprobe_pers_save + + vector[SaveFunc] saveprobe_test_ccv + + +# XXX doc save_invoke ... 
+# XXX place +cdef extern from *: + r""" + #define CC_SAVE_DEFCALL1_builtin(ccname, callconv) + #define CC_SAVE_DEFCALL1_custom(ccname, _) \ + extern "C" int CALLCONV(fastcall) \ + save_invoke_as_##ccname(void* save, void* self, PyObject* obj, int pers_save); + #define CC_SAVE_DEFCALL1(ccname, callconv, cckind) CC_SAVE_DEFCALL1_##cckind(ccname, callconv) + FOR_EACH_CALLCONV(CC_SAVE_DEFCALL1) + + static int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) { + using namespace golang; + + switch(cconv) { + #define CC_SAVE_CALL1_builtin(ccname, callconv) \ + case CALLCONV_ ## ccname: \ + return ((int (callconv *)(void*, PyObject*, int))save) \ + (self, obj, pers_save); + #define CC_SAVE_CALL1_custom(ccname, _) \ + case CALLCONV_ ## ccname: \ + return save_invoke_as_##ccname(save, self, obj, pers_save); + #define CC_SAVE_CALL1(ccname, callconv, cckind) CC_SAVE_CALL1_##cckind(ccname, callconv) + FOR_EACH_CALLCONV(CC_SAVE_CALL1) + default: + panic("unreachable"); + } + } + """ + int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) except -1 + + +# - cfunc_direct_callees returns addresses of functions that cfunc calls directly. +# +# - cfunc_is_callee_cleanup determines whether cfunc does stack cleanup by +# itself and for how much. +# +# - cfunc_disassembly returns disassembly of cfunc. 
+# +# XXX dedup iterating instructions -> DisasmIter +cdef extern from "capstone/capstone.h" nogil: + r""" + #include + #include "golang/fmt.h" + + #if defined(LIBGOLANG_ARCH_amd64) + # define MY_ARCH CS_ARCH_X86 + # define MY_MODE CS_MODE_64 + #elif defined(LIBGOLANG_ARCH_386) + # define MY_ARCH CS_ARCH_X86 + # define MY_MODE CS_MODE_32 + #elif defined(LIBGOLANG_ARCH_arm64) + # define MY_ARCH CS_ARCH_ARM64 + # define MY_MODE CS_MODE_LITTLE_ENDIAN + #else + # error "unsupported architecture" + #endif + + static std::tuple _insn_getimm1(cs_arch arch, cs_insn* ins); + std::vector cfunc_direct_callees(void *cfunc) { + const bool debug = false; + + using namespace golang; + using std::tie; + using std::max; + + std::vector vcallee; + + csh h; + cs_insn* ins; + cs_err err; + + cs_arch arch = MY_ARCH; + err = cs_open(arch, MY_MODE, &h); + if (err) { + fprintf(stderr, "cs_open: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + err = cs_option(h, CS_OPT_DETAIL, CS_OPT_ON); + if (err) { + fprintf(stderr, "cs_option: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + ins = cs_malloc(h); + if (ins == nil) + panic("cs_malloc failed"); + + const byte* code = (const byte*)cfunc; + size_t size = 10*1024; // something sane and limited + uint64_t addr = (uint64_t)cfunc; + uint64_t maxjump = addr; + while (cs_disasm_iter(h, &code, &size, &addr, ins)) { + if (debug) + fprintf(stderr, "0x%" PRIx64 ":\t%s\t\t%s\n", ins->address, ins->mnemonic, ins->op_str); + + if (cs_insn_group(h, ins, CS_GRP_RET)) { + if (ins->address >= maxjump) + break; + continue; + } + + uint64_t imm1; + bool imm1ok; + tie(imm1, imm1ok) = _insn_getimm1(arch, ins); + + bool call = cs_insn_group(h, ins, CS_GRP_CALL); + bool jump = cs_insn_group(h, ins, CS_GRP_JUMP) && !call; // e.g. 
BL on arm64 is both jump and call + + if (jump && imm1ok) { + maxjump = max(maxjump, imm1); + continue; + } + + if (call && imm1ok) { + void* callee = (void*)imm1; + if (debug) + fprintf(stderr, " *** DIRECT CALL -> %p\n", callee); + if (!std::count(vcallee.begin(), vcallee.end(), callee)) + vcallee.push_back(callee); + } + } + + if (debug) + fprintf(stderr, "\n"); + + cs_free(ins, 1); + cs_close(&h); + return vcallee; + } + + // _insn_getimm1 checks whether instruction comes with the sole immediate operand and returns it. + static std::tuple _insn_getimm1(cs_arch arch, cs_insn* ins) { + using namespace golang; + using std::make_tuple; + + switch (arch) { + case CS_ARCH_X86: { + cs_x86* x86 = &(ins->detail->x86); + if (x86->op_count == 1) { + cs_x86_op* op = &(x86->operands[0]); + if (op->type == X86_OP_IMM) + return make_tuple(op->imm, true); + } + break; + } + + case CS_ARCH_ARM64: { + cs_arm64* arm64 = &(ins->detail->arm64); + if (arm64->op_count == 1) { + cs_arm64_op* op = &(arm64->operands[0]); + if (op->type == ARM64_OP_IMM) + return make_tuple(op->imm, true); + } + break; + } + + default: + panic("TODO"); + } + + return make_tuple(0, false); + } + + + int cfunc_is_callee_cleanup(void *cfunc) { + // only i386 might have callee-cleanup + // https://en.wikipedia.org/wiki/X86_calling_conventions#List_of_x86_calling_conventions + if (!(MY_ARCH == CS_ARCH_X86 && MY_MODE == CS_MODE_32)) + return 0; + + const bool debug = false; + + int stkclean_by_callee = 0; + using namespace golang; + + csh h; + cs_insn* ins; + cs_err err; + + err = cs_open(MY_ARCH, MY_MODE, &h); + if (err) { + fprintf(stderr, "cs_open: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + err = cs_option(h, CS_OPT_DETAIL, CS_OPT_ON); + if (err) { + fprintf(stderr, "cs_option: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + ins = cs_malloc(h); + if (ins == nil) + panic("cs_malloc failed"); + + const byte* code = (const byte*)cfunc; + size_t size = 10*1024; // something sane 
and limited + uint64_t addr = (uint64_t)cfunc; + while (cs_disasm_iter(h, &code, &size, &addr, ins)) { + if (debug) + fprintf(stderr, "0x%" PRIx64 ":\t%s\t\t%s\n", ins->address, ins->mnemonic, ins->op_str); + + if (!cs_insn_group(h, ins, CS_GRP_RET)) + continue; + + assert(ins->id == X86_INS_RET); + cs_x86* x86 = &(ins->detail->x86); + if (x86->op_count > 0) { + cs_x86_op* op = &(x86->operands[0]); + if (op->type == X86_OP_IMM) + stkclean_by_callee = op->imm; + } + + break; + } + + if (debug) + fprintf(stderr, " *** CLEANUP BY: %s (%d)\n", (stkclean_by_callee ? "callee" : "caller"), stkclean_by_callee); + + cs_free(ins, 1); + cs_close(&h); + return stkclean_by_callee; + } + + std::string cfunc_disasm(void *cfunc) { + using namespace golang; + string disasm; + + csh h; + cs_insn* ins; + cs_err err; + + err = cs_open(MY_ARCH, MY_MODE, &h); + if (err) { + fprintf(stderr, "cs_open: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + err = cs_option(h, CS_OPT_DETAIL, CS_OPT_ON); + if (err) { + fprintf(stderr, "cs_option: %s\n", cs_strerror(err)); + panic(cs_strerror(err)); + } + + ins = cs_malloc(h); + if (ins == nil) + panic("cs_malloc failed"); + + const byte* code = (const byte*)cfunc; + size_t size = 10*1024; // something sane and limited + uint64_t addr = (uint64_t)cfunc; + while (cs_disasm_iter(h, &code, &size, &addr, ins)) { + disasm += fmt::sprintf("0x%" PRIx64 ":\t%s\t\t%s\n", ins->address, ins->mnemonic, ins->op_str); + + // FIXME also handle forward jump like cfunc_direct_callees does + // should be done automatically after DisasmIter dedup + if (cs_insn_group(h, ins, CS_GRP_RET)) + break; + } + + cs_free(ins, 1); + cs_close(&h); + + return disasm; + } + """ + vector[void*] cfunc_direct_callees(void* cfunc) + int cfunc_is_callee_cleanup(void* cfunc) + string cfunc_disasm(void* cfunc) + + +# _test_inside_counted depends on inside_counted and funchook, which we don't want to expose. +# -> include the test from here. 
Do the same for other low-level tests. +include '_golang_str_pickle_test.pyx' + + +# ---- misc ---- + +cdef PyMethodDef* tp_methods_lookup(PyMethodDef* methv, str name) except NULL: + m = &methv[0] + while m.ml_name != NULL: + if str(m.ml_name) == name: + return m + m += 1 + raise KeyError("method %s not found" % name) + +cdef PyMemberDef* tp_members_lookup(PyMemberDef* membv, str name) except NULL: + m = &membv[0] + while m.name != NULL: + if str(m.name) == name: + return m + m += 1 + raise KeyError("member %s not found" % name) diff --git a/golang/_golang_str_pickle_test.pyx b/golang/_golang_str_pickle_test.pyx new file mode 100644 index 0000000..62c9a2f --- /dev/null +++ b/golang/_golang_str_pickle_test.pyx @@ -0,0 +1,181 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. 
+ +# test for inside_counted +def _test_inside_counted(): # -> outok + outok = '' + + outok += '\n\n\nBEFORE PATCH\n' + print('\n\n\nBEFORE PATCH') + tfunc(3) + + t0 = '' + for i in range(3,0-1,-1): + t0 += '> tfunc(%d)\tinside_counter: 0\n' % i + for i in range(0,3+1,+1): + t0 += '< tfunc(%d)\tinside_counter: 0\n' % i + outok += t0 + + outok += '\n\n\nPATCHED\n' + print('\n\n\nPATCHED') + _patch = xfunchook_create() + global inside_counted_func + inside_counted_func = &tfunc + xfunchook_prepare(_patch, &inside_counted_func, inside_counted) + xfunchook_install(_patch, 0) + + tfunc(12) + + stk_size = 8 # = STK_SIZE from _golang_str_pickle.S + for i in range(12,0-1,-1): + outok += '> tfunc(%d)\tinside_counter: %d\n' % (i, min(12-i+1, stk_size)) + for i in range(0,12+1,+1): + outok += '< tfunc(%d)\tinside_counter: %d\n' % (i, min(12-i+1, stk_size)) + + outok += '\n\n\nUNPATCHED\n' + print('\n\n\nUNPATCHED') + xfunchook_uninstall(_patch, 0) + tfunc(3) + outok += t0 + + return outok + +cdef void tfunc(int x): + print('> tfunc(%d)\tinside_counter: %d' % (x, inside_counter)) + if x > 0: + tfunc(x-1) + print('< tfunc(%d)\tinside_counter: %d' % (x, inside_counter)) + + +def _test_cfunc_is_callee_cleanup(): + for t in _cfunc_is_callee_cleanup_testv: + stkclean = cfunc_is_callee_cleanup(t.cfunc) + assert stkclean == t.stkclean_by_callee_ok, (t.cfunc_name, stkclean, t.stkclean_by_callee_ok) + +cdef extern from * nogil: + r""" + struct _Test_cfunc_is_callee_clenup { + const char* cfunc_name; + void* cfunc; + int stkclean_by_callee_ok; + }; + + #define CASE(func, stkclean_ok) \ + _Test_cfunc_is_callee_clenup{#func, (void*)func, stkclean_ok} + + #if defined(LIBGOLANG_ARCH_386) + int CALLCONV(cdecl) + tfunc_cdecl1(int x) { return x; } + int CALLCONV(cdecl) + tfunc_cdecl2(int x, int y) { return x; } + int CALLCONV(cdecl) + tfunc_cdecl3(int x, int y, int z) { return x; } + + int CALLCONV(stdcall) + tfunc_stdcall1(int x) { return x; } + int CALLCONV(stdcall) + tfunc_stdcall2(int x, 
int y) { return x; } + int CALLCONV(stdcall) + tfunc_stdcall3(int x, int y, int z) { return x; } + + int CALLCONV(fastcall) + tfunc_fastcall1(int x) { return x; } + int CALLCONV(fastcall) + tfunc_fastcall2(int x, int y) { return x; } + int CALLCONV(fastcall) + tfunc_fastcall3(int x, int y, int z) { return x; } + + #ifndef LIBGOLANG_CC_msc // see note about C3865 in FOR_EACH_CALLCONV + int CALLCONV(thiscall) + tfunc_thiscall1(int x) { return x; } + int CALLCONV(thiscall) + tfunc_thiscall2(int x, int y) { return x; } + int CALLCONV(thiscall) + tfunc_thiscall3(int x, int y, int z) { return x; } + #endif + + #ifndef LIBGOLANG_CC_msc // no regparm on MSCV + int CALLCONV(regparm(1)) + tfunc_regparm1_1(int x) { return x; } + int CALLCONV(regparm(1)) + tfunc_regparm1_2(int x, int y) { return x; } + int CALLCONV(regparm(1)) + tfunc_regparm1_3(int x, int y, int z) { return x; } + + int CALLCONV(regparm(2)) + tfunc_regparm2_1(int x) { return x; } + int CALLCONV(regparm(2)) + tfunc_regparm2_2(int x, int y) { return x; } + int CALLCONV(regparm(2)) + tfunc_regparm2_3(int x, int y, int z) { return x; } + + int CALLCONV(regparm(3)) + tfunc_regparm3_1(int x) { return x; } + int CALLCONV(regparm(3)) + tfunc_regparm3_2(int x, int y) { return x; } + int CALLCONV(regparm(3)) + tfunc_regparm3_3(int x, int y, int z) { return x; } + #endif + + static std::vector<_Test_cfunc_is_callee_clenup> _cfunc_is_callee_cleanup_testv = { + CASE(tfunc_cdecl1 , 0 * 4), + CASE(tfunc_cdecl2 , 0 * 4), + CASE(tfunc_cdecl3 , 0 * 4), + CASE(tfunc_stdcall1 , 1 * 4), + CASE(tfunc_stdcall2 , 2 * 4), + CASE(tfunc_stdcall3 , 3 * 4), + CASE(tfunc_fastcall1 , 0 * 4), + CASE(tfunc_fastcall2 , 0 * 4), + CASE(tfunc_fastcall3 , 1 * 4), + #ifndef LIBGOLANG_CC_msc + CASE(tfunc_thiscall1 , 0 * 4), + CASE(tfunc_thiscall2 , 1 * 4), + CASE(tfunc_thiscall3 , 2 * 4), + #endif + #ifndef LIBGOLANG_CC_msc + CASE(tfunc_regparm1_1 , 0 * 4), + CASE(tfunc_regparm1_2 , 0 * 4), + CASE(tfunc_regparm1_3 , 0 * 4), + CASE(tfunc_regparm2_1 
, 0 * 4), + CASE(tfunc_regparm2_2 , 0 * 4), + CASE(tfunc_regparm2_3 , 0 * 4), + CASE(tfunc_regparm3_1 , 0 * 4), + CASE(tfunc_regparm3_2 , 0 * 4), + CASE(tfunc_regparm3_3 , 0 * 4), + #endif + }; + + #else + // only i386 has many calling conventions + int tfunc_default(int x, int y, int z) { return x; } + + static std::vector<_Test_cfunc_is_callee_clenup> _cfunc_is_callee_cleanup_testv = { + CASE(tfunc_default, 0), + }; + #endif + + #undef CASE + """ + struct _Test_cfunc_is_callee_clenup: + const char* cfunc_name + void* cfunc + int stkclean_by_callee_ok + + vector[_Test_cfunc_is_callee_clenup] _cfunc_is_callee_cleanup_testv diff --git a/golang/_strconv.pyx b/golang/_strconv.pyx index 8ffd6f5..3b1db0c 100644 --- a/golang/_strconv.pyx +++ b/golang/_strconv.pyx @@ -28,12 +28,11 @@ from golang cimport pyb, byte, rune from golang cimport _utf8_decode_rune, _xunichr from golang.unicode cimport utf8 -from cpython cimport PyObject +from cpython cimport PyObject, _PyBytes_Resize cdef extern from "Python.h": PyObject* PyBytes_FromStringAndSize(char*, Py_ssize_t) except NULL char* PyBytes_AS_STRING(PyObject*) - int _PyBytes_Resize(PyObject**, Py_ssize_t) except -1 void Py_DECREF(PyObject*) @@ -65,7 +64,7 @@ cdef bytes _quote(const byte[::1] s, char quote, bint* out_nonascii_escape): # - cdef byte c q[0] = quote; q += 1 while i < len(s): - c = s[i] + c = s[i] # XXX -> use raw pointer in the loop # fast path - ASCII only if c < 0x80: if c in (ord('\\'), quote): @@ -104,7 +103,8 @@ cdef bytes _quote(const byte[::1] s, char quote, bint* out_nonascii_escape): # - # slow path - full UTF-8 decoding + unicodedata else: - r, size = _utf8_decode_rune(s[i:]) + # XXX optimize non-ascii case + r, size = _utf8_decode_rune(s[i:]) # XXX -> raw pointer isize = i + size # decode error - just emit raw byte as escaped @@ -117,6 +117,9 @@ cdef bytes _quote(const byte[::1] s, char quote, bint* out_nonascii_escape): # - q += 4 # printable utf-8 characters go as is + # XXX ? 
use Py_UNICODE_ISPRINTABLE (py3, not available on py2) ? + # XXX ? and generate C table based on unicodedata for py2 ? + # XXX -> generate table based on unicodedata for both py2/py3 because Py_UNICODE_ISPRINTABLE is not exactly what matches strconv.IsPrint (i.e. cat starts from LNPS) elif _unicodedata_category(_xunichr(r))[0] in 'LNPS': # letters, numbers, punctuation, symbols for j in range(i, isize): q[0] = s[j] diff --git a/golang/fmt.h b/golang/fmt.h index f548f0b..7c33802 100644 --- a/golang/fmt.h +++ b/golang/fmt.h @@ -111,7 +111,7 @@ inline error errorf(const string& format, Argv... argv) { // `const char *` overloads just to catch format mistakes as // __attribute__(format) does not work with std::string. LIBGOLANG_API string sprintf(const char *format, ...) -#ifndef _MSC_VER +#ifndef LIBGOLANG_CC_msc __attribute__ ((format (printf, 1, 2))) #endif ; diff --git a/golang/golang_str_pickle_test.py b/golang/golang_str_pickle_test.py new file mode 100644 index 0000000..1bf1a7b --- /dev/null +++ b/golang/golang_str_pickle_test.py @@ -0,0 +1,512 @@ +# -*- coding: utf-8 -*- +# Copyright (C) 2022-2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. 
+ +from __future__ import print_function, absolute_import + +from golang import b, u, bstr, ustr +from golang.golang_str_test import xbytes, x32, unicode +from golang._golang import _test_inside_counted, _test_cfunc_is_callee_cleanup +from gpython.gpython_test import is_gpython +from pytest import raises, fixture, mark +import sys, io, struct +import six + +# run all tests on all py/c pickle modules we aim to support +import pickle as stdPickle +if six.PY2: + import cPickle +else: + import _pickle as cPickle +from zodbpickle import slowpickle as zslowPickle +from zodbpickle import fastpickle as zfastPickle +from zodbpickle import pickle as zpickle +from zodbpickle import _pickle as _zpickle +import pickletools as stdpickletools +if six.PY2: + from zodbpickle import pickletools_2 as zpickletools +else: + from zodbpickle import pickletools_3 as zpickletools + + +# pickle is pytest fixture that yields all variants of pickle module. +@fixture(scope="function", params=[stdPickle, cPickle, + zslowPickle, zfastPickle, zpickle, _zpickle]) +def pickle(request): + yield request.param + +# pickletools is pytest fixture that yields all variants of pickletools module. +@fixture(scope="function", params=[stdpickletools, zpickletools]) +def pickletools(request): + yield request.param + +# pickle2tools returns pickletools module that corresponds to module pickle. +def pickle2tools(pickle): + if pickle in (stdPickle, cPickle): + return stdpickletools + else: + return zpickletools + +# @gpystr_only is marker to run a test only under gpython -X gpython.strings=bstr+ustr +is_gpystr = type(u'') is ustr +gpystr_only = mark.skipif(not is_gpystr, reason="gpystr-only test") + + +# ---- pickling/unpickling under gpystr ---- + +# verify that loading *STRING opcodes loads them as bstr on gpython by default. +# TODO or with encoding='bstr' under plain py +@gpystr_only +def test_string_pickle_load_STRING(pickle): + p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80\\xff'\n." 
# STRING 'мир\xff' + p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' + p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff' + p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff' + + p_bytes = xbytes('мир')+b'\xff' + + # check invokes f on all test pickles + def check(f): + f(p_str) + f(p_utf8) + f(p_sbins) + f(p_bins) + + # default -> bstr on both py2 and py3 + # TODO only this check is gpystr_only -> remove whole-func @gpystr_only + def _(p): + obj = xloads(pickle, p) + assert type(obj) is bstr + assert obj == p_bytes + check(_) + + # also test bstr inside tuple (for symmetry with save) + def _(p): + p_ = b'(' + p[:-1] + b't.' + tobj = xloads(pickle, p_) + assert type(tobj) is tuple + assert len(tobj) == 1 + obj = tobj[0] + assert type(obj) is bstr + assert obj == p_bytes + check(_) + + # pickle supports encoding=... only on py3 + if six.PY3: + # encoding='bstr' -> bstr + def _(p): + obj = xloads(pickle, p, encoding='bstr') + assert type(obj) is bstr + assert obj == p_bytes + check(_) + + # encoding='bytes' -> bytes + def _(p): + obj = xloads(pickle, p, encoding='bytes') + assert type(obj) is bytes + assert obj == p_bytes + check(_) + + # encoding='utf-8' -> UnicodeDecodeError + def _(p): + with raises(UnicodeDecodeError): + xloads(pickle, p, encoding='utf-8') + check(_) + + # encoding='utf-8', errors=... -> unicode + def _(p): + obj = xloads(pickle, p, encoding='utf-8', errors='backslashreplace') + assert type(obj) is unicode + assert obj == u'мир\\xff' + check(_) + + +# verify that saving bstr results in *STRING opcodes on gpython. +@gpystr_only +def test_strings_pickle_save_STRING(pickle): + s = s0 = b(xbytes('мир')+b'\xff') + assert type(s) is bstr + + p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' + p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff' + p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' 
# BINSTRING 'мир\xff' + + def dumps(proto): + return xdumps(pickle, s, proto) + + assert dumps(0) == p_utf8 + for proto in range(1, HIGHEST_PROTOCOL(pickle)+1): + assert dumps(proto) == p_sbins + + # BINSTRING + s += b'\x55'*0x100 + p_bins_ = p_bins[:2] + b'\x01' + p_bins[3:-1] + b'\x55'*0x100 + b'.' + for proto in range(1, HIGHEST_PROTOCOL(pickle)+1): + assert dumps(proto) == p_bins_ + + # also test bstr inside tuple to verify that what we patched is actually + # _pickle.save that is invoked from inside other save_X functions. + s = (s0,) + p_tutf8 = b'(' + p_utf8[:-1] + b't.' + p_tsbins = b'(' + p_sbins[:-1] + b't.' + assert dumps(0) == p_tutf8 + assert dumps(1) == p_tsbins + # don't test proto ≥ 2 because they start to use TUPLE1 instead of TUPLE + + +# verify that loading *UNICODE opcodes loads them as unicode/ustr. +# this is standard behaviour but we verify it since we patch pickle's strings processing. +# also verify save lightly for symmetry. +# NOTE not @gpystr_only +def test_string_pickle_loadsave_UNICODE(pickle): + # NOTE builtin pickle behaviour is to save unicode via 'surrogatepass' error handler + # this means that b'мир\xff' -> ustr/unicode -> save will emit *UNICODE with + # b'мир\xed\xb3\xbf' instead of b'мир\xff' as data. + p_uni = b'V\\u043c\\u0438\\u0440\\udcff\n.' # UNICODE 'мир\uDCFF' + p_binu = b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbf.' # BINUNICODE NOTE ...edb3bf not ...ff + p_sbinu = b'\x8c\x09\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbf.' # SHORT_BINUNICODE + p_binu8 = b'\x8d\x09\x00\x00\x00\x00\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbf.' 
# BINUNICODE8 + + u_obj = u'мир\uDCFF'; assert type(u_obj) is unicode + + # load: check invokes f on all test pickles that pickle should support + def check(f): + f(p_uni) + f(p_binu) + if HIGHEST_PROTOCOL(pickle) >= 4: + f(p_sbinu) + f(p_binu8) + + def _(p): + obj = xloads(pickle, p) + assert type(obj) is unicode + assert obj == u_obj + check(_) + + # save + def dumps(proto): + return xdumps(pickle, u_obj, proto) + assert dumps(0) == p_uni + assert dumps(1) == p_binu + assert dumps(2) == p_binu + if HIGHEST_PROTOCOL(pickle) >= 3: + assert dumps(3) == p_binu + if HIGHEST_PROTOCOL(pickle) >= 4: + assert dumps(4) == p_sbinu + + +# ---- pickling/unpickling generally without gpystr ---- + +# verify that bstr/ustr can be pickled/unpickled correctly on !gpystr. +# gpystr should also load ok what was pickled on !gpystr. +# for uniformity gpystr is also verified to save/load objects correctly. +# However the main gpystr tests are load/save tests for *STRING and *UNICODE above. +def test_strings_pickle_bstr_ustr(pickle): + bs = b(xbytes('мир')+b'\xff') + us = u(xbytes('май')+b'\xff') + + def diss(p): return xdiss(pickle2tools(pickle), p) + def dis(p): print(diss(p)) + + # assert_pickle verifies that pickling obj results in + # + # - dumps_ok_gpystr (when run under gpython with gpython.string=bstr+ustr), or + # - dumps_ok_stdstr (when run under plain python or gpython with gpython.strings=pystd) + # + # and that unpickling results back in obj. + # + # gpystr should also unpickle !gpystr pickle correctly. 
+ assert HIGHEST_PROTOCOL(pickle) <= 5 + def assert_pickle(obj, proto, dumps_ok_gpystr, dumps_ok_stdstr): + if proto > HIGHEST_PROTOCOL(pickle): + with raises(ValueError): + xdumps(pickle, obj, proto) + return + p = xdumps(pickle, obj, proto) + if not is_gpystr: + assert p == dumps_ok_stdstr, diss(p) + dumps_okv = [dumps_ok_stdstr] + else: + assert p == dumps_ok_gpystr, diss(p) + dumps_okv = [dumps_ok_gpystr, dumps_ok_stdstr] + for p in dumps_okv: + #dis(p) + obj2 = xloads(pickle, p) + assert type(obj2) is type(obj) + assert obj2 == obj + + _ = assert_pickle + + _(bs, 0, xbytes("S'мир\\xff'\n."), # STRING + b"cgolang\nbstr\n(V\\u043c\\u0438\\u0440\\udcff\ntR.") # bstr(UNICODE) + + _(us, 0, b'V\\u043c\\u0430\\u0439\\udcff\n.', # UNICODE + b'cgolang\nustr\n(V\\u043c\\u0430\\u0439\\udcff\ntR.') # ustr(UNICODE) + + _(bs, 1, b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.', # SHORT_BINSTRING + b'cgolang\nbstr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) + b'\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbftR.') + + # NOTE BINUNICODE ...edb3bf not ...ff (see test_string_pickle_loadsave_UNICODE for details) + _(us, 1, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE + b'cgolang\nustr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) + b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbftR.') + + _(bs, 2, b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.', # SHORT_BINSTRING + b'cgolang\nbstr\nX\x09\x00\x00\x00' # bstr(BINUNICODE) + b'\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbf\x85\x81.') + + _(us, 2, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE + b'cgolang\nustr\nX\x09\x00\x00\x00' # ustr(BINUNICODE) + b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf\x85\x81.') + + _(bs, 3, b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.', # SHORT_BINSTRING + b'cgolang\nbstr\nC\x07\xd0\xbc\xd0\xb8\xd1\x80\xff\x85\x81.') # bstr(SHORT_BINBYTES) + + _(us, 3, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE + b'cgolang\nustr\nX\x09\x00\x00\x00' # ustr(BINUNICODE) + b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf\x85\x81.') 
+ + for p in (4,5): + _(bs, p, + b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.', # SHORT_BINSTRING + b'\x8c\x06golang\x8c\x04bstr\x93C\x07' # bstr(SHORT_BINBYTES) + b'\xd0\xbc\xd0\xb8\xd1\x80\xff\x85\x81.') + _(us, p, + b'\x8c\x09\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # SHORT_BINUNICODE + b'\x8c\x06golang\x8c\x04ustr\x93\x8c\x09' # ustr(SHORT_BINUNICODE) + b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf\x85\x81.') + + +# ---- disassembly ---- + +# xdiss returns disassembly of a pickle as string. +def xdiss(pickletools, p): # -> str + out = six.StringIO() + pickletools.dis(p, out) + return out.getvalue() + +# verify that disassembling *STRING opcodes works with treating strings as UTF8b. +@gpystr_only +def test_string_pickle_dis_STRING(pickletools): + p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80'\n." # STRING 'мир' + p_sbins = b'U\x06\xd0\xbc\xd0\xb8\xd1\x80.' # SHORT_BINSTRING 'мир' + p_bins = b'T\x06\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80.' # BINSTRING 'мир' + + bmir = x32("b('мир')", "'мир'") + + assert xdiss(pickletools, p_str) == """\ + 0: S STRING %s + 28: . STOP +highest protocol among opcodes = 0 +""" % bmir + + assert xdiss(pickletools, p_sbins) == """\ + 0: U SHORT_BINSTRING %s + 8: . STOP +highest protocol among opcodes = 1 +""" % bmir + + assert xdiss(pickletools, p_bins) == """\ + 0: T BINSTRING %s + 11: . STOP +highest protocol among opcodes = 1 +""" % bmir + + +# ---- loads and normalized dumps ---- + +# xloads loads pickle p via pickle.loads +# it also verifies that .load and Unpickler.load give the same result. +def xloads(pickle, p, **kw): + obj1 = _xpickle_attr(pickle, 'loads')(p, **kw) + obj2 = _xpickle_attr(pickle, 'load') (io.BytesIO(p), **kw) + obj3 = _xpickle_attr(pickle, 'Unpickler')(io.BytesIO(p), **kw).load() + assert type(obj2) is type(obj1) + assert type(obj3) is type(obj1) + assert obj1 == obj2 == obj3 + return obj1 + +# xdumps dumps obj via pickle.dumps +# it also verifies that .dump and Pickler.dump give the same. 
+# the pickle is returned in normalized form - see pickle_normalize for details. +def xdumps(pickle, obj, proto, **kw): + p1 = _xpickle_attr(pickle, 'dumps')(obj, proto, **kw) + f2 = io.BytesIO(); _xpickle_attr(pickle, 'dump')(obj, f2, proto, **kw) + p2 = f2.getvalue() + f3 = io.BytesIO(); _xpickle_attr(pickle, 'Pickler')(f3, proto, **kw).dump(obj) + p3 = f3.getvalue() + assert type(p1) is bytes + assert type(p2) is bytes + assert type(p3) is bytes + assert p1 == p2 == p3 + + # remove not interesting parts: PROTO / FRAME header and unused PUTs + if proto >= 2: + protover = PROTO(proto) + assert p1.startswith(protover) + return pickle_normalize(pickle2tools(pickle), p1) + +def _xpickle_attr(pickle, name): + # on py3 pickle.py tries to import from C _pickle to optimize by default + # -> verify py version if we are asked to test pickle.py + if six.PY3 and (pickle is stdPickle): + assert getattr(pickle, name) is getattr(cPickle, name) + name = '_'+name + return getattr(pickle, name) + +# pickle_normalize returns normalized version of pickle p. +# +# - PROTO and FRAME opcodes are removed from header, +# - unused PUT, BINPUT and MEMOIZE opcodes - those without corresponding GET are removed, +# - *PUT indices start from 0 (this unifies cPickle with pickle). 
+def pickle_normalize(pickletools, p): + def iter_pickle(p): # -> i(op, arg, pdata) + op_prev = None + arg_prev = None + pos_prev = None + for op, arg, pos in pickletools.genops(p): + if op_prev is not None: + pdata_prev = p[pos_prev:pos] + yield (op_prev, arg_prev, pdata_prev) + op_prev = op + arg_prev = arg + pos_prev = pos + if op_prev is not None: + yield (op_prev, arg_prev, p[pos_prev:]) + + memo_oldnew = {} # idx used in original pop/get -> new index | None if not get + idx = 0 + for op, arg, pdata in iter_pickle(p): + if 'PUT' in op.name: + memo_oldnew.setdefault(arg, None) + elif 'MEMOIZE' in op.name: + memo_oldnew.setdefault(len(memo_oldnew), None) + elif 'GET' in op.name: + if memo_oldnew.get(arg) is None: + memo_oldnew[arg] = idx + idx += 1 + + pout = b'' + memo_old = set() # idx used in original pop + for op, arg, pdata in iter_pickle(p): + if op.name in ('PROTO', 'FRAME'): + continue + if 'PUT' in op.name: + memo_old.add(arg) + newidx = memo_oldnew.get(arg) + if newidx is None: + continue + pdata = globals()[op.name](newidx) + if 'MEMOIZE' in op.name: + idx = len(memo_old) + memo_old.add(idx) + newidx = memo_oldnew.get(idx) + if newidx is None: + continue + if 'GET' in op.name: + newidx = memo_oldnew[arg] + assert newidx is not None + pdata = globals()[op.name](newidx) + pout += pdata + return pout + +P = struct.pack +def PROTO(version): return b'\x80' + P('= 2 + + def _(p, p_normok): + p_norm = pickle_normalize(pickletools, p) + assert p_norm == p_normok, diss(p_norm) + + _(b'.', b'.') + _(b'I1\n.', b'I1\n.') + _(PROTO(2)+b'I1\n.', b'I1\n.') + + putgetv = [(PUT,GET), (BINPUT, BINGET)] + if proto >= 4: + putgetv.append((LONG_BINPUT, LONG_BINGET)) + for (put,get) in putgetv: + _(b'(I1\n'+put(1) + b'I2\n'+put(2) +b't'+put(3)+b'0'+get(3)+put(4)+b'.', + b'(I1\nI2\nt'+put(0)+b'0'+get(0)+b'.') + + if proto >= 4: + _(FRAME(4)+b'I1\n.', b'I1\n.') + _(b'I1\n'+MEMOIZE+b'I2\n'+MEMOIZE+GET(0)+b'.', + b'I1\n'+MEMOIZE+b'I2\n'+GET(0)+b'.') + + +# ---- internals of 
patching ---- + +# being able to cPickle bstr as STRING depends on proper working of inside_counted function. +# Verify it with dedicated unit test. +def test_inside_counted(capsys): + outok = _test_inside_counted() + _ = capsys.readouterr() + if _.err: + print(_.err, file=sys.stderr) + assert _.out == outok + +def test_cfunc_is_callee_cleanup(): + _test_cfunc_is_callee_cleanup() + +# verify that what we patched - e.g. PyUnicode_Decode - stay unaffected when +# called outside of bstr/ustr context. +# NOTE this test complements test_strings_patched_transparently in golang_str_test.py +def test_pickle_strings_patched_transparently(): + # PyUnicode_Decode stays working and unaffected + b_ = xbytes("abc") + _ = b_.decode(); assert type(_) is unicode; assert _ == u"abc" + _ = b_.decode("utf8"); assert type(_) is unicode; assert _ == u"abc" + _ = b_.decode("ascii"); assert type(_) is unicode; assert _ == u"abc" + + b_ = xbytes("мир") + _ = b_.decode("utf8"); assert type(_) is unicode; assert _ == u"мир" + with raises(UnicodeDecodeError): + b_.decode("ascii") + + +# ---- misc ---- + +# HIGHEST_PROTOCOL returns highest protocol supported by pickle. 
+def HIGHEST_PROTOCOL(pickle): + if six.PY3 and pickle is cPickle: + pmax = stdPickle.HIGHEST_PROTOCOL # py3: _pickle has no .HIGHEST_PROTOCOL + elif six.PY3 and pickle is _zpickle: + pmax = zpickle.HIGHEST_PROTOCOL # ----//---- for _zpickle + else: + pmax = pickle.HIGHEST_PROTOCOL + assert pmax >= 2 + return pmax diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 71c4cff..0692de7 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -146,9 +146,17 @@ def test_strings_basic(): _ = ustr(123); assert type(_) is ustr; assert _ == '123' _ = bstr([1,'β']); assert type(_) is bstr; assert _ == "[1, 'β']" _ = ustr([1,'β']); assert type(_) is ustr; assert _ == "[1, 'β']" - obj = object() - _ = bstr(obj); assert type(_) is bstr; assert _ == str(obj) # - _ = ustr(obj); assert type(_) is ustr; assert _ == str(obj) # + obj = object(); assert str(obj).startswith('", + "") + _ = bstr(ecls); assert type(_) is bstr; assert _ == str(ecls) + _ = ustr(ecls); assert type(_) is ustr; assert _ == str(ecls) + exc = RuntimeError('zzz'); assert str(exc) == 'zzz' + _ = bstr(exc); assert type(_) is bstr; assert _ == str(exc) + _ = ustr(exc); assert type(_) is ustr; assert _ == str(exc) + # when stringifying they also handle bytes/bytearray inside containers as UTF-8 strings _ = bstr([xunicode( 'β')]); assert type(_) is bstr; assert _ == "['β']" @@ -246,10 +254,12 @@ def test_strings_basic(): assert hash(bs) == hash("мир"); assert bs == "мир" # str/repr + def rb(x,y): return xb32(x, 'b'+y,y) + def ru(x,y): return xu32(x, y,'u'+y) _ = str(us); assert isinstance(_, str); assert _ == "мир" _ = str(bs); assert isinstance(_, str); assert _ == "мир" - _ = repr(us); assert isinstance(_, str); assert _ == "u('мир')" - _ = repr(bs); assert isinstance(_, str); assert _ == "b('мир')" + _ = repr(us); assert isinstance(_, str); assert _ == ru("u('мир')", "'мир'") + _ = repr(bs); assert isinstance(_, str); assert _ == rb("b('мир')", "'мир'") # str/repr of non-valid 
utf8 b_hik8 = xbytes ('привет ')+b(k8mir_bytes); assert type(b_hik8) is bstr @@ -259,11 +269,17 @@ def test_strings_basic(): _ = str(u_hik8); assert isinstance(_, str); assert _ == xbytes('привет ')+b'\xcd\xc9\xd2' _ = str(b_hik8); assert isinstance(_, str); assert _ == xbytes('привет ')+b'\xcd\xc9\xd2' - _ = repr(u_hik8); assert isinstance(_, str); assert _ == r"u(b'привет \xcd\xc9\xd2')" - _ = repr(b_hik8); assert isinstance(_, str); assert _ == r"b(b'привет \xcd\xc9\xd2')" + _ = repr(u_hik8); assert isinstance(_, str); assert _ == r"u(b'привет \xcd\xc9\xd2')" + # NOTE ^^^ same for u,3/2 + _ = repr(b_hik8); assert isinstance(_, str); assert _ == rb(r"b(b'привет \xcd\xc9\xd2')", + r"'привет \xcd\xc9\xd2'") # str/repr of quotes def _(text, breprok, ureprok): + assert breprok[:2] == "b("; assert breprok[-1] == ")" + assert ureprok[:2] == "u("; assert ureprok[-1] == ")" + breprok = rb(breprok, breprok[2:-1]) # b('...') or '...' if bytes patched + ureprok = ru(ureprok, ureprok[2:-1]) # u('...') or '...' if unicode patched bt = b(text); assert type(bt) is bstr ut = u(text); assert type(ut) is ustr _ = str(bt); assert isinstance(_, str); assert _ == text @@ -286,20 +302,26 @@ def _(text, breprok, ureprok): # verify that bstr/ustr are created with correct refcount. def test_strings_refcount(): + # buffer with string data - not bytes nor unicode so that when builting + # string types are patched no case where bytes is created from the same + # bytes, or unicode is created from the same unicode - only increasing + # refcount of original object. 
+ data = bytearray([ord('a'), ord('b'), ord('c'), ord('4')]) + # first verify our logic on std type - obj = xbytes(u'abc'); assert type(obj) is bytes + obj = bytes(data); assert type(obj) is bytes gc.collect(); assert sys.getrefcount(obj) == 1+1 # +1 due to obj passed to getrefcount call # bstr - obj = b('abc'); assert type(obj) is bstr + obj = b(data); assert type(obj) is bstr gc.collect(); assert sys.getrefcount(obj) == 1+1 - obj = bstr('abc'); assert type(obj) is bstr + obj = bstr(data); assert type(obj) is bstr gc.collect(); assert sys.getrefcount(obj) == 1+1 # ustr - obj = u('abc'); assert type(obj) is ustr + obj = u(data); assert type(obj) is ustr gc.collect(); assert sys.getrefcount(obj) == 1+1 - obj = ustr('abc'); assert type(obj) is ustr + obj = ustr(data); assert type(obj) is ustr gc.collect(); assert sys.getrefcount(obj) == 1+1 @@ -326,26 +348,6 @@ def _(i): # returns m[i] as int assert _(5) == 0x80 -# verify that bstr/ustr can be pickled/unpickled correctly. -def test_strings_pickle(): - bs = b("мир") - us = u("май") - - #from pickletools import dis - for proto in range(0, pickle.HIGHEST_PROTOCOL+1): - p_bs = pickle.dumps(bs, proto) - #dis(p_bs) - bs_ = pickle.loads(p_bs) - assert type(bs_) is bstr - assert bs_ == bs - - p_us = pickle.dumps(us, proto) - #dis(p_us) - us_ = pickle.loads(p_us) - assert type(us_) is ustr - assert us_ == us - - # verify that ord on bstr/ustr works as expected. def test_strings_ord(): with raises(TypeError): ord(b('')) @@ -617,7 +619,8 @@ def test_strings_iter(): # iter( b/u/unicode ) -> iterate unicode characters # NOTE that iter(b) too yields unicode characters - not integers or bytes - bi = iter(bs) + #bi = iter(bs) # XXX temp disabled + bi = iter(us) ui = iter(us) ui_ = iter(u_) class XIter: @@ -1100,64 +1103,65 @@ def xfmt(fmt, args): # _bprintf parses %-format ourselves. Verify that parsing first # NOTE here all strings are plain ASCII. 
- def _(fmt, args): + def _(fmt, args, ok): fmt = '*str '+fmt - for l in range(len(fmt), -1, -1): - # [:len(fmt)] verifies original case - # [:l bstr/ustr even for ASCII string - _("*str a %s z", '*str \'"\x7f') - _("*str a %s z", 'β') - _("*str a %s z", ('β',)) + # NOTE *str to force str -> bstr/ustr even for ASCII string + _("*str a %s z", 123 , "*str a 123 z") + _("*str a %s z", '*str \'"\x7f' , "*str a *str '\"\x7f z") + _("*str a %s z", 'β' , "*str a β z") + _("*str a %s z", ('β',) , "*str a β z") _("*str a %s z", ['β'] , "*str a ['β'] z") - _("a %s π", 123) - _("a %s π", '*str \'"\x7f') - _("a %s π", 'β') - _("a %s π", ('β',)) + _("a %s π", 123 , "a 123 π") + _("a %s π", '*str \'"\x7f' , "a *str '\"\x7f π") + _("a %s π", 'β' , "a β π") + _("a %s π", ('β',) , "a β π") _("a %s π", ['β'] , "a ['β'] π") - _("α %s z", 123) - _("α %s z", '*str \'"\x7f') - _("α %s z", 'β') - _("α %s z", ('β',)) + _("α %s z", 123 , "α 123 z") + _("α %s z", '*str \'"\x7f' , "α *str '\"\x7f z") + _("α %s z", 'β' , "α β z") + _("α %s z", ('β',) , "α β z") _("α %s z", ['β'] , "α ['β'] z") - _("α %s π", 123) - _("α %s π", '*str \'"\x7f') - _("α %s π", 'β') - _("α %s π", ('β',)) - _("α %s π", ('β',)) - _("α %s %s π", ('β', 'γ')) - _("α %s %s %s π", ('β', 'γ', 'δ')) - _("α %s %s %s %s %s %s %s π", (1, 'β', 2, 'γ', 3, 'δ', 4)) - _("α %s π", []) - _("α %s π", ([],)) - _("α %s π", ((),)) - _("α %s π", set()) - _("α %s π", (set(),)) - _("α %s π", frozenset()) - _("α %s π", (frozenset(),)) - _("α %s π", ({},)) + _("α %s π", 123 , "α 123 π") + _("α %s π", '*str \'"\x7f' , "α *str '\"\x7f π") + _("α %s π", 'β' , "α β π") + _("α %s π", ('β',) , "α β π") + _("α %s π", ('β',) , "α β π") + _("α %s %s π", ('β', 'γ') , "α β γ π") + _("α %s %s %s π", ('β', 'γ', 'δ') , "α β γ δ π") + _("α %s %s %s %s %s %s %s π", (1, 'β', 2, 'γ', 3, 'δ', 4), + "α 1 β 2 γ 3 δ 4 π") + _("α %s π", [] , "α [] π") + _("α %s π", ([],) , "α [] π") + _("α %s π", ((),) , "α () π") + _("α %s π", set() , x32("α set() π", "α set([]) 
π")) + _("α %s π", (set(),) , x32("α set() π", "α set([]) π")) + _("α %s π", frozenset() , x32("α frozenset() π", "α frozenset([]) π")) + _("α %s π", (frozenset(),) , x32("α frozenset() π", "α frozenset([]) π")) + _("α %s π", ({},) , "α {} π") _("α %s π", ['β'] , "α ['β'] π") _("α %s π", (['β'],) , "α ['β'] π") _("α %s π", (('β',),) , "α ('β',) π") @@ -1279,7 +1285,8 @@ def _(fmt, args, *okv): # recursive frozenset l = hlist() f = frozenset({1, l}); l.append(f) - _('α %s π', (f,)) + _('α %s π', (f,) , *x32(("α frozenset({1, [frozenset(...)]}) π", "α frozenset({[frozenset(...)], 1}) π"), + ("α frozenset([1, [frozenset(...)]]) π", "α frozenset([[frozenset(...)], 1]) π"))) # recursive dict (via value) d = {1:'мир'}; d.update({2:d}) @@ -1296,15 +1303,15 @@ def _(fmt, args, *okv): class Cold: def __repr__(self): return "Cold()" def __str__(self): return u"Класс (old)" - _('α %s π', Cold()) - _('α %s π', (Cold(),)) + _('α %s π', Cold() , "α Класс (old) π") + _('α %s π', (Cold(),) , "α Класс (old) π") # new-style class with __str__ class Cnew(object): def __repr__(self): return "Cnew()" def __str__(self): return u"Класс (new)" - _('α %s π', Cnew()) - _('α %s π', (Cnew(),)) + _('α %s π', Cnew() , "α Класс (new) π") + _('α %s π', (Cnew(),) , "α Класс (new) π") # custom classes inheriting from set/list/tuple/dict/frozenset @@ -1334,7 +1341,10 @@ class D(dict): pass # namedtuple cc = collections; xcc = six.moves Point = cc.namedtuple('Point', ['x', 'y']) - _('α %s π', (Point('β','γ'),) , "α Point(x='β', y='γ') π") + verify_fmt_all_types(lambda fmt, args: fmt % args, + 'α %s π', Point('β','γ') , TypeError("not all arguments converted during string formatting"), excok=True) + _('α %s %s π',Point('β','γ') , "α β γ π") + _('α %s π', (Point('β','γ'),) , "α Point(x='β', y='γ') π") # deque _('α %s π', cc.deque(['β','γ']) , "α deque(['β', 'γ']) π") _('α %s π', (cc.deque(['β','γ']),) , "α deque(['β', 'γ']) π") @@ -1536,6 +1546,14 @@ def test_strings__format__(): # verify print for 
bstr/ustr. def test_strings_print(): outok = readfile(dir_testprog + "/golang_test_str.txt") + # repr(bstr|ustr) is changed if string types are patched: + # b('...') -> '...' if bstr is patched in + # u('...') -> u'...' if ustr is patched in (here we assume it is all valid utf8 there) + if bstr is bytes: + outok = re.sub(br"b\((.*?)\)", x32(r"b\1", r"\1"), outok) + if ustr is unicode: + outok = re.sub(br"u\((.*?)\)", x32(r"\1", r"u\1"), outok) + retcode, stdout, stderr = _pyrun(["golang_test_str.py"], cwd=dir_testprog, stdout=PIPE, stderr=PIPE) assert retcode == 0, (stdout, stderr) @@ -1578,7 +1596,11 @@ def checkop(s, meth, *argv, **kw): ur = xcall(us, meth, *argv, **kw) def assertDeepEQ(a, b, bstrtype): - assert not isinstance(a, (bstr, ustr)) + # `assert not isinstance(a, (bstr, ustr))` done carefully not to + # break when bytes/unicode are patched with bstr/ustr + if isinstance(a, bytes): assert type(a) is bytes + if isinstance(a, unicode): assert type(a) is unicode + if type(a) is unicode: assert type(b) is bstrtype assert a == b @@ -1841,6 +1863,26 @@ class MyStr(tx): _ = b(xx); assert type(_) is bstr ; assert _ == 'мир' _ = u(xx); assert type(_) is ustr ; assert _ == 'мир' + # __str__ returns *str, not MyStr + txstr = { + unicode: str, + bstr: x32(ustr, bstr), + ustr: x32(ustr, bstr), + }[tx] + if six.PY2 and tx is unicode: # on py2 unicode.__str__ raises UnicodeEncodeError: + aa = u'mir' # `'ascii' codec can't encode ...` -> do the test on ascii + _ = aa.__str__(); assert _ == 'mir' + else: + _ = xx.__str__(); assert _ == 'мир' + assert type(_) is txstr + + # for bstr/ustr __bytes__/__unicode__ return *str, never MyStr + # (builtin unicode has no __bytes__/__unicode__) + if tx is not unicode: + _ = xx.__bytes__(); assert type(_) is bstr; assert _ == 'мир' + _ = xx.__unicode__(); assert type(_) is ustr; assert _ == 'мир' + + # subclass with __str__ class MyStr(tx): def __str__(self): return u'αβγ' @@ -1864,6 +1906,17 @@ def __str__(self): with 
raises(TypeError): u(xx) +# verify that bstr/ustr has no extra attributes compared to str and UserString. +# (else e.g. IPython's guarded_eval.py fails when doing `_list_methods(collections.UserString, dir(str)`. +# XXX gpython-only ? +@mark.parametrize('tx', (bstr, ustr)) +def _test_strings_no_extra_methods(tx): # XXX reenable (str does not have __bytes__) + from six.moves import UserString + for attr in dir(tx): + assert hasattr(str, attr) + assert hasattr(UserString, attr) + + def test_qq(): # NOTE qq is also tested as part of strconv.quote @@ -2417,20 +2470,24 @@ def R(x): # verify that what we patched - e.g. bytes.__repr__ - stay unaffected when # called outside of bstr/ustr context. +# NOTE this test is complemented by test_pickle_strings_patched_transparently in golang_str_pickle_test.py def test_strings_patched_transparently(): b_ = xbytes ("мир"); assert type(b_) is bytes u_ = xunicode ("мир"); assert type(u_) is unicode ba_ = xbytearray("мир"); assert type(ba_) is bytearray # standard {repr,str}(bytes|unicode|bytearray) stay unaffected - assert repr(b_) == x32(r"b'\xd0\xbc\xd0\xb8\xd1\x80'", - r"'\xd0\xbc\xd0\xb8\xd1\x80'") - assert repr(u_) == x32(r"'мир'", - r"u'\u043c\u0438\u0440'") + assert repr(b_) == xB32(x32("b'мир'", "'мир'"), + r"b'\xd0\xbc\xd0\xb8\xd1\x80'", + r"'\xd0\xbc\xd0\xb8\xd1\x80'") + assert repr(u_) == xU32(x32("'мир'", "u'мир'"), + r"'мир'", + r"u'\u043c\u0438\u0440'") assert repr(ba_) == r"bytearray(b'\xd0\xbc\xd0\xb8\xd1\x80')" - assert str(b_) == x32(r"b'\xd0\xbc\xd0\xb8\xd1\x80'", - "\xd0\xbc\xd0\xb8\xd1\x80") + assert str(b_) == xS32("мир", + r"b'\xd0\xbc\xd0\xb8\xd1\x80'", + "\xd0\xbc\xd0\xb8\xd1\x80") if six.PY3 or sys.getdefaultencoding() == 'utf-8': # py3 or gpython/py2 assert str(u_) == "мир" else: @@ -2438,8 +2495,9 @@ def test_strings_patched_transparently(): with raises(UnicodeEncodeError): str(u_) # 'ascii' codec can't encode ... 
assert str(u'abc') == "abc" - assert str(ba_) == x32(r"bytearray(b'\xd0\xbc\xd0\xb8\xd1\x80')", - b'\xd0\xbc\xd0\xb8\xd1\x80') + assert str(ba_) == xS32("мир", + r"bytearray(b'\xd0\xbc\xd0\xb8\xd1\x80')", + b'\xd0\xbc\xd0\xb8\xd1\x80') # unicode comparison stay unaffected assert (u_ == u_) is True @@ -2458,9 +2516,10 @@ def test_strings_patched_transparently(): assert (u_ >= u2) is True ; assert (u2 >= u_) is False # bytearray.__init__ stay unaffected - with raises(TypeError): bytearray(u'мир') - a = bytearray() - with raises(TypeError): a.__init__(u'мир') + if ustr is not unicode: + with raises(TypeError): bytearray(u'мир') + a = bytearray() + with raises(TypeError): a.__init__(u'мир') def _(*argv): a = bytearray(*argv) @@ -2530,9 +2589,29 @@ def bench_bencode(b): # xbytes/xunicode/xbytearray convert provided bytes/unicode object to bytes, # unicode or bytearray correspondingly to function name. -def xbytes(x): return x.encode('utf-8') if type(x) is unicode else x -def xunicode(x): return x.decode('utf-8') if type(x) is bytes else x -def xbytearray(x): return bytearray(xbytes(x)) +def xbytes(x): + assert isinstance(x, (bytes,unicode)) + if isinstance(x, unicode): + x = x.encode('utf-8') + assert isinstance(x, bytes) + x = _bdata(x) + assert type(x) is bytes + return x + +def xunicode(x): + assert isinstance(x, (bytes,unicode)) + if isinstance(x, bytes): + x = x.decode('utf-8') + assert isinstance(x, unicode) + x = _udata(x) + assert type(x) is unicode + return x + +def xbytearray(x): + assert isinstance(x, (bytes,unicode)) + x = bytearray(xbytes(x)) + assert type(x) is bytearray + return x # deepReplaceStr2Bytearray replaces str to bytearray, or hashable-version of # bytearray, if str objects are detected to be present inside set or dict keys. 
@@ -2625,3 +2704,29 @@ def __hash__(self): # x32(a,b) returns a on py3, or b on py2 def x32(a, b): return a if six.PY3 else b + +# xb32(x, y, z) returns x if (bstr is not bytes) or x32(y,z) +# xu32(x, y, z) returns x if (ustr is not unicode) or x32(y,z) +def xb32(x, y, z): + return x if (bstr is not bytes) else x32(y,z) +def xu32(x, y, z): + return x if (ustr is not unicode) else x32(y,z) + +# xB32(x, y, z) returns x if (bstr is bytes) or x32(y,z) +# xU32(x, y, z) returns x if (ustr is unicode) or x32(y,z) +# xS32(x, y, z) returns x if (str is bstr|ustr) or x32(y,z) +# XXX replace usage of xB32 to directly via xB ? +def xB32(x, y, z): return xB(x, x32(y,z)) +def xU32(x, y, z): return xU(x, x32(y,z)) +def xS32(x, y, z): return xS(x, x32(y,z)) + + +# xB(x, y) returns x if (bstr is bytes) or y +# xU(x, y) returns x if (ustr is unicode) or y +# xS(x, y) returns x if (str is bstr|ustr) or y +def xB(x, y): + return x if (bstr is bytes) else y +def xU(x, y): + return x if (ustr is unicode) else y +def xS(x, y): + return x if (str is bstr or str is ustr) else y diff --git a/golang/libgolang.h b/golang/libgolang.h index 0d4c153..53a8aec 100644 --- a/golang/libgolang.h +++ b/golang/libgolang.h @@ -169,6 +169,8 @@ // [1] Libtask: a Coroutine Library for C and Unix. https://swtch.com/libtask. // [2] http://9p.io/magic/man2html/2/thread. 
+#include "golang/runtime/platform.h" + #include #include #include @@ -177,21 +179,18 @@ #include #include -#ifdef _MSC_VER // no mode_t on msvc +#ifdef LIBGOLANG_CC_msc // no mode_t on msvc typedef int mode_t; #endif // DSO symbols visibility (based on https://gcc.gnu.org/wiki/Visibility) -#if defined _WIN32 || defined __CYGWIN__ +#ifdef LIBGOLANG_OS_windows #define LIBGOLANG_DSO_EXPORT __declspec(dllexport) #define LIBGOLANG_DSO_IMPORT __declspec(dllimport) -#elif __GNUC__ >= 4 +#else #define LIBGOLANG_DSO_EXPORT __attribute__ ((visibility ("default"))) #define LIBGOLANG_DSO_IMPORT __attribute__ ((visibility ("default"))) -#else - #define LIBGOLANG_DSO_EXPORT - #define LIBGOLANG_DSO_IMPORT #endif #if BUILDING_LIBGOLANG diff --git a/golang/os.cpp b/golang/os.cpp index bc37c64..a7c7f2a 100644 --- a/golang/os.cpp +++ b/golang/os.cpp @@ -38,7 +38,7 @@ // cut this short // (on darwing sys_siglist declaration is normally provided) // (on windows sys_siglist is not available at all) -#if !(defined(__APPLE__) || defined(_WIN32)) +#if !(defined(LIBGOLANG_OS_darwin) || defined(LIBGOLANG_OS_windows)) extern "C" { extern const char * const sys_siglist[]; } @@ -287,7 +287,7 @@ string Signal::String() const { const Signal& sig = *this; const char *sigstr = nil; -#ifdef _WIN32 +#ifdef LIBGOLANG_OS_windows switch (sig.signo) { case SIGABRT: return "Aborted"; case SIGBREAK: return "Break"; diff --git a/golang/os.h b/golang/os.h index 0082544..9ad0c99 100644 --- a/golang/os.h +++ b/golang/os.h @@ -96,7 +96,7 @@ class _File : public object { // Open opens file @path. 
LIBGOLANG_API std::tuple Open(const string &path, int flags = O_RDONLY, mode_t mode = -#if !defined(_MSC_VER) +#if !defined(LIBGOLANG_CC_msc) S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IWGRP | S_IXGRP | S_IROTH | S_IWOTH | S_IXOTH diff --git a/golang/os/signal.cpp b/golang/os/signal.cpp index 9787c8d..793e7a4 100644 --- a/golang/os/signal.cpp +++ b/golang/os/signal.cpp @@ -89,7 +89,7 @@ #include #include -#if defined(_WIN32) +#if defined(LIBGOLANG_OS_windows) # include #endif @@ -101,7 +101,7 @@ # define debugf(format, ...) do {} while (0) #endif -#if defined(_MSC_VER) +#ifdef LIBGOLANG_CC_msc # define HAVE_SIGACTION 0 #else # define HAVE_SIGACTION 1 @@ -194,7 +194,7 @@ void _init() { if (err != nil) panic("os::newFile(_wakerx"); _waketx = vfd[1]; -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows if (sys::Fcntl(_waketx, F_SETFL, O_NONBLOCK) < 0) panic("fcntl(_waketx, O_NONBLOCK)"); // TODO +syserr #else diff --git a/golang/pyx/build.py b/golang/pyx/build.py index 1cb5f3f..3c15f22 100644 --- a/golang/pyx/build.py +++ b/golang/pyx/build.py @@ -35,7 +35,7 @@ # pygolang uses setuptools_dso.DSO to build libgolang; all extensions link to it. import setuptools_dso -import sys, pkgutil, platform, sysconfig +import os, sys, pkgutil, platform, sysconfig from os.path import dirname, join, exists from distutils.errors import DistutilsError @@ -68,7 +68,7 @@ def _findpkg(pkgname): # -> _PyPkg # build_ext amends setuptools_dso.build_ext to allow combining C and C++ # sources in one extension without hitting `error: invalid argument -# '-std=c++11' not allowed with 'C'`. +# '-std=c++11' not allowed with 'C'`. XXX + asm _dso_build_ext = setuptools_dso.build_ext class build_ext(_dso_build_ext): def build_extension(self, ext): @@ -108,12 +108,33 @@ def filter_out(argprefix): # do per-source adjustsment only in .spawn . 
spawn = self.compiler.spawn def xspawn(argv): + argv = argv[:] + c = False - for arg in argv: + S = False + for i,arg in enumerate(argv): if arg.startswith('/Tc'): - c = True - if c: - argv = argv[:] + if arg.endswith('.S'): + argv[i] = arg[3:] # /Tcabc.S -> abc.S + S = True + else: + c = True + + # change cl.exe -> clang-cl.exe for assembly files so that assembler dialect is the same everywhere + if S: + assert argv[0] == self.compiler.cc, (argv, self.compiler.cc) + argv[0] = self.compiler.clang_cl + + # clang-cl fails on *.S if also given /EH... -> remove /EH... + while 1: + for i in range(len(argv)): + if argv[i].startswith('/EH'): + del argv[i] + break + else: + break + + if c or S: for i in range(len(argv)): if argv[i] == '/std:c++20': argv[i] = '/std:c11' @@ -128,6 +149,22 @@ def xspawn(argv): self.compiler._compile = _compile self.compiler.spawn = spawn + def build_extensions(self): + # adjust .compiler to support assembly sources + cc = self.compiler + if '.S' not in cc.src_extensions: + cc.src_extensions.append('.S') + cc.language_map['.S'] = 'asm' + cc.language_order.append('asm') + # XXX refer to https://blog.mozilla.org/nfroyd/2019/04/25/an-unexpected-benefit-of-standardizing-on-clang-cl/ + if cc.compiler_type == 'msvc': + if not cc.initialized: + cc.initialize() + ccmod = sys.modules[cc.__module__] + cc.clang_cl = ccmod._find_exe('clang-cl.exe', cc._paths.split(os.pathsep)) + cc._c_extensions.append('.S') # MSVCCompiler thinks it is C, but xspawn handles .S specially + _dso_build_ext.build_extensions(self) + # setup should be used instead of setuptools.setup def setup(**kw): @@ -176,8 +213,8 @@ def _with_build_defaults(name, kw): # -> (pygo, kw') incv.insert(1, join(pygo, 'golang', '_compat', sysname)) kw['include_dirs'] = incv - # link with libgolang.so if it is not libgolang itself - if name != 'golang.runtime.libgolang': + # link with libgolang.so if it is not libgolang itself, or another internal DSO + if name not in ('golang.runtime.libgolang', 
'golang.runtime.funchook'): dsov = kw.get('dsos', [])[:] dsov.insert(0, 'golang.runtime.libgolang') kw['dsos'] = dsov @@ -212,9 +249,11 @@ def _with_build_defaults(name, kw): # -> (pygo, kw') dependv = kw.get('depends', [])[:] dependv.extend(['%s/golang/%s' % (pygo, _) for _ in [ 'libgolang.h', + 'runtime.h', 'runtime/internal.h', 'runtime/internal/atomic.h', 'runtime/internal/syscall.h', + 'runtime/platform.h', 'context.h', 'cxx.h', 'errors.h', diff --git a/golang/runtime.cpp b/golang/runtime.cpp new file mode 100644 index 0000000..0fc63e6 --- /dev/null +++ b/golang/runtime.cpp @@ -0,0 +1,69 @@ +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. +// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. + +// Package runtime mirrors Go package runtime. +// See runtime.h for package overview. 
+ +#include "golang/runtime.h" + + +// golang::runtime:: +namespace golang { +namespace runtime { + +const string ARCH = +#ifdef LIBGOLANG_ARCH_386 + "386" +#elif defined(LIBGOLANG_ARCH_amd64) + "amd64" +#elif defined(LIBGOLANG_ARCH_arm64) + "arm64" +#else +# error +#endif + ; + + +const string OS = +#ifdef LIBGOLANG_OS_linux + "linux" +#elif defined(LIBGOLANG_OS_darwin) + "darwin" +#elif defined(LIBGOLANG_OS_windows) + "windows" +#else +# error +#endif + ; + + +const string CC = +#ifdef LIBGOLANG_CC_gcc + "gcc" +#elif defined(LIBGOLANG_CC_clang) + "clang" +#elif defined(LIBGOLANG_CC_msc) + "msc" +#else +# error +#endif + ; + + +}} // golang::runtime:: diff --git a/golang/runtime.h b/golang/runtime.h new file mode 100644 index 0000000..60b5765 --- /dev/null +++ b/golang/runtime.h @@ -0,0 +1,50 @@ +#ifndef _NXD_LIBGOLANG_RUNTIME_H +#define _NXD_LIBGOLANG_RUNTIME_H + +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. +// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. + +// Package runtime mirrors Go package runtime. 
+ +#include "golang/libgolang.h" + + +// golang::runtime:: +namespace golang { +namespace runtime { + +// ARCH indicates processor architecture, that is running the program. +// +// e.g. "386", "amd64", "arm64", ... +extern LIBGOLANG_API const string ARCH; + +// OS indicates operating system, that is running the program. +// +// e.g. "linux", "darwin", "windows", ... +extern LIBGOLANG_API const string OS; + +// CC indicates C/C++ compiler, that compiled the program. +// +// e.g. "gcc", "clang", "msc", ... +extern LIBGOLANG_API const string CC; + + +}} // golang::runtime:: + +#endif // _NXD_LIBGOLANG_RUNTIME_H diff --git a/golang/runtime/internal/atomic.cpp b/golang/runtime/internal/atomic.cpp index e3faa98..2669714 100644 --- a/golang/runtime/internal/atomic.cpp +++ b/golang/runtime/internal/atomic.cpp @@ -20,7 +20,7 @@ #include "golang/runtime/internal/atomic.h" #include "golang/libgolang.h" -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows #include #endif @@ -44,7 +44,7 @@ static void _forkNewEpoch() { void _init() { // there is no fork on windows -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows int e = pthread_atfork(/*prepare*/nil, /*inparent*/nil, /*inchild*/_forkNewEpoch); if (e != 0) panic("pthread_atfork failed"); diff --git a/golang/runtime/internal/syscall.cpp b/golang/runtime/internal/syscall.cpp index c998e17..4602c0a 100644 --- a/golang/runtime/internal/syscall.cpp +++ b/golang/runtime/internal/syscall.cpp @@ -58,9 +58,9 @@ string _Errno::Error() { char ebuf[128]; bool ok; -#if __APPLE__ +#ifdef LIBGOLANG_OS_darwin ok = (::strerror_r(-e.syserr, ebuf, sizeof(ebuf)) == 0); -#elif defined(_WIN32) +#elif defined(LIBGOLANG_OS_windows) ok = (::strerror_s(ebuf, sizeof(ebuf), -e.syserr) == 0); #else char *estr = ::strerror_r(-e.syserr, ebuf, sizeof(ebuf)); @@ -102,7 +102,7 @@ __Errno Close(int fd) { return err; } -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows __Errno Fcntl(int fd, int cmd, int arg) { int save_errno = errno; int err = ::fcntl(fd, cmd, arg); @@ -124,7 
+124,7 @@ __Errno Fstat(int fd, struct ::stat *out_st) { int Open(const char *path, int flags, mode_t mode) { int save_errno = errno; -#ifdef _WIN32 // default to open files in binary mode +#ifdef LIBGOLANG_OS_windows // default to open files in binary mode if ((flags & (_O_TEXT | _O_BINARY)) == 0) flags |= _O_BINARY; #endif @@ -141,9 +141,9 @@ __Errno Pipe(int vfd[2], int flags) { return -EINVAL; int save_errno = errno; int err; -#ifdef __linux__ +#ifdef LIBGOLANG_OS_linux err = ::pipe2(vfd, flags); -#elif defined(_WIN32) +#elif defined(LIBGOLANG_OS_windows) err = ::_pipe(vfd, 4096, flags | _O_BINARY); #else err = ::pipe(vfd); @@ -167,7 +167,7 @@ __Errno Pipe(int vfd[2], int flags) { return err; } -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows __Errno Sigaction(int signo, const struct ::sigaction *act, struct ::sigaction *oldact) { int save_errno = errno; int err = ::sigaction(signo, act, oldact); diff --git a/golang/runtime/internal/syscall.h b/golang/runtime/internal/syscall.h index e44160b..4771a19 100644 --- a/golang/runtime/internal/syscall.h +++ b/golang/runtime/internal/syscall.h @@ -63,13 +63,13 @@ LIBGOLANG_API int/*n|err*/ Read(int fd, void *buf, size_t count); LIBGOLANG_API int/*n|err*/ Write(int fd, const void *buf, size_t count); LIBGOLANG_API __Errno Close(int fd); -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows LIBGOLANG_API __Errno Fcntl(int fd, int cmd, int arg); #endif LIBGOLANG_API __Errno Fstat(int fd, struct ::stat *out_st); LIBGOLANG_API int/*fd|err*/ Open(const char *path, int flags, mode_t mode); LIBGOLANG_API __Errno Pipe(int vfd[2], int flags); -#ifndef _WIN32 +#ifndef LIBGOLANG_OS_windows LIBGOLANG_API __Errno Sigaction(int signo, const struct ::sigaction *act, struct ::sigaction *oldact); #endif typedef void (*sighandler_t)(int); diff --git a/golang/runtime/libgolang.cpp b/golang/runtime/libgolang.cpp index 96208f8..3714cc7 100644 --- a/golang/runtime/libgolang.cpp +++ b/golang/runtime/libgolang.cpp @@ -52,7 +52,7 @@ #include // MSVC does 
not support statement expressions and typeof // -> redo list_entry via C++ lambda. -#ifdef _MSC_VER +#ifdef LIBGOLANG_CC_msc # undef list_entry # define list_entry(ptr, type, member) [&]() { \ const decltype( ((type *)0)->member ) *__mptr = (ptr); \ diff --git a/golang/runtime/platform.h b/golang/runtime/platform.h new file mode 100644 index 0000000..8def7e7 --- /dev/null +++ b/golang/runtime/platform.h @@ -0,0 +1,65 @@ +#ifndef _NXD_LIBGOLANG_RUNTIME_PLATFORM_H +#define _NXD_LIBGOLANG_RUNTIME_PLATFORM_H + +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. +// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. + +// Header platform.h provides preprocessor defines that describe target platform. + +// LIBGOLANG_ARCH_ is defined on architecture X. +// +// List of supported architectures: 386, amd64, arm64. +#if defined(__i386__) || defined(_M_IX86) +# define LIBGOLANG_ARCH_386 1 +#elif defined(__x86_64__) || defined(_M_X64) +# define LIBGOLANG_ARCH_amd64 1 +#elif defined(__aarch64__) || defined(_M_ARM64) +# define LIBGOLANG_ARCH_arm64 1 +#else +# error "unsupported architecture" +#endif + +// LIBGOLANG_OS_ is defined on operating system X. 
+// +// List of supported operating systems: linux, darwin, windows. +#ifdef __linux__ +# define LIBGOLANG_OS_linux 1 +#elif defined(__APPLE__) +# define LIBGOLANG_OS_darwin 1 +#elif defined(_WIN32) || defined(__CYGWIN__) +# define LIBGOLANG_OS_windows 1 +#else +# error "unsupported operating system" +#endif + +// LIBGOLANG_CC_ is defined on C/C++ compiler X. +// +// List of supported compilers: gcc, clang, msc. +#ifdef __clang__ +# define LIBGOLANG_CC_clang 1 +#elif defined(_MSC_VER) +# define LIBGOLANG_CC_msc 1 +// NOTE gcc comes last because e.g. clang and icc define __GNUC__ as well +#elif __GNUC__ +# define LIBGOLANG_CC_gcc 1 +#else +# error "unsupported compiler" +#endif + +#endif // _NXD_LIBGOLANG_RUNTIME_PLATFORM_H diff --git a/gpython/.gitignore b/gpython/.gitignore new file mode 100644 index 0000000..ab6c7de --- /dev/null +++ b/gpython/.gitignore @@ -0,0 +1 @@ +/_gpython.cpp diff --git a/gpython/__init__.py b/gpython/__init__.py index c6be786..14b2fce 100755 --- a/gpython/__init__.py +++ b/gpython/__init__.py @@ -25,10 +25,14 @@ - gevent is pre-activated and stdlib is patched to be gevent aware; - go, chan, select etc are put into builtin namespace; -- default string encoding is always set to UTF-8. +- default string encoding is always set to UTF-8; +- bstr/ustr replace builtin str/unicode types. Gevent activation can be disabled via `-X gpython.runtime=threads`, or $GPYTHON_RUNTIME=threads. + +String types replacement can be disabled via `-X gpython.strings=pystd`, or +$GPYTHON_STRINGS=pystd. """ # NOTE gpython is kept out of golang/ , since even just importing e.g. 
golang.cmd.gpython, @@ -230,9 +234,13 @@ def run(mmain): gevent = sys.modules.get('gevent', None) gpyver = 'GPython %s' % golang.__version__ if gevent is not None: - gpyver += ' [gevent %s]' % gevent.__version__ + gpyver += ' [runtime gevent %s]' % gevent.__version__ + else: + gpyver += ' [runtime threads]' + if type(u'') is golang.ustr: + gpyver += ' [strings bstr+ustr]' else: - gpyver += ' [threads]' + gpyver += ' [strings pystd]' ver.append(gpyver) import platform @@ -344,6 +352,9 @@ def main(): # imported first, e.g. to support sys.modules. import sys + # import pyx/c part of gpython + from gpython import _gpython + # safety check that we are not running from a setuptools entrypoint, where # it would be too late to monkey-patch stdlib. # @@ -372,6 +383,7 @@ def main(): reload(sys) sys.setdefaultencoding('utf-8') delattr(sys, 'setdefaultencoding') + _gpython.set_utf8_as_default_src_encoding() # import os to get access to environment. @@ -381,10 +393,12 @@ def main(): import os # extract and process `-X gpython.*` - # -X gpython.runtime=(gevent|threads) + $GPYTHON_RUNTIME + # -X gpython.runtime=(gevent|threads) + $GPYTHON_RUNTIME + # -X gpython.strings=(bstr+ustr|pystd) + $GPYTHON_STRINGS sys._xoptions = getattr(sys, '_xoptions', {}) argv_ = [] gpy_runtime = os.getenv('GPYTHON_RUNTIME', 'gevent') + gpy_strings = os.getenv('GPYTHON_STRINGS', 'bstr+ustr') igetopt = _IGetOpt(sys.argv[1:], _pyopt, _pyopt_long) for (opt, arg) in igetopt: if opt == '-X': @@ -393,6 +407,10 @@ def main(): gpy_runtime = arg[len('gpython.runtime='):] sys._xoptions['gpython.runtime'] = gpy_runtime + elif arg.startswith('gpython.strings='): + gpy_strings = arg[len('gpython.strings='):] + sys._xoptions['gpython.strings'] = gpy_strings + else: raise RuntimeError('gpython: unknown -X option %s' % arg) @@ -412,13 +430,15 @@ def main(): # sys.executable spawned from under `gpython -X gpython.runtime=threads` # also uses "threads" runtime by default. 
os.environ['GPYTHON_RUNTIME'] = gpy_runtime + os.environ['GPYTHON_STRINGS'] = gpy_strings - # init initializes according to selected runtime + # init initializes according to selected runtime and strings # it is called after options are parsed and sys.path is setup correspondingly. # this way golang and gevent are imported from exactly the same place as # they would be in standard python after regular import (ex from golang/ # under cwd if run under `python -c ...` or interactive console. def init(): + gpy_runtime_ver = gpy_runtime if gpy_runtime == 'gevent': # make gevent pre-available & stdlib patched import gevent @@ -434,22 +454,30 @@ def init(): if _ not in (True, None): # patched or nothing to do # XXX provide details raise RuntimeError('gevent monkey-patching failed') - gpy_verextra = 'gevent %s' % gevent.__version__ + gpy_runtime_ver += ' %s' % gevent.__version__ elif gpy_runtime == 'threads': - gpy_verextra = 'threads' - + pass else: - raise RuntimeError('gpython: invalid runtime %s' % gpy_runtime) + raise RuntimeError('gpython: invalid runtime %r' % gpy_runtime) - # put go, chan, select, ... into builtin namespace + if gpy_strings not in ('bstr+ustr', 'pystd'): + raise RuntimeError('gpython: invalid strings %r' % gpy_strings) + + # import golang + # this will activate selected runtime and strings + sys._gpy_runtime = gpy_runtime + sys._gpy_strings = gpy_strings import golang + + # put go, chan, select, ... 
into builtin namespace from six.moves import builtins for k in golang.__all__: setattr(builtins, k, getattr(golang, k)) + setattr(builtins, 'CCC', CCC) # sys.version - sys.version += (' [GPython %s] [%s]' % (golang.__version__, gpy_verextra)) + sys.version += (' [GPython %s] [runtime %s] [strings %s]' % (golang.__version__, gpy_runtime_ver, gpy_strings)) # tail to pymain pymain(argv, init) @@ -567,5 +595,11 @@ def __next__(self): next = __next__ # for py2 +# for tests XXX continue by first writing test XXX +1/0 +class _tEarlyStrSubclass(str): + pass + + if __name__ == '__main__': main() diff --git a/gpython/_gpython.pyx b/gpython/_gpython.pyx new file mode 100644 index 0000000..ada1df8 --- /dev/null +++ b/gpython/_gpython.pyx @@ -0,0 +1,31 @@ +# -*- coding: utf-8 -*- +# cython: language_level=2 +# Copyright (C) 2023 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. +"""_gpython.pyx ... 
XXX +""" + +cdef extern from *: + """ + void _set_utf8_as_default_src_encoding(); + """ + void _set_utf8_as_default_src_encoding() except * + +def set_utf8_as_default_src_encoding(): + _set_utf8_as_default_src_encoding() diff --git a/gpython/_gpython_c.cpp b/gpython/_gpython_c.cpp new file mode 100644 index 0000000..05ba977 --- /dev/null +++ b/gpython/_gpython_c.cpp @@ -0,0 +1,76 @@ +// Copyright (C) 2023 Nexedi SA and Contributors. +// Kirill Smelkov +// +// This program is free software: you can Use, Study, Modify and Redistribute +// it under the terms of the GNU General Public License version 3, or (at your +// option) any later version, as published by the Free Software Foundation. +// +// You can also Link and Combine this program with other software covered by +// the terms of any of the Free Software licenses or any of the Open Source +// Initiative approved licenses and Convey the resulting work. Corresponding +// source of such a combination shall include the source code for all other +// software used. +// +// This program is distributed WITHOUT ANY WARRANTY; without even the implied +// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +// +// See COPYING file for full licensing terms. +// See https://www.nexedi.com/licensing for rationale and options. 
+ +// XXX doctitle + +#include +#if PY_MAJOR_VERSION < 3 +#include // mod_ty & co +#include // node +#include // encoding_decl & co +#include // PyAST_FromNode & co +#endif + +#include + +// py2: wrap PyAST_FromNode so that "utf-8" becomes the default encoding +#if PY_MAJOR_VERSION < 3 +static auto _py_PyAST_FromNode = &PyAST_FromNode; +static mod_ty gpy_PyAST_FromNode(const node* n, PyCompilerFlags* flags, + const char* filename, PyArena* arena) +{ +// fprintf(stderr, "gpy_PyAST_FromNode...\n"); + PyCompilerFlags gflags = {.cf_flags = 0}; + if (flags) + gflags = *flags; + if (TYPE(n) != encoding_decl) + gflags.cf_flags |= PyCF_SOURCE_IS_UTF8; + return _py_PyAST_FromNode(n, &gflags, filename, arena); +} + +static funchook_t* gpy_PyAST_FromNode_hook; +void _set_utf8_as_default_src_encoding() { + funchook_t *h; + int err; + +// funchook_set_debug_file("/dev/stderr"); + + gpy_PyAST_FromNode_hook = h = funchook_create(); + if (h == NULL) { + PyErr_NoMemory(); + return; + } + + err = funchook_prepare(h, (void**)&_py_PyAST_FromNode, (void*)gpy_PyAST_FromNode); + if (err != 0) { + PyErr_SetString(PyExc_RuntimeError, funchook_error_message(h)); + return; + } + + err = funchook_install(h, 0); + if (err != 0) { + PyErr_SetString(PyExc_RuntimeError, funchook_error_message(h)); + return; + } + + // ok +} +#else +void _set_utf8_as_default_src_encoding() {} +#endif diff --git a/gpython/gpython_test.py b/gpython/gpython_test.py index 420d0d4..355c2e7 100644 --- a/gpython/gpython_test.py +++ b/gpython/gpython_test.py @@ -47,20 +47,34 @@ def runtime(request): yield request.param +# strings is pytest fixture that yields all variants of should be supported gpython strings: +# '' - not specified (gpython should autoselect) +# 'bstr+ustr' +# 'pystd' +@pytest.fixture(scope="function", params=['', 'bstr+ustr', 'pystd']) +def strings(request): + yield request.param + # gpyenv returns environment appropriate for spawning gpython with -# specified runtime. 
-def gpyenv(runtime): # -> env +# specified runtime and strings. +def gpyenv(runtime, strings): # -> env env = os.environ.copy() if runtime != '': env['GPYTHON_RUNTIME'] = runtime else: env.pop('GPYTHON_RUNTIME', None) + if strings != '': + env['GPYTHON_STRINGS'] = strings + else: + env.pop('GPYTHON_STRINGS', None) return env @gpython_only def test_defaultencoding_utf8(): assert sys.getdefaultencoding() == 'utf-8' + assert eval("u'αβγ'") == u'αβγ' # FIXME fails on py2 which uses hardcoded default latin1 + # XXX +exec, +run file @gpython_only def test_golang_builtins(): @@ -143,19 +157,42 @@ def assert_gevent_not_activated(): @gpython_only -def test_executable(runtime): +def test_str_patched(): + # gpython, by default, patches str/unicode to be bstr/ustr. + # handling of various string modes is explicitly tested in test_Xstrings. + assert_str_patched() + +def assert_str_patched(): + #assert str.__name__ == ('bstr' if PY2 else 'ustr') + assert str.__name__ == 'str' + assert str is (bstr if PY2 else ustr) + if PY2: + assert unicode.__name__ == 'unicode' + assert unicode is ustr + assert type('') is str + assert type(b'') is (bstr if PY2 else bytes) + assert type(u'') is ustr + +def assert_str_not_patched(): + assert str.__name__ == 'str' + assert str is not bstr + assert str is not ustr + if PY2: + assert unicode.__name__ == 'unicode' + assert unicode is not bstr + assert unicode is not ustr + assert type('') is str + assert type(b'') is bytes + assert type(u'') is (unicode if PY2 else str) + + +@gpython_only +def test_executable(): # sys.executable must point to gpython and we must be able to execute it. 
- import gevent assert 'gpython' in sys.executable - ver = pyout(['-c', 'import sys; print(sys.version)'], env=gpyenv(runtime)) + ver = pyout(['-c', 'import sys; print(sys.version)'], env=gpyenv('', '')) ver = str(ver) assert ('[GPython %s]' % golang.__version__) in ver - if runtime != 'threads': - assert ('[gevent %s]' % gevent.__version__) in ver - assert ('[threads]') not in ver - else: - assert ('[gevent ') not in ver - assert ('[threads]') in ver # verify pymain. @@ -322,15 +359,20 @@ def check(argv): # pymain -V/--version # gpython_only because output differs from !gpython. @gpython_only -def test_pymain_ver(runtime): +def test_pymain_ver(runtime, strings): from golang import b from gpython import _version_info_str as V import gevent vok = 'GPython %s' % golang.__version__ if runtime != 'threads': - vok += ' [gevent %s]' % gevent.__version__ + vok += ' [runtime gevent %s]' % gevent.__version__ else: - vok += ' [threads]' + vok += ' [runtime threads]' + + if strings != 'pystd': + vok += ' [strings bstr+ustr]' + else: + vok += ' [strings pystd]' if is_cpython: vok += ' / CPython %s' % platform.python_version() @@ -341,10 +383,12 @@ def test_pymain_ver(runtime): vok += '\n' - ret, out, err = _pyrun(['-V'], stdout=PIPE, stderr=PIPE, env=gpyenv(runtime)) + env = gpyenv(runtime, strings) + + ret, out, err = _pyrun(['-V'], stdout=PIPE, stderr=PIPE, env=env) assert (ret, out, b(err)) == (0, b'', b(vok)) - ret, out, err = _pyrun(['--version'], stdout=PIPE, stderr=PIPE, env=gpyenv(runtime)) + ret, out, err = _pyrun(['--version'], stdout=PIPE, stderr=PIPE, env=env) assert (ret, out, b(err)) == (0, b'', b(vok)) # verify that ./bin/gpython runs ok. 
diff --git a/pyproject.toml b/pyproject.toml index c19bed0..07ecad3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools", "wheel", "setuptools_dso >= 2.7", "cython", "gevent"] +requires = ["setuptools", "wheel", "setuptools_dso >= 2.7", "cython < 3", "gevent"] diff --git a/setup.py b/setup.py index f3ef37f..f0539ee 100644 --- a/setup.py +++ b/setup.py @@ -42,9 +42,9 @@ def pygo_cy_builtin_type_name_get(self): from setuptools.command.develop import develop as _develop from distutils import sysconfig from os.path import dirname, join -import sys, os, re +import sys, os, re, platform, errno -# read file content +# read/write file content def readfile(path): # -> str with open(path, 'rb') as f: data = f.read() @@ -52,6 +52,20 @@ def readfile(path): # -> str data = data.decode('utf-8') return data +def writefile(path, data): + if not isinstance(data, bytes): + data = data.encode('utf-8') + with open(path, 'wb') as f: + f.write(data) + +# mkdir -p +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as e: + if e.errno != errno.EEXIST: + raise + # reuse golang.pyx.build to build pygolang dso and extensions. # we have to be careful and inject synthetic golang package in order to be # able to import golang.pyx.build without built/working golang. @@ -59,6 +73,7 @@ def readfile(path): # -> str exec(readfile('trun'), trun) trun['ximport_empty_golangmod']() from golang.pyx.build import setup, DSO, Extension as Ext +from setuptools_dso import ProbeToolchain # grep searches text for pattern. @@ -104,7 +119,7 @@ class XInstallGPython: # (script_name, script) -> (script_name, script) def transform_script(self, script_name, script): # on windows setuptools installs 3 files: - # gpython-script.py + # gpython-script.py XXX do we need to adjust this similarly to pymain? # gpython.exe # gpython.exe.manifest # we want to override .py only. 
@@ -173,8 +188,8 @@ def install_egg_scripts(self, dist): # requirements of packages under "golang." namespace R = { - 'cmd.pybench': {'pytest'}, - 'pyx.build': {'setuptools', 'wheel', 'cython', 'setuptools_dso >= 2.7'}, + 'cmd.pybench': {'pytest', 'py'}, + 'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.7'}, 'x.perf.benchlib': {'numpy'}, } # TODO generate `a.b -> a`, e.g. x.perf = join(x.perf.*); x = join(x.*) @@ -184,7 +199,8 @@ def install_egg_scripts(self, dist): R['all'] = Rall # ipython/pytest are required to test py2 integration patches -R['all_test'] = Rall.union(['ipython', 'pytest']) # pip does not like "+" in all+test +# zodbpickle is used to test pickle support for bstr/ustr +R['all_test'] = Rall.union(['ipython', 'pytest', 'zodbpickle']) # pip does not like "+" in all+test # extras_require <- R extras_require = {} @@ -200,6 +216,206 @@ def get_python_libdir(): else: return sysconfig.get_config_var('LIBDIR') +# funchook_dso is DSO for libfunchook.so or None if CPU is not supported. 
+def _(): + cpu = platform.machine() + if re.match('x86|i.86|x86_64|amd64', cpu, re.I): + cpu = 'x86' + disasm = 'distorm' + elif re.match('aarch64|arm64', cpu, re.I): + cpu = 'arm64' + disasm = 'capstone' + else: + return None # no funchook support + + # XXX temp test XXX no -> we need capstone for disasm + disasm = 'capstone' + + if platform.system() == 'Windows': + os = 'windows' + libv = ['psapi'] + else: + os = 'unix' + libv = ['dl'] + + FH = '3rdparty/funchook/' + srcv = [FH+'src/funchook.c', + FH+'src/funchook_%s.c' % cpu, + FH+'src/funchook_%s.c' % os, + FH+'src/disasm_%s.c' % disasm] + depv = [FH+'include/funchook.h', + FH+'src/disasm.h', + FH+'src/funchook_arm64.h', + FH+'src/funchook_internal.h', + FH+'src/funchook_x86.h'] + incv = [FH+'include'] + defv = ['FUNCHOOK_EXPORTS'] + + if disasm == 'distorm': + D3 = '3rdparty/funchook/distorm/' + srcv += [D3+'src/decoder.c', + D3+'src/distorm.c', + D3+'src/instructions.c', + D3+'src/insts.c', + D3+'src/mnemonics.c', + D3+'src/operands.c', + D3+'src/prefix.c', + D3+'src/textdefs.c'] + depv += [D3+'include/distorm.h', + D3+'include/mnemonics.h', + D3+'src/config.h', + D3+'src/decoder.h', + D3+'src/instructions.h', + D3+'src/insts.h', + D3+'src/operands.h', + D3+'src/prefix.h', + D3+'src/textdefs.h', + D3+'src/wstring.h', + D3+'src/x86defs.h'] + incv += [D3+'include'] + + if disasm == 'capstone': + CS = '3rdparty/capstone/' + srcv += [CS+'cs.c', + CS+'Mapping.c', + CS+'MCInst.c', + CS+'MCInstrDesc.c', + CS+'MCRegisterInfo.c', + CS+'SStream.c', + CS+'utils.c'] + depv += [CS+'cs_simple_types.h', + CS+'cs_priv.h', + CS+'LEB128.h', + CS+'Mapping.h', + CS+'MathExtras.h', + CS+'MCDisassembler.h', + CS+'MCFixedLenDisassembler.h', + CS+'MCInst.h', + CS+'MCInstrDesc.h', + CS+'MCRegisterInfo.h', + CS+'SStream.h', + CS+'utils.h'] + incv += [CS+'include'] + + depv += [CS+'include/capstone/arm64.h', + CS+'include/capstone/arm.h', + CS+'include/capstone/capstone.h', + CS+'include/capstone/evm.h', + 
CS+'include/capstone/wasm.h', + CS+'include/capstone/mips.h', + CS+'include/capstone/ppc.h', + CS+'include/capstone/x86.h', + CS+'include/capstone/sparc.h', + CS+'include/capstone/systemz.h', + CS+'include/capstone/xcore.h', + CS+'include/capstone/m68k.h', + CS+'include/capstone/tms320c64x.h', + CS+'include/capstone/m680x.h', + CS+'include/capstone/mos65xx.h', + CS+'include/capstone/bpf.h', + CS+'include/capstone/riscv.h', + CS+'include/capstone/sh.h', + CS+'include/capstone/tricore.h', + CS+'include/capstone/platform.h'] + + defv += ['CAPSTONE_SHARED', 'CAPSTONE_USE_SYS_DYN_MEM'] + + if cpu == 'arm64': + defv += ['CAPSTONE_HAS_ARM64'] + srcv += [CS+'arch/AArch64/AArch64BaseInfo.c', + CS+'arch/AArch64/AArch64Disassembler.c', + CS+'arch/AArch64/AArch64InstPrinter.c', + CS+'arch/AArch64/AArch64Mapping.c', + CS+'arch/AArch64/AArch64Module.c'] + depv += [CS+'arch/AArch64/AArch64AddressingModes.h', + CS+'arch/AArch64/AArch64BaseInfo.h', + CS+'arch/AArch64/AArch64Disassembler.h', + CS+'arch/AArch64/AArch64InstPrinter.h', + CS+'arch/AArch64/AArch64Mapping.h', + CS+'arch/AArch64/AArch64GenAsmWriter.inc', + CS+'arch/AArch64/AArch64GenDisassemblerTables.inc', + CS+'arch/AArch64/AArch64GenInstrInfo.inc', + CS+'arch/AArch64/AArch64GenRegisterInfo.inc', + CS+'arch/AArch64/AArch64GenRegisterName.inc', + CS+'arch/AArch64/AArch64GenRegisterV.inc', + CS+'arch/AArch64/AArch64GenSubtargetInfo.inc', + CS+'arch/AArch64/AArch64GenSystemOperands.inc', + CS+'arch/AArch64/AArch64GenSystemOperands_enum.inc', + CS+'arch/AArch64/AArch64MappingInsn.inc', + CS+'arch/AArch64/AArch64MappingInsnName.inc', + CS+'arch/AArch64/AArch64MappingInsnOp.inc'] + + if cpu == 'x86': + defv += ['CAPSTONE_HAS_X86'] + srcv += [CS+'arch/X86/X86ATTInstPrinter.c', # !diet + CS+'arch/X86/X86Disassembler.c', + CS+'arch/X86/X86DisassemblerDecoder.c', + CS+'arch/X86/X86IntelInstPrinter.c', + CS+'arch/X86/X86InstPrinterCommon.c', + CS+'arch/X86/X86Mapping.c', + CS+'arch/X86/X86Module.c'] + depv += 
[CS+'arch/X86/X86BaseInfo.h', + CS+'arch/X86/X86Disassembler.h', + CS+'arch/X86/X86DisassemblerDecoder.h', + CS+'arch/X86/X86DisassemblerDecoderCommon.h', + CS+'arch/X86/X86GenAsmWriter.inc', + CS+'arch/X86/X86GenAsmWriter1.inc', + CS+'arch/X86/X86GenAsmWriter1_reduce.inc', + CS+'arch/X86/X86GenAsmWriter_reduce.inc', + CS+'arch/X86/X86GenDisassemblerTables.inc', + CS+'arch/X86/X86GenDisassemblerTables_reduce.inc', + CS+'arch/X86/X86GenInstrInfo.inc', + CS+'arch/X86/X86GenInstrInfo_reduce.inc', + CS+'arch/X86/X86GenRegisterInfo.inc', + CS+'arch/X86/X86InstPrinter.h', + CS+'arch/X86/X86Mapping.h', + CS+'arch/X86/X86MappingInsn.inc', + CS+'arch/X86/X86MappingInsnOp.inc', + CS+'arch/X86/X86MappingInsnOp_reduce.inc', + CS+'arch/X86/X86MappingInsn_reduce.inc'] + + # config.h + probe = ProbeToolchain() + config_h = [] + def cfgemit(line): + config_h.append(line+'\n') + def defif(name, ok): + if ok: + cfgemit('#define %s 1' % name) + else: + cfgemit('#undef %s' % name) + + for d in ('capstone', 'distorm', 'zydis'): + defif('DISASM_%s' % d.upper(), d == disasm) + + cfgemit('#define SIZEOF_VOID_P %d' % probe.sizeof('void*')) + + defif('_GNU_SOURCE', 1) + defif('GNU_SPECIFIC_STRERROR_R', probe.try_compile(""" +#define _GNU_SOURCE 1 +#include +int main() +{ + char dummy[128]; + return *strerror_r(0, dummy, sizeof(dummy)); +} +""")) + + fbuild_src = 'build/3rdparty/funchook/src' + mkdir_p(fbuild_src) + writefile(fbuild_src+'/config.h', ''.join(config_h)) + incv += [fbuild_src] + + return DSO('golang.runtime.funchook', srcv, + depends = depv, + language = 'c', + include_dirs = incv, + define_macros = [(_, None) for _ in defv], + libraries = libv, + soversion = '1.1') +funchook_dso = _() + + setup( name = 'pygolang', version = version, @@ -225,6 +441,7 @@ def get_python_libdir(): ['golang/runtime/libgolang.cpp', 'golang/runtime/internal/atomic.cpp', 'golang/runtime/internal/syscall.cpp', + 'golang/runtime.cpp', 'golang/context.cpp', 'golang/errors.cpp', 'golang/fmt.cpp', @@ 
-236,9 +453,11 @@ def get_python_libdir(): 'golang/time.cpp'], depends = [ 'golang/libgolang.h', + 'golang/runtime.h', 'golang/runtime/internal.h', 'golang/runtime/internal/atomic.h', 'golang/runtime/internal/syscall.h', + 'golang/runtime/platform.h', 'golang/context.h', 'golang/cxx.h', 'golang/errors.h', @@ -259,12 +478,21 @@ def get_python_libdir(): include_dirs = [sysconfig.get_python_inc()], library_dirs = [get_python_libdir()], define_macros = [('BUILDING_LIBPYXRUNTIME', None)], - soversion = '0.1')], + soversion = '0.1')] + + ([funchook_dso] if funchook_dso else []), ext_modules = [ Ext('golang._golang', - ['golang/_golang.pyx'], - depends = ['golang/_golang_str.pyx']), + ['golang/_golang.pyx', + 'golang/_golang_str_pickle.S'], + depends = [ + 'golang/_golang_str.pyx', + 'golang/_golang_str_pickle.pyx', + 'golang/_golang_str_pickle_test.pyx', + 'golang/_golang_str_pickle.S'], + dsos = ['golang.runtime.funchook'], # XXX only if available + include_dirs = ['3rdparty/funchook/include', + '3rdparty/capstone/include']), Ext('golang.runtime._runtime_thread', ['golang/runtime/_runtime_thread.pyx']), @@ -334,6 +562,14 @@ def get_python_libdir(): Ext('golang._time', ['golang/_time.pyx'], dsos = ['golang.runtime.libpyxruntime']), + + # XXX consider putting everything into just gpython.pyx + .c + Ext('gpython._gpython', + ['gpython/_gpython.pyx', + 'gpython/_gpython_c.cpp'], # XXX do we need C++ here? 
+ include_dirs = ['3rdparty/funchook/include'], + dsos = ['golang.runtime.funchook'], # XXX only if available + ), ], include_package_data = True, From e035c704d6fc44abf2c2c6853bfb5609205725a9 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 30 Jan 2024 15:07:53 +0300 Subject: [PATCH 20/29] X Bring y/bstr+x/gpystr to be at least usable Asked by Kazuhiko: https://lab.nexedi.com/nexedi/pygolang/-/merge_requests/21#note_198526 --- golang/_golang_str.pyx | 2 +- gpython/__init__.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index d7282a0..78bbd40 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -2402,7 +2402,7 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, assert isinstance(x, type) xtyp = x _xtyp = <_XPyTypeObject*>x - fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) + #fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 xtyp.tp_flags &= ~Py_TPFLAGS_READY Py_CLEAR(_xtyp.tp_mro) diff --git a/gpython/__init__.py b/gpython/__init__.py index 14b2fce..f2225f8 100755 --- a/gpython/__init__.py +++ b/gpython/__init__.py @@ -474,7 +474,6 @@ def init(): from six.moves import builtins for k in golang.__all__: setattr(builtins, k, getattr(golang, k)) - setattr(builtins, 'CCC', CCC) # sys.version sys.version += (' [GPython %s] [runtime %s] [strings %s]' % (golang.__version__, gpy_runtime_ver, gpy_strings)) @@ -596,7 +595,7 @@ def __next__(self): # for tests XXX continue by first writing test XXX -1/0 +#1/0 class _tEarlyStrSubclass(str): pass From ac87a2ed0ca039ceeda98dcaf7445cdee03f3cd0 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 24 Apr 2024 19:38:42 +0300 Subject: [PATCH 21/29] X Update on my draft state of x/gpystr work Please see demo/pickle_py2_gpy3_demo.py and demo/ZODB_py2_gpy3_demo.py for details of how pickle compatibility problem is solved in between py2 and py3. 
--- demo/.gitignore | 2 + demo/ZODB_py2_gpy3_demo.py | 55 +++++ demo/pickle_py2_gpy3_demo.py | 68 ++++++ golang/__init__.py | 6 +- golang/_golang.pyx | 2 +- golang/_golang_str.pyx | 209 +++++++++++++++++-- golang/_golang_str_pickle.S | 8 +- golang/_golang_str_pickle.pyx | 311 ++++++++++++++++++++++++---- golang/_golang_str_pickle_test.pyx | 20 +- golang/_gopath.py | 40 +++- golang/_strconv.pyx | 2 +- golang/fmt.h | 2 +- golang/golang_str_pickle_test.py | 286 ++++++++++++++++++------- golang/golang_str_test.py | 47 ++++- golang/golang_test.py | 6 + golang/libgolang.h | 2 +- golang/os.cpp | 2 +- golang/os.h | 2 +- golang/os/signal.cpp | 2 +- golang/pyx/build.py | 2 +- golang/runtime.cpp | 4 +- golang/runtime.h | 4 +- golang/runtime/internal/atomic.cpp | 2 +- golang/runtime/internal/syscall.cpp | 2 +- golang/runtime/internal/syscall.h | 2 +- golang/runtime/libgolang.cpp | 2 +- golang/runtime/platform.h | 4 +- gpython/__init__.py | 12 +- gpython/_gpython.pyx | 4 +- gpython/_gpython_c.cpp | 4 +- gpython/gpython_test.py | 2 +- pyproject.toml | 2 +- setup.py | 7 +- tox.ini | 12 +- trun | 22 +- 35 files changed, 952 insertions(+), 207 deletions(-) create mode 100644 demo/.gitignore create mode 100755 demo/ZODB_py2_gpy3_demo.py create mode 100755 demo/pickle_py2_gpy3_demo.py diff --git a/demo/.gitignore b/demo/.gitignore new file mode 100644 index 0000000..a7a5eac --- /dev/null +++ b/demo/.gitignore @@ -0,0 +1,2 @@ +x.pkl +data.fs* diff --git a/demo/ZODB_py2_gpy3_demo.py b/demo/ZODB_py2_gpy3_demo.py new file mode 100755 index 0000000..45a2405 --- /dev/null +++ b/demo/ZODB_py2_gpy3_demo.py @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Program ZODB_py2_gpy3_demo demonstrates interoperability in between py2 and py3 +# regarding pickled strings in ZODB. +# +# It is similar to pickle_py2_gpy3_demo, but persists data inside ZODB instead +# of raw pickle file. +# +# Please see pickle_py2_gpy3_demo for details. 
+ +from __future__ import print_function + +from persistent import Persistent +from ZODB.FileStorage import FileStorage +from ZODB.DB import DB +import transaction + +from zodbpickle import fastpickle as pickle +import pickletools +import sys + + +class MyClass(Persistent): + __slots__ = ('data',) + +def main(): + print(sys.version) + + # adjust FileStorage magic so that py3 does not refuse to load FileStorage produced on py2 + fsmod = __import__('ZODB.FileStorage.FileStorage', fromlist=['ZODB']) + assert hasattr(fsmod, 'packed_version') + fsmod.packed_version = b'FS21' + + stor = FileStorage('data.fs') + db = DB(stor) + conn = db.open() + root = conn.root + + if not hasattr(root, 'obj'): + root.obj = obj = MyClass() + obj.data = u'αβγ'.encode('utf-8') + else: + print('\nloading data:') + obj = root.obj + print('\n-> %r\t(%s)' % (obj.data, obj.data)) + + obj.data += b' %d' % len(obj.data) + + print('\nsaving data: %r\t(%s)' % (obj.data, obj.data)) + transaction.commit() + + +if __name__ == '__main__': + main() diff --git a/demo/pickle_py2_gpy3_demo.py b/demo/pickle_py2_gpy3_demo.py new file mode 100755 index 0000000..d107675 --- /dev/null +++ b/demo/pickle_py2_gpy3_demo.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Program pickle_py2_gpy3_demo demonstrates interoperability in between py2 and py3 +# regarding pickled strings. +# +# It initially saves non-ASCII string in pickled form into a file, and on +# further runs tries to load saved object back, appends some tail data to it, +# and saves the result again. +# +# When run on plain py2 everything works as expected: string is initially +# persisted ok, then loaded ok as the same str object, which can be worked with +# as expected, and persisted again ok. +# +# When plain py3 runs this program on the file prepared by py2, loading pickle +# data breaks because, by default, py3 wants to decode *STRING opcodes as ASCII +# and the saved string is not ASCII. 
+# +# However when run under gpy3, the string is loaded ok as bstr. Since bstr has the +# same semantic as regular str on py2, working with that object produces the +# same result plain py2 would produce when adjusting the data. And then, bstr +# is also persisted ok and via the same *STRING opcodes, that py2 originally +# used for the data. +# +# This way both py2 and gpy3 can interoperate on the same database: py2 can +# produce data, gpy3 can read the data and modify it, and further py2 can load +# updated data, again, just ok. + +from __future__ import print_function + +from zodbpickle import fastpickle as pickle +import pickletools +from os.path import exists +import sys + +def main(): + stor = 'x.pkl' + + print(sys.version) + + if not exists(stor): + obj = u'αβγ'.encode('utf-8') + else: + pkl = readfile(stor) + print('\nloading pickle:') + pickletools.dis(pkl) + obj = pickle.loads(pkl) + print('\n-> %r\t(%s)' % (obj, obj)) + + obj += b' %d' % len(obj) + + print('\nsaving obj: %r\t(%s)' % (obj, obj)) + pkl = pickle.dumps(obj) + pickletools.dis(pkl) + writefile(stor, pkl) + + +def readfile(path): + with open(path, 'rb') as f: + return f.read() + +def writefile(path, data): + with open(path, 'wb') as f: + f.write(data) + + +if __name__ == '__main__': + main() diff --git a/golang/__init__.py b/golang/__init__.py index e773775..00babf6 100644 --- a/golang/__init__.py +++ b/golang/__init__.py @@ -38,13 +38,13 @@ __all__ = ['go', 'chan', 'select', 'default', 'nilchan', 'defer', 'panic', 'recover', 'func', 'error', 'b', 'u', 'bstr', 'ustr', 'bbyte', 'uchr', 'gimport'] +import setuptools_dso +setuptools_dso.dylink_prepare_dso('golang.runtime.libgolang') + from golang._gopath import gimport # make gimport available from golang import inspect, sys import decorator, six -import setuptools_dso -setuptools_dso.dylink_prepare_dso('golang.runtime.libgolang') - from golang._golang import _pysys_exc_clear as _sys_exc_clear # @func is a necessary decorator for functions for 
selected golang features to work. diff --git a/golang/_golang.pyx b/golang/_golang.pyx index b857197..689d6a1 100644 --- a/golang/_golang.pyx +++ b/golang/_golang.pyx @@ -5,7 +5,7 @@ # distutils: language = c++ # distutils: depends = libgolang.h os/signal.h unicode/utf8.h _golang_str.pyx _golang_str_pickle.pyx # -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 78bbd40..3e4a64f 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -34,7 +34,9 @@ from cpython.iterobject cimport PySeqIter_New from cpython cimport PyThreadState_GetDict, PyDict_SetItem from cpython cimport PyObject_CheckBuffer from cpython cimport Py_TPFLAGS_HAVE_GC, Py_TPFLAGS_HEAPTYPE, Py_TPFLAGS_READY, PyType_Ready +from cpython cimport Py_TPFLAGS_VALID_VERSION_TAG from cpython cimport PyBytes_Format, PyUnicode_Format, PyObject_Str +from cpython cimport PyObject_GetAttr, PyObject_SetAttr cdef extern from "Python.h": PyTypeObject PyBytes_Type @@ -408,7 +410,6 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 else: return pyb(x) - # XXX temp disabled # __iter__ - yields unicode characters def __iter__(self): # TODO iterate without converting self to u @@ -1145,7 +1146,7 @@ cdef _bstringify(object obj): # -> unicode|bytes _bstringify_enter() try: - if False: # PY_MAJOR_VERSION >= 3: + if False: # PY_MAJOR_VERSION >= 3: # XXX restore ? 
# NOTE this depends on patches to bytes.{__repr__,__str__} below return unicode(obj) @@ -1251,7 +1252,7 @@ def _(): cdef PyTypeObject* t # NOTE patching bytes and its already-created subclasses that did not override .tp_repr/.tp_str # NOTE if we don't also patch __dict__ - e.g. x.__repr__() won't go through patched .tp_repr - for pyt in [bytes] + bytes.__subclasses__(): + for pyt in [bytes] + bytes.__subclasses__(): # FIXME also handle sub-sub-classes assert isinstance(pyt, type) t = pyt if t.tp_repr == _bytes_tp_repr: @@ -1264,7 +1265,7 @@ _() if PY_MAJOR_VERSION < 3: def _(): - cdef PyTypeObject* t + cdef PyTypeObject* t # FIXME also handle sub-sub-classes for pyt in [unicode] + unicode.__subclasses__(): assert isinstance(pyt, type) t = pyt @@ -1301,7 +1302,7 @@ cdef object _unicode_x__ge__(object a, object b): return _unicode_tp_richcompa if PY_MAJOR_VERSION < 3: def _(): cdef PyTypeObject* t - for pyt in [unicode] + unicode.__subclasses__(): + for pyt in [unicode] + unicode.__subclasses__(): # XXX sub-sub-classes assert isinstance(pyt, type) t = pyt if t.tp_richcompare == _unicode_tp_richcompare: @@ -1385,7 +1386,7 @@ def _bytearray_x__iadd__(a, b): return _bytearray_sq_xiconcat(a, b) def _(): cdef PyTypeObject* t - for pyt in [bytearray] + bytearray.__subclasses__(): + for pyt in [bytearray] + bytearray.__subclasses__(): # XXX sub-sub-classes assert isinstance(pyt, type) t = pyt if t.tp_repr == _bytearray_tp_repr: @@ -1408,7 +1409,7 @@ def _(): _() -# _bytearray_data return raw data in bytearray as bytes. +# _bytearray_data returns raw data in bytearray as bytes. # XXX `bytearray s` leads to `TypeError: Expected bytearray, got hbytearray` cdef bytes _bytearray_data(object s): if PY_MAJOR_VERSION >= 3: @@ -1849,6 +1850,7 @@ class _BFormatter(pystring.Formatter): # XXX place, comments # str % ... : ceval on py2 and py3 < 3.11 invokes PyString_Format / PyUnicode_Format # directly upon seeing BINARY_MODULO. This leads to bstr.__mod__ not being called. 
+# XXX -> patch PyString_Format / PyUnicode_Format to invoke our .__mod__ ... ctypedef unicode uformatfunc(object, object) ctypedef bytes bformatfunc(object, object) cdef uformatfunc* _punicode_Format = PyUnicode_Format @@ -1867,7 +1869,7 @@ cdef _patch_capi_str_format(): # XXX place, comments, test -#py3.11: specializes instructions. e.g. ustr(obj) will specialize (after +# py3.11: specializes instructions. e.g. ustr(obj) will specialize (after # executing 8 times) to directly invoke # # PyObject_Str(obj) @@ -1889,6 +1891,37 @@ cdef _patch_capi_object_str(): cpatch(&_pobject_Str, _object_xStr) +# XXX place, comments, test +# on py3 PyObject_GetAttr & co insist on name to be unicode +# XXX _PyObject_LookupAttr +# XXX _PyObject_GenericGetAttrWithDict +# XXX _PyObject_GenericSetAttrWithDict +# XXX type_getattro +IF PY3: + ctypedef object obj_getattr_func(object, object) + ctypedef int obj_setattr_func(object, object, object) except -1 + + cdef obj_getattr_func* _pobject_GetAttr = PyObject_GetAttr + cdef obj_setattr_func* _pobject_SetAttr = PyObject_SetAttr + + cdef object _object_xGetAttr(object obj, object name): +# fprintf(stderr, "xgetattr...\n") + if isinstance(name, pybstr): + name = pyustr(name) + return _pobject_GetAttr(obj, name) + + cdef int _object_xSetAttr(object obj, object name, object v) except -1: +# fprintf(stderr, "xsetattr...\n") + if isinstance(name, pybstr): + name = pyustr(name) + return _pobject_SetAttr(obj, name, v) + + +cdef _patch_capi_object_attr_bstr(): + IF PY3: + cpatch(&_pobject_GetAttr, _object_xGetAttr) + cpatch(&_pobject_SetAttr, _object_xSetAttr) + # ---- misc ---- @@ -2213,6 +2246,7 @@ cdef _patch_str(): upreserve_slots) pyustr = unicode # retarget pyustr -> unicode to where it was copied # XXX vvv needed so that patched unicode could be saved by py2:cPickle at all + # XXX vvv should be done by pytype_replace... ? just us original unicode.tp_name ? 
(pyustr).tp_name = ("unicode" if PY_MAJOR_VERSION < 3 else "str") # py2: patch str to be pybstr @@ -2248,6 +2282,7 @@ cdef _patch_str(): _patch_capi_str_format() _patch_capi_object_str() + _patch_capi_object_attr_bstr() _patch_capi_unicode_decode_as_bstr() _patch_str_pickle() # ... @@ -2259,16 +2294,16 @@ cdef _patch_str(): include '_golang_str_pickle.pyx' # _pytype_clone clones PyTypeObject src into dst. -# dst must not be previously initialized. # -# dst will have reference-count = 1 meaning new reference to it is returned. +# src must be not heap-allocated type. +# dst must be statically allocated and not previously initialized. +# +# dst will have reference-count = 1 meaning new reference to the clone is returned. cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): assert (src.tp_flags & Py_TPFLAGS_READY) != 0 assert (src.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # src is not allocated on heap - #assert not PyType_IS_GC((src).ob_type) # XXX not true as unicode.ob_type is PyType_Type - # which generally has GC support, but - # GC is deactivated for non-heap types. - # copy the struct XXX + .ob_next / .ob_prev (Py_TRACE_REFS) + # and so GC for it is disabled + # copy the struct XXX + ._ob_next / ._ob_prev (Py_TRACE_REFS) (set to NULL) dst[0] = src[0] (dst).ob_refcnt = 1 @@ -2277,6 +2312,7 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): # now reinitialize things like .tp_dict etc, where PyType_Ready built slots that point to src. # we want all those slots to be rebuilt and point to dst instead. + # XXX test _dst = <_XPyTypeObject*>dst dst .tp_flags &= ~Py_TPFLAGS_READY dst .tp_dict = NULL @@ -2286,10 +2322,17 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): _dst.tp_weaklist = NULL # dst.__subclasses__ will be empty because existing children inherit from src, not from dst. 
+ # XXX but ustr, after copy to unicode, will inherit from unicode(pystd) -- recheck + # XXX test _dst.tp_subclasses = NULL + # XXX -> common reinherit fixup + if _dst.tp_init == (<_XPyTypeObject*>(dst.tp_base)).tp_init: + _dst.tp_init = NULL + PyType_Ready(dst) assert (dst.tp_flags & Py_TPFLAGS_READY) != 0 + assert (dst.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # _pytype_replace_by_child replaces typ by its child egg. # @@ -2305,8 +2348,10 @@ cdef _pytype_clone(PyTypeObject *src, PyTypeObject *dst, const char* new_name): # ↑ ↑ # Y Y # +# typ and egg must be static non heap-allocated types. +# # typ_clone must be initialized via _pytype_clone(typ, typ_clone). -# egg' is egg clone put inplace of typ +# egg' is egg clone put inplace of typ. # # XXX preserve_slots - describe cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, @@ -2323,15 +2368,11 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, assert (egg.tp_flags & Py_TPFLAGS_READY) != 0 assert (typ.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 - assert (egg.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # XXX will be not true - # -> ! Py_TPFLAGS_HAVE_GC - # -> ? set Py_TPFLAGS_HEAPTYPE back on typ' ? + assert (egg.tp_flags & Py_TPFLAGS_HEAPTYPE) == 0 # (generally not required) assert (typ.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 assert (egg.tp_flags & Py_TPFLAGS_HAVE_GC) == 0 - # XXX also check PyObject_IS_GC (verifies .tp_is_gc() = n) ? - assert vtyp.ob_size == vegg.ob_size assert typ .tp_basicsize == egg .tp_basicsize @@ -2353,11 +2394,14 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, Py_CLEAR(_egg.tp_bases) Py_CLEAR(_egg.tp_mro) Py_CLEAR(_egg.tp_cache) + # XXX 3.12 +tp_watched # typ <- egg preserving original typ's refcnt, weak references and subclasses\egg. # typ will be now playing the role of egg typ_refcnt = otyp.ob_refcnt + # XXX py3.12 "For the static builtin types this is always NULL, even if weakrefs are added ..." 
typ_weaklist = _typ.tp_weaklist + # XXX py3.12 "May be an invalid pointer" (for static builtin types it became `size_t index` typ_subclasses = _typ.tp_subclasses typ[0] = egg[0] otyp.ob_refcnt = typ_refcnt @@ -2376,6 +2420,63 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, # live in .tp_dict and point to their type. Do it for both typ (new egg) # and origin egg for generality, even though original egg won't be used # anymore. + # + # XXX also check which pointers/other things are propagated from base to + # subclasses. It is e.g. tp_new but others might be as well. + # + # https://docs.python.org/3/c-api/typeobj.html -> inheritance + defaults: + # + # D(default): + # tp_base X + # tp_dict ? + # tp_alloc ? + # tp_new ? + # tp_free ? + # + # ~ + # ~ + # + # I(inherited): + # ob_type == &PyType_Type + # + tp_basicsize == + # + tp_itemsize == + # tp_dealloc + # + tp_vectorcall_offset == + # tp_getattr / tp_getattro + # tp_setattr / tp_setattro NULL + # tp_as_async NULL + # tp_repr + # tp_as_number for % + # tp_as_sequence len concat repeat sq_item contains ... 
+ # tp_as_mapping len subscript + # tp_hash / tp_richcompare + # tp_call NULL + # tp_str + # tp_as_buffer NULL(unicode) !NULL(ustr) + # tp_flags XXX recheck how flags are rebuild by PyTypes_Ready + # tp_traverse / tp_clear NULL <- Py_TPFLAGS_HAVE_GC + # tp_clear NULL + # + tp_weaklistoffset + # tp_iter + # tp_iternext NULL + # tp_descr_get NULL + # tp_descr_set NULL + # + tp_dictoffset 0 + # tp_init NULL + # tp_alloc == (PyType_GenericAlloc) + # tp_new + # tp_free XXX recheck + # tp_is_gc NULL + # tp_finalize NULL + # + # XXX also check PyHeapTypeObject + + # don't let PyType_Ready to create __init__ if tp_init is inherited + if _typ.tp_init == (<_XPyTypeObject*>(typ.tp_base)).tp_init: + _typ.tp_init = NULL + if _egg.tp_init == (<_XPyTypeObject*>(egg.tp_base)).tp_init: + _egg.tp_init = NULL + typ.tp_flags &= ~Py_TPFLAGS_READY egg.tp_flags &= ~Py_TPFLAGS_READY PyType_Ready(typ) @@ -2398,11 +2499,72 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, # initially X.__mro__ = (X, typ, base) and without rebuilding it would # remain (X, egg', base) instead of correct (X, egg' typ_clone, base) # XXX py3 does this automatically? XXX -> no, it can invalidate .__mro__, but not .tp_mro + + # refresh fields related to X inheriting from its base. + # currents state of base is Bnew. + # old state of base is represented by Bold. 
+ # NOTE for first-level children of typ Bnew=egg' and Bold=typ_clone + # for further levels Bnew=bold + def inherit_refresh(X, Bold, Bnew): + # depth-first + for Y in X.__subclasses__(): + inherit_refresh(Y, X, X) + assert isinstance(Bold, type) + assert isinstance(Bnew, type) + assert isinstance(X, type) + o = Bold ; _o = <_XPyTypeObject*>Bold + b = Bnew ; _b = <_XPyTypeObject*>Bnew + x = X ; _x = <_XPyTypeObject*>X +# fprintf(stderr, 'refresh %s\t<- %s', x.tp_name, b.tp_name) +# if Bold is not Bnew: +# fprintf(stderr, '\t# was <- %s', o.tp_name) +# fprintf(stderr, '\n') + assert (x.tp_flags & Py_TPFLAGS_READY) != 0 + x.tp_flags &= ~Py_TPFLAGS_READY + + xdict = (x.tp_dict) + def clear(slotname): + del xdict[slotname] +# Py_CLEAR(_x.tp_dict) # XXX preserve some ? +# Py_CLEAR(_x.tp_bases) # to be rebuilt XXX not ok to clear wrt multi-inheritance XXX test + Py_CLEAR(_x.tp_mro) # ----//---- + Py_CLEAR(_x.tp_cache) # ----//---- + + if _x.tp_new == _o.tp_new: + _x.tp_new = NULL # reinherit from Bnew on reready + # del xdict['__new__'] XXX raises KeyError - why? + if _x.tp_init == _o.tp_init: # XXX also check other bases from mro (ex. StrEnum(str,Enum) which has Enum.__init__) +# fprintf(stderr, ' tp_init <- NULL\n') + _x.tp_init = NULL + #clear('__init__') XXX + + def inherit_reready(X): + assert isinstance(X, type) + x = X +# fprintf(stderr, 'ready %s\n', x.tp_name) + assert (x.tp_flags & Py_TPFLAGS_READY) == 0 + PyType_Ready(X) + assert (x.tp_flags & Py_TPFLAGS_READY) != 0 + + # top-down + for Y in X.__subclasses__(): + inherit_reready(Y) + + assert (x.tp_flags & Py_TPFLAGS_VALID_VERSION_TAG) != 0 + + for X in (typ).__subclasses__(): + inherit_refresh(X, typ_clone, typ) + for X in (typ).__subclasses__(): + inherit_reready(X) + + PyType_Modified(typ) # XXX needed ? 
+ + """ def refresh(x): assert isinstance(x, type) xtyp = x _xtyp = <_XPyTypeObject*>x - #fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) + fprintf(stderr, 'refreshing %s\n', xtyp.tp_name) assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 xtyp.tp_flags &= ~Py_TPFLAGS_READY Py_CLEAR(_xtyp.tp_mro) @@ -2410,7 +2572,8 @@ cdef _pytype_replace_by_child(PyTypeObject *typ, PyTypeObject *typ_clone, assert (xtyp.tp_flags & Py_TPFLAGS_READY) != 0 for _ in x.__subclasses__(): refresh(_) - for _ in (typ).__subclasses__(): + for _ in (typ).__subclasses__(): # XXX + sub-sub-classes refresh(_) + """ # XXX also preserve ._ob_next + ._ob_prev (present in Py_TRACE_REFS builds) diff --git a/golang/_golang_str_pickle.S b/golang/_golang_str_pickle.S index 3b954bc..c889069 100644 --- a/golang/_golang_str_pickle.S +++ b/golang/_golang_str_pickle.S @@ -1,5 +1,5 @@ -// Copyright (C) 2023 Nexedi SA and Contributors. -// Kirill Smelkov +// Copyright (C) 2023-2024 Nexedi SA and Contributors. +// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute // it under the terms of the GNU General Public License version 3, or (at your @@ -288,7 +288,7 @@ inside_counted_stk: // disable executable stack -#ifndef LIBGOLANG_OS_windows +#ifdef LIBGOLANG_OS_linux .section .note.GNU-stack,"",@progbits #endif @@ -304,7 +304,7 @@ inside_counted_stk: #if defined(LIBGOLANG_ARCH_386) -#ifdef LIBGOLANG_CC_msc +#ifdef LIBGOLANG_OS_windows // both msvc and clang-cl # define CSYM_FASTCALL3(name) @name@12 // MSVC mangles __fastcall # define CSYM_FASTCALL4(name) @name@16 #else diff --git a/golang/_golang_str_pickle.pyx b/golang/_golang_str_pickle.pyx index ec091c2..e6918fc 100644 --- a/golang/_golang_str_pickle.pyx +++ b/golang/_golang_str_pickle.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2023 Nexedi SA and Contributors. -# Kirill Smelkov +# Copyright (C) 2023-2024 Nexedi SA and Contributors. 
+# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your @@ -27,6 +27,7 @@ The main entry-points are _patch_str_pickle and _patch_capi_unicode_decode_as_bs from cpython cimport PyUnicode_Decode from cpython cimport PyBytes_FromStringAndSize, _PyBytes_Resize +from cpython cimport PyObject_CallObject, PyObject_CallFunctionObjArgs cdef extern from "Python.h": char* PyBytes_AS_STRING(PyObject*) @@ -130,6 +131,8 @@ cdef struct PicklerTypeInfo: Py_ssize_t off_poutput_buffer # offsetof `PyObject *output_buffer` Py_ssize_t off_output_len # offsetof `Py_ssize_t output_len` Py_ssize_t off_max_output_len # offsetof `Py_ssize_t max_output_len` + Py_ssize_t off_pers_func # offsetof `PyObject *pers_func` + Py_ssize_t off_pers_func_self # offsetof `PyObject *pers_func_self` or -1 if this field is not there # XXX place ? @@ -147,36 +150,61 @@ cdef extern from * nogil: // FOR_EACH_CALLCONV invokes macro X(ccname, callconv, cckind) for every supported calling convention. - // cckind is one of `builtin` or `custom`. + // cckind is one of `builtin`, `custom` or `builtin_psave0`. + // + // - `builtin` represents native calling conventions of the compiler + // available to the programmer via function attributes. + // - `custom` represents custom calling convention for which there is no + // public attribute and via-assembly proxy needs to be used to call such function. + // - `builtin_psave0` represents native calling convention, but indicates + // that the third argument of `save` was const-propagated with `pers_save=0`. + // + // NOTE: psave0 variants go last so that !constprop versions have higher priority to be probed. 
#ifdef LIBGOLANG_ARCH_386 # ifndef LIBGOLANG_CC_msc # define FOR_EACH_CALLCONV(X) \ - X(default,, builtin) \ - X(cdecl, CALLCONV(cdecl), builtin) \ - X(stdcall, CALLCONV(stdcall), builtin) \ - X(fastcall, CALLCONV(fastcall), builtin) \ - X(thiscall, CALLCONV(thiscall), builtin) \ - X(regparm1, CALLCONV(regparm(1)), builtin) \ - X(regparm2, CALLCONV(regparm(2)), builtin) \ - X(regparm3, CALLCONV(regparm(3)), builtin) \ - X(fastcall_nostkclean, na, custom ) + X(default,, builtin) \ + X(cdecl, CALLCONV(cdecl), builtin) \ + X(stdcall, CALLCONV(stdcall), builtin) \ + X(fastcall, CALLCONV(fastcall), builtin) \ + X(thiscall, CALLCONV(thiscall), builtin) \ + X(regparm1, CALLCONV(regparm(1)), builtin) \ + X(regparm2, CALLCONV(regparm(2)), builtin) \ + X(regparm3, CALLCONV(regparm(3)), builtin) \ + X(fastcall_nostkclean, na, custom ) \ + X(default_psave0,, builtin_psave0) \ + X(cdecl_psave0, CALLCONV(cdecl), builtin_psave0) \ + X(stdcall_psave0, CALLCONV(stdcall), builtin_psave0) \ + X(fastcall_psave0, CALLCONV(fastcall), builtin_psave0) \ + X(thiscall_psave0, CALLCONV(thiscall), builtin_psave0) \ + X(regparm1_psave0, CALLCONV(regparm(1)), builtin_psave0) \ + X(regparm2_psave0, CALLCONV(regparm(2)), builtin_psave0) \ + X(regparm3_psave0, CALLCONV(regparm(3)), builtin_psave0) # else // MSC # define FOR_EACH_CALLCONV(X) \ - X(default,, builtin) \ - X(cdecl, CALLCONV(cdecl), builtin) \ - X(stdcall, CALLCONV(stdcall), builtin) \ - X(fastcall, CALLCONV(fastcall), builtin) \ - /* X(CALLCONV(thiscall), thiscall) MSVC emits "C3865: '__thiscall': can only be used on native member functions" */ \ + X(default,, builtin) \ + X(cdecl, CALLCONV(cdecl), builtin) \ + X(stdcall, CALLCONV(stdcall), builtin) \ + X(fastcall, CALLCONV(fastcall), builtin) \ + /* X(thiscall, CALLCONV(thiscall), builtin) MSVC emits "C3865: '__thiscall': can only be used on native member functions" */ \ /* in theory we can emulate thiscall via fastcall 
https://tresp4sser.wordpress.com/2012/10/06/how-to-hook-thiscall-functions/ */ \ - X(vectorcall, CALLCONV(vectorcall), builtin) \ - X(fastcall_nostkclean, na, custom ) + X(vectorcall, CALLCONV(vectorcall), builtin) \ + X(fastcall_nostkclean, na, custom ) \ + X(default_psave0,, builtin_psave0) \ + X(cdecl_psave0, CALLCONV(cdecl), builtin_psave0) \ + X(stdcall_psave0, CALLCONV(stdcall), builtin_psave0) \ + X(fastcall_psave0, CALLCONV(fastcall), builtin_psave0) \ + /* X(thiscall_psave0, CALLCONV(thiscall), builtin_psave0) */ \ + X(vectorcall_psave0, CALLCONV(vectorcall), builtin_psave0) # endif #elif defined(LIBGOLANG_ARCH_amd64) # define FOR_EACH_CALLCONV(X) \ - X(default,, builtin) + X(default,, builtin) \ + X(default_psave0,, builtin_psave0) #elif defined(LIBGOLANG_ARCH_arm64) # define FOR_EACH_CALLCONV(X) \ - X(default,, builtin) + X(default,, builtin) \ + X(default_psave0,, builtin_psave0) #else # error "unsupported architecture" #endif @@ -221,6 +249,7 @@ cdef struct _pickle_PatchCtx: SaveFunc Pickler_save_orig # what was there before PicklerTypeInfo iPickler # information detected about PicklerObject type + PyObject* pymod # module of the patched type # patch contexts for _pickle and _zodbpickle modules @@ -234,7 +263,7 @@ cdef _pickle_PatchCtx _zpickle_patchctx # # - *STRING are loaded as bstr # - bstr is saved as *STRING -# - pickletools decodes *STRING as UTF-8 +# - pickletools decodes *STRING and related opcodes as UTF-8b cdef _patch_str_pickle(): try: import zodbpickle @@ -246,6 +275,9 @@ cdef _patch_str_pickle(): if PY_MAJOR_VERSION >= 3: import pickletools, codecs _codecs_escape_decode = codecs.escape_decode + def xread_stringnl_noescape(f): + data = pickletools.read_stringnl(f, decode=False, stripquotes=False) + return pybstr(data) def xread_stringnl(f): data = _codecs_escape_decode(pickletools.read_stringnl(f, decode=False))[0] return pybstr(data) @@ -256,13 +288,15 @@ cdef _patch_str_pickle(): data = pickletools.read_string4(f).encode('latin1') return 
pybstr(data) + pickletools.stringnl_noescape.reader = xread_stringnl_noescape pickletools.stringnl.reader = xread_stringnl pickletools.string1.reader = xread_string1 pickletools.string4.reader = xread_string4 if zodbpickle: from zodbpickle import pickletools_3 as zpickletools - zpickletools.stringnl.reader = xread_stringnl # was same logic as in std pickletools + zpickletools.stringnl_noescape.reader = xread_stringnl_noescape # was same logic + zpickletools.stringnl.reader = xread_stringnl # as in std pickletools zpickletools.string1.reader = xread_string1 zpickletools.string4.reader = xread_string4 @@ -323,7 +357,7 @@ cdef _patch_pickle(pickle, _pickle, _pickle_PatchCtx* _pctx): pickle.loads = _pickle.loads pickle.Unpickler = _pickle.Unpickler pickle.dump = _pickle.dump - pickle.dumps = _pickle.dumps # XXX needed? + pickle.dumps = _pickle.dumps pickle.Pickler = _pickle.Pickler # patch py @@ -376,6 +410,18 @@ cdef _patch_pypickle(pickle, shadowed): self.memoize(obj) Pickler.dispatch[pybstr] = save_bstr + # adjust Pickler to save persistent ID in protocol 0 as UTF-8 + Pickler_save_pers = Pickler.save_pers + def save_pers(self, pid): + if self.proto >= 1: + Pickler_save_pers(self, pid) + else: + pid_str = pybstr(pid) + if b'\n' in pid_str: + raise pickle.PicklingError(r'persistent ID contains \n') + self.write(b'P' + pid_str + b'\n') + Pickler.save_pers = save_pers + # _patch_cpickle serves _patch_pickle for C version. 
cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx): # adjust load / loads to use 'bstr' encoding by default @@ -417,6 +463,10 @@ cdef _patch_cpickle(_pickle, _pickle_PatchCtx *pctx): assert xsave.cconv == save.cconv, (callconv_str(xsave.cconv), callconv_str(save.cconv)) cpatch(&pctx.Pickler_save_orig.addr, xsave.addr) + # remember the module of patched type + pctx.pymod = _pickle + Py_INCREF(_pickle) # stays alive forever + # XXX test at runtime that we hooked save correctly @@ -454,8 +504,9 @@ cdef _patch_capi_unicode_decode_as_bstr(): # ---- adjusted C bits for saving ---- -# adjust Pickler save to save bstr via *STRING opcodes. -# This mirrors corresponding py saving adjustments, but is more involved to implement. +# adjust Pickler save to save bstr via *STRING opcodes and handle persistent +# references via our codepath. This mirrors corresponding py saving +# adjustments, but is more involved to implement. cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: return __Pickler_xsave(&_pickle_patchctx, self, obj, pers_save) @@ -463,12 +514,17 @@ cdef int _pickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save cdef int _zpickle_Pickler_xsave(PicklerObject* self, PyObject* obj, int pers_save) except -1: return __Pickler_xsave(&_zpickle_patchctx, self, obj, pers_save) +cdef int _pickle_Pickler_xsave_psave0(PicklerObject* self, PyObject* obj) except -1: + return __Pickler_xsave_psave0(&_pickle_patchctx, self, obj) + +cdef int _zpickle_Pickler_xsave_psave0(PicklerObject* self, PyObject* obj) except -1: + return __Pickler_xsave_psave0(&_zpickle_patchctx, self, obj) + # callconv wrappers XXX place cdef extern from *: r""" static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave(PicklerObject*, PyObject*, int); static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(PicklerObject*, PyObject*, int); - #define DEF_PICKLE_XSAVE_builtin(ccname, callconv) \ static int callconv \ 
_pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj, int pers_save) { \ @@ -480,6 +536,19 @@ cdef extern from *: return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave(self, obj, pers_save); \ } + static int __pyx_f_6golang_7_golang__pickle_Pickler_xsave_psave0(PicklerObject*, PyObject*); + static int __pyx_f_6golang_7_golang__zpickle_Pickler_xsave_psave0(PicklerObject*, PyObject*); + #define DEF_PICKLE_XSAVE_builtin_psave0(ccname, callconv) \ + static int callconv \ + _pickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj) { \ + return __pyx_f_6golang_7_golang__pickle_Pickler_xsave_psave0(self, obj); \ + } + #define DEF_ZPICKLE_XSAVE_builtin_psave0(ccname, callconv) \ + static int callconv \ + _zpickle_Pickler_xsave_##ccname(PicklerObject* self, PyObject* obj) { \ + return __pyx_f_6golang_7_golang__zpickle_Pickler_xsave_psave0(self, obj); \ + } + #define DEF_PICKLE_XSAVE_custom(ccname, _) \ extern "C" char _pickle_Pickler_xsave_##ccname; #define DEF_ZPICKLE_XSAVE_custom(ccname, _) \ @@ -496,7 +565,6 @@ cdef extern from *: SaveFunc{(void*)&_pickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, FOR_EACH_CALLCONV(PICKLE_CC_XSAVE) }; - static std::vector _zpickle_Pickler_xsave_ccv = { #define ZPICKLE_CC_XSAVE(ccname, _, __) \ SaveFunc{(void*)&_zpickle_Pickler_xsave_##ccname, CALLCONV_##ccname}, @@ -520,12 +588,52 @@ cdef extern from *: cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, int pers_save) except -1: - # !bstr -> use builtin pickle code - if obj.ob_type != pybstr: - return save_invoke(pctx.Pickler_save_orig.addr, pctx.Pickler_save_orig.cconv, - self, obj, pers_save) + # do not rely on pers_save value and instead set .pers_func=NULL during the + # call not to let xpers_save to be entered recursively and to deactivate + # original save->pers_save codepath. See note in __detect_save_callconv + # about why pers_save value might be unreliable. 
+ # + # we are ok to do adjust .pers_save because Pickler, from the beginning, is + # not safe to be used form multiple threads simultaneously. + ppers_func = ((self) + pctx.iPickler.off_pers_func) + pers_func = ppers_func[0] + try: + ppers_func[0] = NULL + return ___Pickler_xsave(pctx, self, obj, pers_func) + finally: + ppers_func[0] = pers_func + +# __Pickler_xsave_psave0 is used instead of __Pickler_xsave when we detected +# that original save might be compiled with pers_save const-propagated with 0. +cdef int __Pickler_xsave_psave0(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj) except -1: + # similarly to __Pickler_xsave set .pers_func=NULL during the call not to + # let xpers_save to be entered recursively and to deactivate original + # save->pers_save codepath. + ppers_func = ((self) + pctx.iPickler.off_pers_func) + pers_func = ppers_func[0] + try: + ppers_func[0] = NULL + return ___Pickler_xsave(pctx, self, obj, pers_func) + finally: + ppers_func[0] = pers_func + +cdef int ___Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, PyObject* pers_func) except -1: + # persistent reference + if pers_func != NULL: + st = __Pickler_xsave_pers(pctx, self, obj, pers_func) + if st != 0: + return st + + # bstr + if obj.ob_type == pybstr: + return __Pickler_xsave_bstr(pctx, self, obj) - # bstr -> pickle it as *STRING + # everything else -> use builtin pickle code + return save_invoke(pctx.Pickler_save_orig.addr, pctx.Pickler_save_orig.cconv, self, obj) + + +# __Pickler_xsave_bstr saves bstr as *STRING. +cdef int __Pickler_xsave_bstr(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj) except -1: cdef const char* s cdef Py_ssize_t l cdef byte[5] h @@ -564,6 +672,43 @@ cdef int __Pickler_xsave(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* return 0 +# __Pickler_xsave_pers detects if obj has persistent ID and, if yes, saves it as persistent references. 
+# XXX explain: proto=0 UTF8-b instead of ascii and \n rejected +# XXX and exists to be able to patch save when CC does constprop +cdef int __Pickler_xsave_pers(_pickle_PatchCtx* pctx, PicklerObject* self, PyObject* obj, PyObject* pers_func) except -1: + cdef PyObject* pers_func_self = NULL + + if pctx.iPickler.off_pers_func_self != -1: + pers_func_self = (((self) + pctx.iPickler.off_pers_func_self))[0] + + pid = _call_meth(pers_func, pers_func_self, obj) + if pid is None: + return 0 + + cdef int bin = (((self) + pctx.iPickler.off_bin))[0] + if bin: + __Pickler_xsave(pctx, self, pid, 1) + __Pickler_xWrite(pctx, self, b'Q', 1) # BINPERSID + + else: + pid_str = pybstr(pid) + if b'\n' in pid_str: + raise (pctx.pymod).PicklingError(r'persistent ID contains \n') + s = PyBytes_AS_STRING(pid_str) + l = PyBytes_GET_SIZE(pid_str) + __Pickler_xWrite(pctx, self, b'P', 1) # PERSID + __Pickler_xWrite(pctx, self, s, l) + __Pickler_xWrite(pctx, self, b'\n', 1) + + return 1 + +# _call_meth invokes func(self, obj) or func(obj) if self is NULL. +cdef object _call_meth(PyObject* func, PyObject* self, PyObject* obj): + if self != NULL: + return PyObject_CallFunctionObjArgs(func, self, obj, NULL) + return PyObject_CallObject(func, (obj,)) # XXX PyObject_CallOneArg on py3 + + # __Pickler_xWrite mimics original _Pickler_Write. # @@ -607,7 +752,7 @@ cdef int __Pickler_xWrite(_pickle_PatchCtx* pctx, PicklerObject* self, const cha # _detect_Pickler_typeinfo detects information about PicklerObject type # through runtime introspection. # -# This information is used mainly by __Pickler_xWrite. +# This information is used mainly by __Pickler_xWrite and __Pickler_xsave_pers. 
cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *: cdef PicklerTypeInfo t @@ -805,6 +950,65 @@ cdef PicklerTypeInfo _detect_Pickler_typeinfo(pyPickler) except *: markbusy(t.off_max_output_len, sizeof(Py_ssize_t)) trace(".max_output_len:\t", t.off_max_output_len) + # .pers_func + # set .persistent_id to known function and find that pointers + obj_copy() + def persid_func(obj): pass + pyobj.persistent_id = persid_func + dpersid_func = obj_diff(sizeof(PyObject*)) + assert len(dpersid_func) == 1, dpersid_func + t.off_pers_func = dpersid_func[0] + assert ((bobj + t.off_pers_func))[0] == persid_func + markbusy(t.off_pers_func, sizeof(PyObject*)) + trace('.pers_func:\t', t.off_pers_func) + + # .pers_func_self + # start with class that defines .persistent_id methond, then set .persistent_id + # to known function and find which pointers change: + # * if it is only 1 pointer - there is no .pers_func_self (e.g. zodbpickle) + # * if it is 2 pointers - .pers_func_self is there and it is reset to NULL + class pyPickler2(pyPickler): + def persistent_id(self, obj): pass + assert isinstance(pyPickler2, type) + cdef PyTypeObject* Pickler2 = pyPickler2 + cdef _XPyTypeObject* xPickler2 = <_XPyTypeObject*> pyPickler2 + + assert Pickler2.tp_basicsize >= t.size + assert Pickler2.tp_itemsize == 0 + + pyobj = pyPickler2(Null()) + obj = pyobj + assert obj.ob_type == Pickler2 + bobj = obj + + obj_copy() + pyPickler.persistent_id.__set__(pyobj, persid_func) + dpersid_meth = obj_diff(sizeof(PyObject*)) + assert len(dpersid_meth) in (1,2), dpersid_meth + cdef Py_ssize_t off1, off2 + if len(dpersid_meth) == 1: + t.off_pers_func_self = -1 + assert dpersid_meth[0] == t.off_pers_func + assert ((bobj + t.off_pers_func))[0] == persid_func + else: + assert len(dpersid_meth) == 2 + off1 = (dpersid_meth[0]) + off2 = (dpersid_meth[1]) + val1 = ((bobj + off1))[0] + val2 = ((bobj + off2))[0] + if val1 == NULL: + assert off2 == t.off_pers_func + assert val2 == persid_func + 
t.off_pers_func_self = off1 + elif val2 == NULL: + assert off1 == t.off_pers_func + assert val1 == persid_func + t.off_pers_func_self = off2 + else: + assert False, "cannot find NULL after resetting .pers_func_self" + markbusy(t.off_pers_func_self, sizeof(PyObject*)) + trace('.pers_func_self:\t', t.off_pers_func_self) + free(bobj2) return t @@ -931,6 +1135,22 @@ cdef extern from * nogil: # see _golang_str_pickle.S for details # convention is usually the same as default, but on e.g. i386 - where the # default cdecl means to put arguments on the stack, the compiler usually # changes calling convention to use registers instead. +# +# It might be also the case that the code is generated with const-propagated +# pers_save=0 so save becomes a function with 2 arguments instead of 3. Such +# variants are also probed, and if we see that 2-args probe worked ok, we do not +# delve into proving whether pers_save was really const-propagated or not: even +# if it is not const-propagated __Pickler_xsave_psave0 deactivates original +# save->pers_save codepath so the worst that can happen is that we ignore +# pers_save argument passed in a register or on the stack. We are ok to do that +# because we let the probe go only if stkclean_by_callee is the same for both +# save and probe, and because original code passes pers_save=0 all around +# except from inside pers_save which we deactivate. +# +# Note that regarding pers_save the detection of calling convention is not +# reliable because save is invoked with pers_save=0 and zeros might be present +# in a register or on the stack for unrelated reason. For this reason +# __Pickler_xsave does not rely on pers_save value at all in its control flow. 
cdef Callconv __detect_save_callconv(pyPickler, void* save) except *: for p in saveprobe_test_ccv: #print("save: probing %s" % callconv_str(p.cconv)) @@ -1001,6 +1221,11 @@ cdef extern from * nogil: saveprobe_##ccname(void* self, PyObject* obj, int pers_save) { \ return saveprobe(self, obj, pers_save); \ } + #define DEF_SAVEPROBE_builtin_psave0(ccname, callconv) \ + static int callconv \ + saveprobe_##ccname(void* self, PyObject* obj) { \ + return saveprobe(self, obj, 0); \ + } #define DEF_SAVEPROBE_custom(ccname, _) \ extern "C" char saveprobe_##ccname; #define DEF_SAVEPROBE(ccname, callconv, cckind) DEF_SAVEPROBE_##cckind(ccname, callconv) @@ -1028,20 +1253,28 @@ cdef extern from * nogil: vector[SaveFunc] saveprobe_test_ccv -# XXX doc save_invoke ... +# XXX doc save_invoke pers_save=1 ... # XXX place cdef extern from *: r""" #define CC_SAVE_DEFCALL1_builtin(ccname, callconv) + #define CC_SAVE_DEFCALL1_builtin_psave0(ccname, callconv) #define CC_SAVE_DEFCALL1_custom(ccname, _) \ extern "C" int CALLCONV(fastcall) \ save_invoke_as_##ccname(void* save, void* self, PyObject* obj, int pers_save); #define CC_SAVE_DEFCALL1(ccname, callconv, cckind) CC_SAVE_DEFCALL1_##cckind(ccname, callconv) FOR_EACH_CALLCONV(CC_SAVE_DEFCALL1) - static int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) { + static int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj) { using namespace golang; + // passing pers_save is unreliable and we anyway always deactivate + // original save->pers_save codepath and handle persistent references + // ourselves. But try to deactivate it here once more just in case. + // + // See __Pickler_xsave and note in __detect_save_callconv for details. 
+ int pers_save = 1; + switch(cconv) { #define CC_SAVE_CALL1_builtin(ccname, callconv) \ case CALLCONV_ ## ccname: \ @@ -1050,6 +1283,10 @@ cdef extern from *: #define CC_SAVE_CALL1_custom(ccname, _) \ case CALLCONV_ ## ccname: \ return save_invoke_as_##ccname(save, self, obj, pers_save); + #define CC_SAVE_CALL1_builtin_psave0(ccname, callconv) \ + case CALLCONV_ ## ccname: \ + return ((int (callconv *)(void*, PyObject*))save) \ + (self, obj); #define CC_SAVE_CALL1(ccname, callconv, cckind) CC_SAVE_CALL1_##cckind(ccname, callconv) FOR_EACH_CALLCONV(CC_SAVE_CALL1) default: @@ -1057,7 +1294,7 @@ cdef extern from *: } } """ - int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj, int pers_save) except -1 + int save_invoke(void* save, Callconv cconv, void* self, PyObject* obj) except -1 # - cfunc_direct_callees returns addresses of functions that cfunc calls directly. diff --git a/golang/_golang_str_pickle_test.pyx b/golang/_golang_str_pickle_test.pyx index 62c9a2f..b041974 100644 --- a/golang/_golang_str_pickle_test.pyx +++ b/golang/_golang_str_pickle_test.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2023 Nexedi SA and Contributors. -# Kirill Smelkov +# Copyright (C) 2023-2024 Nexedi SA and Contributors. 
+# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your @@ -102,16 +102,16 @@ cdef extern from * nogil: int CALLCONV(fastcall) tfunc_fastcall3(int x, int y, int z) { return x; } - #ifndef LIBGOLANG_CC_msc // see note about C3865 in FOR_EACH_CALLCONV + # ifndef LIBGOLANG_CC_msc // see note about C3865 in FOR_EACH_CALLCONV int CALLCONV(thiscall) tfunc_thiscall1(int x) { return x; } int CALLCONV(thiscall) tfunc_thiscall2(int x, int y) { return x; } int CALLCONV(thiscall) tfunc_thiscall3(int x, int y, int z) { return x; } - #endif + # endif - #ifndef LIBGOLANG_CC_msc // no regparm on MSCV + # ifndef LIBGOLANG_CC_msc // no regparm on MSVC int CALLCONV(regparm(1)) tfunc_regparm1_1(int x) { return x; } int CALLCONV(regparm(1)) @@ -132,7 +132,7 @@ cdef extern from * nogil: tfunc_regparm3_2(int x, int y) { return x; } int CALLCONV(regparm(3)) tfunc_regparm3_3(int x, int y, int z) { return x; } - #endif + # endif static std::vector<_Test_cfunc_is_callee_clenup> _cfunc_is_callee_cleanup_testv = { CASE(tfunc_cdecl1 , 0 * 4), @@ -144,12 +144,12 @@ cdef extern from * nogil: CASE(tfunc_fastcall1 , 0 * 4), CASE(tfunc_fastcall2 , 0 * 4), CASE(tfunc_fastcall3 , 1 * 4), - #ifndef LIBGOLANG_CC_msc + # ifndef LIBGOLANG_CC_msc CASE(tfunc_thiscall1 , 0 * 4), CASE(tfunc_thiscall2 , 1 * 4), CASE(tfunc_thiscall3 , 2 * 4), - #endif - #ifndef LIBGOLANG_CC_msc + # endif + # ifndef LIBGOLANG_CC_msc CASE(tfunc_regparm1_1 , 0 * 4), CASE(tfunc_regparm1_2 , 0 * 4), CASE(tfunc_regparm1_3 , 0 * 4), @@ -159,7 +159,7 @@ cdef extern from * nogil: CASE(tfunc_regparm3_1 , 0 * 4), CASE(tfunc_regparm3_2 , 0 * 4), CASE(tfunc_regparm3_3 , 0 * 4), - #endif + # endif }; #else diff --git a/golang/_gopath.py b/golang/_gopath.py index 8f34b33..d5e1f2a 100644 --- a/golang/_gopath.py +++ b/golang/_gopath.py @@ -1,4 +1,4 @@ -# Copyright (C) 2018-2019 Nexedi SA and Contributors. 
+# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -34,11 +34,7 @@ import os, os.path import sys - -import warnings -with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - import imp +import six # _gopathv returns $GOPATH vector. def _gopathv(): @@ -51,11 +47,25 @@ def _gopathv(): # gimport imports python module or package from fully-qualified module name under $GOPATH. def gimport(name): - imp.acquire_lock() + _gimport_lock() try: return _gimport(name) finally: - imp.release_lock() + _gimport_unlock() + +# on py2 there is global import lock +# on py3 we need to organize our own gimport synchronization +if six.PY2: + import imp + _gimport_lock = imp.acquire_lock + _gimport_unlock = imp.release_lock +else: + from importlib import machinery as imp_machinery + from importlib import util as imp_util + from golang import sync + _gimport_mu = sync.Mutex() + _gimport_lock = _gimport_mu.lock + _gimport_unlock = _gimport_mu.unlock def _gimport(name): # we will register imported module into sys.modules with adjusted path. @@ -93,4 +103,16 @@ def _gimport(name): # https://stackoverflow.com/a/67692 - return imp.load_source(modname, modpath) + return _imp_load_source(modname, modpath) + +def _imp_load_source(modname, modpath): + if six.PY2: + return imp.load_source(modname, modpath) + + # https://docs.python.org/3/whatsnew/3.12.html#imp + loader = imp_machinery.SourceFileLoader(modname, modpath) + spec = imp_util.spec_from_file_location(modname, modpath, loader=loader) + mod = imp_util.module_from_spec(spec) + sys.modules[modname] = mod + loader.exec_module(mod) + return mod diff --git a/golang/_strconv.pyx b/golang/_strconv.pyx index 3b1db0c..03c7528 100644 --- a/golang/_strconv.pyx +++ b/golang/_strconv.pyx @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # cython: language_level=2 -# Copyright (C) 2018-2023 Nexedi SA and Contributors. 
+# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/fmt.h b/golang/fmt.h index 7c33802..a039529 100644 --- a/golang/fmt.h +++ b/golang/fmt.h @@ -1,7 +1,7 @@ #ifndef _NXD_LIBGOLANG_FMT_H #define _NXD_LIBGOLANG_FMT_H -// Copyright (C) 2019-2023 Nexedi SA and Contributors. +// Copyright (C) 2019-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/golang_str_pickle_test.py b/golang/golang_str_pickle_test.py index 1bf1a7b..c782fd7 100644 --- a/golang/golang_str_pickle_test.py +++ b/golang/golang_str_pickle_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2022-2023 Nexedi SA and Contributors. +# Copyright (C) 2022-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -70,30 +70,30 @@ def pickle2tools(pickle): # ---- pickling/unpickling under gpystr ---- -# verify that loading *STRING opcodes loads them as bstr on gpython by default. -# TODO or with encoding='bstr' under plain py -@gpystr_only -def test_string_pickle_load_STRING(pickle): - p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80\\xff'\n." # STRING 'мир\xff' - p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' - p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff' - p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff' +# test pickles with *STRING +STRING_bytes = xbytes('мир')+b'\xff' # binary data in all test *STRING pickles +p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80\\xff'\n." # STRING 'мир\xff' +p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' +p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff' +p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' 
# BINSTRING 'мир\xff' - p_bytes = xbytes('мир')+b'\xff' +# checkSTRING invokes f on all test *STRING pickles. +def checkSTRING(f): + f(p_str) + f(p_utf8) + f(p_sbins) + f(p_bins) - # check invokes f on all test pickles - def check(f): - f(p_str) - f(p_utf8) - f(p_sbins) - f(p_bins) +# verify that loading *STRING opcodes loads them as bstr on gpython by default. +@gpystr_only +def test_strings_pickle_load_STRING(pickle): + check = checkSTRING # default -> bstr on both py2 and py3 - # TODO only this check is gpystr_only -> remove whole-func @gpystr_only def _(p): obj = xloads(pickle, p) assert type(obj) is bstr - assert obj == p_bytes + assert obj == STRING_bytes check(_) # also test bstr inside tuple (for symmetry with save) @@ -104,49 +104,34 @@ def _(p): assert len(tobj) == 1 obj = tobj[0] assert type(obj) is bstr - assert obj == p_bytes + assert obj == STRING_bytes check(_) - # pickle supports encoding=... only on py3 - if six.PY3: - # encoding='bstr' -> bstr - def _(p): - obj = xloads(pickle, p, encoding='bstr') - assert type(obj) is bstr - assert obj == p_bytes - check(_) - - # encoding='bytes' -> bytes - def _(p): - obj = xloads(pickle, p, encoding='bytes') - assert type(obj) is bytes - assert obj == p_bytes - check(_) - - # encoding='utf-8' -> UnicodeDecodeError - def _(p): - with raises(UnicodeDecodeError): - xloads(pickle, p, encoding='utf-8') - check(_) - - # encoding='utf-8', errors=... -> unicode - def _(p): - obj = xloads(pickle, p, encoding='utf-8', errors='backslashreplace') - assert type(obj) is unicode - assert obj == u'мир\\xff' - check(_) - + # also test bstr used as persistent reference directly and as part of tuple (symmetry with save) + def _(p): + p_ = p[:-1] + b'Q.' + pobj = ploads(pickle, p_) + assert type(pobj) is tPersistent + assert type(pobj._p_oid) is bstr + assert pobj._p_oid == STRING_bytes + check(_) + def _(p): + p_ = b'(' + p[:-1] + b'tQ.' 
+ pobj = ploads(pickle, p_) + assert type(pobj) is tPersistent + assert type(pobj._p_oid) is tuple + assert len(pobj._p_oid) == 1 + obj = pobj._p_oid[0] + assert type(obj) is bstr + assert obj == STRING_bytes + check(_) # verify that saving bstr results in *STRING opcodes on gpython. @gpystr_only def test_strings_pickle_save_STRING(pickle): - s = s0 = b(xbytes('мир')+b'\xff') + s = s0 = b(STRING_bytes) assert type(s) is bstr - p_utf8 = b"S'"+xbytes('мир')+b"\\xff'\n." # STRING 'мир\xff' - p_sbins = b'U\x07\xd0\xbc\xd0\xb8\xd1\x80\xff.' # SHORT_BINSTRING 'мир\xff' - p_bins = b'T\x07\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80\xff.' # BINSTRING 'мир\xff' - def dumps(proto): return xdumps(pickle, s, proto) @@ -163,18 +148,84 @@ def dumps(proto): # also test bstr inside tuple to verify that what we patched is actually # _pickle.save that is invoked from inside other save_X functions. s = (s0,) - p_tutf8 = b'(' + p_utf8[:-1] + b't.' - p_tsbins = b'(' + p_sbins[:-1] + b't.' - assert dumps(0) == p_tutf8 - assert dumps(1) == p_tsbins + p_tuple_utf8 = b'(' + p_utf8[:-1] + b't.' + p_tuple_sbins = b'(' + p_sbins[:-1] + b't.' + assert dumps(0) == p_tuple_utf8 + assert dumps(1) == p_tuple_sbins # don't test proto ≥ 2 because they start to use TUPLE1 instead of TUPLE + # also test bstr used as persistent reference to verify pers_save codepath + obj = tPersistent(s0) + def dumps(proto): + return pdumps(pickle, obj, proto) + assert dumps(0) == b'P' + STRING_bytes + '\n.' + for proto in range(1, HIGHEST_PROTOCOL(pickle)+1): + assert dumps(proto) == p_sbins[:-1] + b'Q.' + + # ... and peristent reference being tuple to verifiy pers_save + # stringification in proto=0 and recursion to save in proto≥1. + obj = tPersistent((s0,)) + try: + assert dumps(0) == b'P(' + p_utf8[1:-2] + ',)\n.' 
+ except pickle.PicklingError as e: + # on py2 cpickle insists that with proto=0 pid must be string + if six.PY2: + assert e.args == ('persistent id must be string',) + else: + raise + assert dumps(1) == p_tuple_sbins[:-1] + b'Q.' + # no proto ≥ 2 because they start to use TUPLE1 instead of TUPLE + + # proto 0 with \n in persid -> rejected + obj = tPersistent(b('a\nb')) + if six.PY3: # TODO also consider patching save_pers codepath on py2 + with raises(pickle.PicklingError, match=r'persistent ID contains \\n') as e: + dumps(0) + for proto in range(1, HIGHEST_PROTOCOL(pickle)+1): + assert dumps(proto) == b'U\x03a\nbQ.' + + +# verify that unpickling handles encoding=bstr|* . +# TODO also handle encoding='bstr' under plain py +@mark.skipif(not six.PY3, reason="pickle supports encoding=... only on py3") +@gpystr_only +def test_strings_pickle_load_encoding(pickle): + check = checkSTRING + + # encoding='bstr' -> bstr + def _(p): + obj = xloads(pickle, p, encoding='bstr') + assert type(obj) is bstr + assert obj == STRING_bytes + check(_) + + # encoding='bytes' -> bytes + def _(p): + obj = xloads(pickle, p, encoding='bytes') + assert type(obj) is bytes + assert obj == STRING_bytes + check(_) + + # encoding='utf-8' -> UnicodeDecodeError + def _(p): + with raises(UnicodeDecodeError): + xloads(pickle, p, encoding='utf-8') + check(_) + + # encoding='utf-8', errors=... -> unicode + def _(p): + obj = xloads(pickle, p, encoding='utf-8', errors='backslashreplace') + assert type(obj) is unicode + assert obj == u'мир\\xff' + check(_) + + # verify that loading *UNICODE opcodes loads them as unicode/ustr. # this is standard behaviour but we verify it since we patch pickle's strings processing. # also verify save lightly for symmetry. 
# NOTE not @gpystr_only -def test_string_pickle_loadsave_UNICODE(pickle): +def test_strings_pickle_loadsave_UNICODE(pickle): # NOTE builtin pickle behaviour is to save unicode via 'surrogatepass' error handler # this means that b'мир\xff' -> ustr/unicode -> save will emit *UNICODE with # b'мир\xed\xb3\xbf' instead of b'мир\xff' as data. @@ -263,7 +314,7 @@ def assert_pickle(obj, proto, dumps_ok_gpystr, dumps_ok_stdstr): b'cgolang\nbstr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) b'\xd0\xbc\xd0\xb8\xd1\x80\xed\xb3\xbftR.') - # NOTE BINUNICODE ...edb3bf not ...ff (see test_string_pickle_loadsave_UNICODE for details) + # NOTE BINUNICODE ...edb3bf not ...ff (see test_strings_pickle_loadsave_UNICODE for details) _(us, 1, b'X\x09\x00\x00\x00\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbf.', # BINUNICODE b'cgolang\nustr\n(X\x09\x00\x00\x00' # bstr(BINUNICODE) b'\xd0\xbc\xd0\xb0\xd0\xb9\xed\xb3\xbftR.') @@ -302,38 +353,48 @@ def xdiss(pickletools, p): # -> str pickletools.dis(p, out) return out.getvalue() -# verify that disassembling *STRING opcodes works with treating strings as UTF8b. +# verify that disassembling *STRING and related opcodes works with treating strings as UTF8b. @gpystr_only -def test_string_pickle_dis_STRING(pickletools): - p_str = b"S'\\xd0\\xbc\\xd0\\xb8\\xd1\\x80'\n." # STRING 'мир' - p_sbins = b'U\x06\xd0\xbc\xd0\xb8\xd1\x80.' # SHORT_BINSTRING 'мир' - p_bins = b'T\x06\x00\x00\x00\xd0\xbc\xd0\xb8\xd1\x80.' # BINSTRING 'мир' - - bmir = x32("b('мир')", "'мир'") +def test_strings_pickle_dis_STRING(pickletools): + brepr = repr(b(STRING_bytes)) assert xdiss(pickletools, p_str) == """\ 0: S STRING %s - 28: . STOP + 32: . STOP highest protocol among opcodes = 0 -""" % bmir +""" % brepr + + assert xdiss(pickletools, p_utf8) == """\ + 0: S STRING %s + 14: . STOP +highest protocol among opcodes = 0 +""" % brepr assert xdiss(pickletools, p_sbins) == """\ 0: U SHORT_BINSTRING %s - 8: . STOP + 9: . 
STOP highest protocol among opcodes = 1 -""" % bmir +""" % brepr assert xdiss(pickletools, p_bins) == """\ 0: T BINSTRING %s - 11: . STOP + 12: . STOP highest protocol among opcodes = 1 -""" % bmir +""" % brepr + + assert xdiss(pickletools, b'P' + STRING_bytes + b'\n.') == """\ + 0: P PERSID %s + 9: . STOP +highest protocol among opcodes = 0 +""" % brepr # ---- loads and normalized dumps ---- # xloads loads pickle p via pickle.loads # it also verifies that .load and Unpickler.load give the same result. +# +# see also: ploads. def xloads(pickle, p, **kw): obj1 = _xpickle_attr(pickle, 'loads')(p, **kw) obj2 = _xpickle_attr(pickle, 'load') (io.BytesIO(p), **kw) @@ -346,6 +407,8 @@ def xloads(pickle, p, **kw): # xdumps dumps obj via pickle.dumps # it also verifies that .dump and Pickler.dump give the same. # the pickle is returned in normalized form - see pickle_normalize for details. +# +# see also: pdumps. def xdumps(pickle, obj, proto, **kw): p1 = _xpickle_attr(pickle, 'dumps')(obj, proto, **kw) f2 = io.BytesIO(); _xpickle_attr(pickle, 'dump')(obj, f2, proto, **kw) @@ -359,10 +422,85 @@ def xdumps(pickle, obj, proto, **kw): # remove not interesting parts: PROTO / FRAME header and unused PUTs if proto >= 2: - protover = PROTO(proto) - assert p1.startswith(protover) + assert p1.startswith(PROTO(proto)) return pickle_normalize(pickle2tools(pickle), p1) +# ploads loads pickle p via pickle.Unpickler with handling persistent references. +# +# see also: xloads. 
+def ploads(pickle, p, **kw): + Unpickler = _xpickle_attr(pickle, 'Unpickler') + + u1 = Unpickler(io.BytesIO(p), **kw) + u1.persistent_load = lambda pid: tPersistent(pid) + obj1 = u1.load() + + # same with .persistent_load defined as class method + try: + class Unpickler2(Unpickler): + def persistent_load(self, pid): return tPersistent(pid) + except TypeError: + if six.PY2: + # on py2 cPickle.Unpickler is not subclassable at all + obj2 = obj1 + else: + raise + else: + u2 = Unpickler2(io.BytesIO(p), **kw) + obj2 = u2.load() + + assert obj1 == obj2 + return obj1 + +# pdumps dumps obj via pickle.Pickler with handling persistent references. +# the pickle is returned in normalized form - see pickle_normalize for details. +# +# see also: xdumps. +def pdumps(pickle, obj, proto, **kw): + Pickler = _xpickle_attr(pickle, 'Pickler') + + f1 = io.BytesIO() + p1 = Pickler(f1, proto, **kw) + def _(obj): + if isinstance(obj, tPersistent): + return obj._p_oid + return None + p1.persistent_id = _ + p1.dump(obj) + pobj1 = f1.getvalue() + + # same with .persistent_id defined as class method + try: + class Pickler2(Pickler): + def persistent_id(self, obj): + if isinstance(obj, tPersistent): + return obj._p_oid + return None + except TypeError: + if six.PY2: + # on py2 cPickle.Pickler is not subclassable at all + pobj2 = pobj1 + else: + raise + else: + f2 = io.BytesIO() + p2 = Pickler2(f2, proto, **kw) + p2.dump(obj) + pobj2 = f2.getvalue() + + assert pobj1 == pobj2 + + if proto >= 2: + assert pobj1.startswith(PROTO(proto)) + return pickle_normalize(pickle2tools(pickle), pobj1) + +# tPersistent is test class to verify handling of persistent references. 
+class tPersistent(object): + def __init__(t, pid): + t._p_oid = pid + def __eq__(t, rhs): return (type(rhs) is type(t)) and (rhs._p_oid == t._p_oid) + def __ne__(t, rhs): return not (t.__eq__(rhs)) + def _xpickle_attr(pickle, name): # on py3 pickle.py tries to import from C _pickle to optimize by default # -> verify py version if we are asked to test pickle.py diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 0692de7..975584a 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -26,6 +26,7 @@ from golang.gcompat import qq from golang.strconv_test import byterange from golang.golang_test import readfile, assertDoc, _pyrun, dir_testprog, PIPE +from gpython import _tEarlyStrSubclass from pytest import raises, mark, skip import sys import six @@ -2558,6 +2559,50 @@ def _(delta): assert _(b'cde') == b'abcde' +# verify that str subclasses, created before str/unicode are replaced with +# bstr/ustr, continue to work ok. +# +# Even though we try to patch string types early, there are always some str +# subclasses created by builtin modules before golang is loaded. For example +# enum.StrEnum is created early during python startup process via +# pathlib -> fnmatch -> re -> enum import. So if we don't preserve those +# classes to continue to work correctly things are breaking badly. +# +# XXX note !gpystr_only ... +# XXX also test bytes? 
+def tests_strings_early_str_subclass(): + xstr = _tEarlyStrSubclass + + # .tp_new should be adjusted to point to current str + # (else str.__new__ breaks with "str.__new__(xstr) is not safe ...") + obj = str.__new__(xstr, 'abc') + assert type(obj) is xstr + assert obj == 'abc' + assert xstr.__new__ is str.__new__ + + # follow-up .__init__ should be noop (enum uses str.__init__ for real) + obj.__init__('xyz') + assert obj == 'abc' + assert str.__init__ is object.__init__ + assert xstr.__init__ is str.__init__ + + + # XXX place + assert xstr.__base__ is str + assert xstr.__bases__ == (str,) + + # XXX __bases__ + __mro__ for MI + + + """ + assert str.__base__ is object + assert str.__bases__ == (object,) + """ + + + # XXX more... + + # ---- benchmarks ---- # utf-8 decoding diff --git a/golang/golang_test.py b/golang/golang_test.py index 0b6b9cb..46d6e95 100644 --- a/golang/golang_test.py +++ b/golang/golang_test.py @@ -1682,6 +1682,12 @@ def test_defer_excchain_dump_ipython(): # ----//---- (pytest) def test_defer_excchain_dump_pytest(): + # pytest 7.4 also changed traceback output format + # similarly to ipython we do not need to test it becase we activate + # pytest-related patch only on py2 for which latest pytest version is 4.6.11 . + import pytest + if six.PY3 and pytest.version_tuple >= (7,4): + skip("pytest is patched only on py2; pytest7.4 changed traceback format") tbok = readfile(dir_testprog + "/golang_test_defer_excchain.txt-pytest") retcode, stdout, stderr = _pyrun([ # don't let pytest emit internal deprecation warnings to stderr diff --git a/golang/libgolang.h b/golang/libgolang.h index 53a8aec..4131a84 100644 --- a/golang/libgolang.h +++ b/golang/libgolang.h @@ -1,7 +1,7 @@ #ifndef _NXD_LIBGOLANG_H #define _NXD_LIBGOLANG_H -// Copyright (C) 2018-2023 Nexedi SA and Contributors. +// Copyright (C) 2018-2024 Nexedi SA and Contributors. 
// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/os.cpp b/golang/os.cpp index a7c7f2a..6c08fdf 100644 --- a/golang/os.cpp +++ b/golang/os.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2023 Nexedi SA and Contributors. +// Copyright (C) 2019-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/os.h b/golang/os.h index 9ad0c99..1d79d05 100644 --- a/golang/os.h +++ b/golang/os.h @@ -1,7 +1,7 @@ #ifndef _NXD_LIBGOLANG_OS_H #define _NXD_LIBGOLANG_OS_H // -// Copyright (C) 2019-2023 Nexedi SA and Contributors. +// Copyright (C) 2019-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/os/signal.cpp b/golang/os/signal.cpp index 793e7a4..0677721 100644 --- a/golang/os/signal.cpp +++ b/golang/os/signal.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2021-2023 Nexedi SA and Contributors. +// Copyright (C) 2021-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/pyx/build.py b/golang/pyx/build.py index 3c15f22..0079dd0 100644 --- a/golang/pyx/build.py +++ b/golang/pyx/build.py @@ -1,4 +1,4 @@ -# Copyright (C) 2019-2023 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/runtime.cpp b/golang/runtime.cpp index 0fc63e6..dd398d4 100644 --- a/golang/runtime.cpp +++ b/golang/runtime.cpp @@ -1,5 +1,5 @@ -// Copyright (C) 2023 Nexedi SA and Contributors. -// Kirill Smelkov +// Copyright (C) 2023-2024 Nexedi SA and Contributors. 
+// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute // it under the terms of the GNU General Public License version 3, or (at your diff --git a/golang/runtime.h b/golang/runtime.h index 60b5765..4eecfc2 100644 --- a/golang/runtime.h +++ b/golang/runtime.h @@ -1,8 +1,8 @@ #ifndef _NXD_LIBGOLANG_RUNTIME_H #define _NXD_LIBGOLANG_RUNTIME_H -// Copyright (C) 2023 Nexedi SA and Contributors. -// Kirill Smelkov +// Copyright (C) 2023-2024 Nexedi SA and Contributors. +// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute // it under the terms of the GNU General Public License version 3, or (at your diff --git a/golang/runtime/internal/atomic.cpp b/golang/runtime/internal/atomic.cpp index 2669714..a8b57da 100644 --- a/golang/runtime/internal/atomic.cpp +++ b/golang/runtime/internal/atomic.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2022-2023 Nexedi SA and Contributors. +// Copyright (C) 2022-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/runtime/internal/syscall.cpp b/golang/runtime/internal/syscall.cpp index 4602c0a..429545a 100644 --- a/golang/runtime/internal/syscall.cpp +++ b/golang/runtime/internal/syscall.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2021-2023 Nexedi SA and Contributors. +// Copyright (C) 2021-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/runtime/internal/syscall.h b/golang/runtime/internal/syscall.h index 4771a19..204c5b8 100644 --- a/golang/runtime/internal/syscall.h +++ b/golang/runtime/internal/syscall.h @@ -1,7 +1,7 @@ #ifndef _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H #define _NXD_LIBGOLANG_RUNTIME_INTERNAL_SYSCALL_H -// Copyright (C) 2021-2023 Nexedi SA and Contributors. +// Copyright (C) 2021-2024 Nexedi SA and Contributors. 
// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/runtime/libgolang.cpp b/golang/runtime/libgolang.cpp index 3714cc7..f91772a 100644 --- a/golang/runtime/libgolang.cpp +++ b/golang/runtime/libgolang.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2018-2023 Nexedi SA and Contributors. +// Copyright (C) 2018-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute diff --git a/golang/runtime/platform.h b/golang/runtime/platform.h index 8def7e7..f8fa3ea 100644 --- a/golang/runtime/platform.h +++ b/golang/runtime/platform.h @@ -1,8 +1,8 @@ #ifndef _NXD_LIBGOLANG_RUNTIME_PLATFORM_H #define _NXD_LIBGOLANG_RUNTIME_PLATFORM_H -// Copyright (C) 2023 Nexedi SA and Contributors. -// Kirill Smelkov +// Copyright (C) 2023-2024 Nexedi SA and Contributors. +// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute // it under the terms of the GNU General Public License version 3, or (at your diff --git a/gpython/__init__.py b/gpython/__init__.py index f2225f8..980f964 100755 --- a/gpython/__init__.py +++ b/gpython/__init__.py @@ -1,6 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -247,11 +247,12 @@ def run(mmain): pyimpl = platform.python_implementation() v = _version_info_str + pyver = platform.python_version() # ~ v(sys.version_info) but might also have e.g. 
'+' at tail if pyimpl == 'CPython': - ver.append('CPython %s' % v(sys.version_info)) + ver.append('CPython %s' % pyver) elif pyimpl == 'PyPy': ver.append('PyPy %s' % v(sys.pypy_version_info)) - ver.append('Python %s' % v(sys.version_info)) + ver.append('Python %s' % pyver) else: ver = [] # unknown @@ -474,6 +475,7 @@ def init(): from six.moves import builtins for k in golang.__all__: setattr(builtins, k, getattr(golang, k)) +# setattr(builtins, 'CCC', CCC) # XXX kill # sys.version sys.version += (' [GPython %s] [runtime %s] [strings %s]' % (golang.__version__, gpy_runtime_ver, gpy_strings)) @@ -594,8 +596,8 @@ def __next__(self): next = __next__ # for py2 -# for tests XXX continue by first writing test XXX -#1/0 +# for tests: subclass of str that is created before everything else is imported +# and before golang patches builtin str/unicode types. class _tEarlyStrSubclass(str): pass diff --git a/gpython/_gpython.pyx b/gpython/_gpython.pyx index ada1df8..df49691 100644 --- a/gpython/_gpython.pyx +++ b/gpython/_gpython.pyx @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # cython: language_level=2 -# Copyright (C) 2023 Nexedi SA and Contributors. -# Kirill Smelkov +# Copyright (C) 2023-2024 Nexedi SA and Contributors. +# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your diff --git a/gpython/_gpython_c.cpp b/gpython/_gpython_c.cpp index 05ba977..59fbb93 100644 --- a/gpython/_gpython_c.cpp +++ b/gpython/_gpython_c.cpp @@ -1,5 +1,5 @@ -// Copyright (C) 2023 Nexedi SA and Contributors. -// Kirill Smelkov +// Copyright (C) 2023-2024 Nexedi SA and Contributors. 
+// Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute // it under the terms of the GNU General Public License version 3, or (at your diff --git a/gpython/gpython_test.py b/gpython/gpython_test.py index 355c2e7..85b97fb 100644 --- a/gpython/gpython_test.py +++ b/gpython/gpython_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2019-2023 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute diff --git a/pyproject.toml b/pyproject.toml index 07ecad3..d28a182 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,2 +1,2 @@ [build-system] -requires = ["setuptools", "wheel", "setuptools_dso >= 2.7", "cython < 3", "gevent"] +requires = ["setuptools", "wheel", "setuptools_dso >= 2.8", "cython < 3", "gevent"] diff --git a/setup.py b/setup.py index f0539ee..9ef79dc 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,5 @@ # pygolang | pythonic package setup -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -189,7 +189,7 @@ def install_egg_scripts(self, dist): # requirements of packages under "golang." namespace R = { 'cmd.pybench': {'pytest', 'py'}, - 'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.7'}, + 'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.8'}, 'x.perf.benchlib': {'numpy'}, } # TODO generate `a.b -> a`, e.g. 
x.perf = join(x.perf.*); x = join(x.*) @@ -575,7 +575,7 @@ def defif(name, ok): install_requires = ['gevent', 'six', 'decorator', 'Importing;python_version<="2.7"', # only runtime part: for dylink_prepare_dso - 'setuptools_dso >= 2.7', + 'setuptools_dso >= 2.8', # pyx.build -> setuptools_dso uses multiprocessing # setuptools_dso uses multiprocessing only on Python3, and only on systems where # mp.get_start_method()!='fork', while geventmp does not work on windows. @@ -611,6 +611,7 @@ def defif(name, ok): Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 + Programming Language :: Python :: 3.12 Programming Language :: Python :: Implementation :: CPython Programming Language :: Python :: Implementation :: PyPy Operating System :: POSIX diff --git a/tox.ini b/tox.ini index e99c48f..6833dce 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] envlist = - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,pypy,pypy3}-{thread,gevent} + {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread,gevent} # ThreadSanitizer @@ -10,18 +10,18 @@ envlist = # (*) PyPy locks its GIL (see RPyGilAcquire) by manually doing atomic cmpxchg # and other games, which TSAN cannot see if PyPy itself was not compiled with # -fsanitize=thread. - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311 }-{thread }-tsan + {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{thread }-tsan # XXX py*-gevent-tsan would be nice to have, but at present TSAN is not # effective with gevent, because it does not understand greenlet "thread" # switching and so perceives the program as having only one thread where races # are impossible. Disabled to save time. 
-# {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311 }-{ gevent}-tsan +# {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{ gevent}-tsan # AddressSanitizer # XXX asan does not work with gevent: https://github.com/python-greenlet/greenlet/issues/113 - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,pypy,pypy3}-{thread }-asan + {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread }-asan [testenv] basepython = @@ -35,6 +35,8 @@ basepython = py310: python3.10 py311d: python3.11-dbg py311: python3.11 + py312: python3.12 + py312d: python3.12-dbg pypy: pypy pypy3: pypy3 @@ -72,5 +74,5 @@ commands= # asan/tsan: tell pytest not to capture output - else it is not possible to see # reports from sanitizers because they crash tested process on error. # likewise for python debug builds. - asan,tsan,py{27,39,310,311}d: -s \ + asan,tsan,py{27,39,310,311,312}d: -s \ gpython/ golang/ diff --git a/trun b/trun index d9d260d..727e063 100755 --- a/trun +++ b/trun @@ -1,5 +1,5 @@ #!/usr/bin/env python -# Copyright (C) 2019-2020 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -34,12 +34,13 @@ trun cares to run python with LD_PRELOAD set appropriately to /path/to/libtsan.s from __future__ import print_function, absolute_import -import os, sys, re, subprocess, pkgutil -import warnings -with warnings.catch_warnings(): - warnings.simplefilter('ignore', DeprecationWarning) - import imp +import os, sys, re, subprocess, types PY3 = (bytes is not str) +if PY3: + from importlib import machinery as imp_machinery +else: + import imp, pkgutil + # env_prepend prepends value to ${name} environment variable. # @@ -64,12 +65,15 @@ def grep1(pattern, text): # -> re.Match|None # to import e.g. golang.pyx.build, or locate golang._golang, without built/working golang. 
def ximport_empty_golangmod(): assert 'golang' not in sys.modules - golang = imp.new_module('golang') + golang = types.ModuleType('golang') golang.__package__ = 'golang' golang.__path__ = ['golang'] golang.__file__ = 'golang/__init__.py' - golang.__loader__ = pkgutil.ImpLoader('golang', None, 'golang/__init__.py', - [None, None, imp.PY_SOURCE]) + if PY3: + golang.__loader__ = imp_machinery.SourceFileLoader('golang', 'golang/__init__.py') + else: + golang.__loader__ = pkgutil.ImpLoader('golang', None, 'golang/__init__.py', + [None, None, imp.PY_SOURCE]) sys.modules['golang'] = golang From 4d64fd0f85ee4e177f83e9ebadd112e360ec6e9d Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 6 May 2024 11:51:31 +0300 Subject: [PATCH 22/29] X update (sync with master + ustr.translate fixes) --- .gitmodules | 3 + .lsan-ignore.txt | 124 +++++++++ 3rdparty/ratas | 1 + MANIFEST.in | 2 +- conftest.py | 30 ++ golang/_golang.pyx | 20 +- golang/_golang_str.pyx | 20 +- golang/_golang_test.pyx | 21 +- golang/golang_str_test.py | 22 +- golang/golang_test.py | 10 +- golang/libgolang.h | 9 +- golang/pyx/build_test.py | 11 +- golang/runtime/_libgolang.pxd | 4 +- golang/runtime/_runtime_gevent.pyx | 20 +- golang/runtime/_runtime_thread.pyx | 55 +++- golang/runtime/libgolang.cpp | 12 +- golang/time.cpp | 422 ++++++++++++++++++++++++----- golang/time.h | 14 +- golang/time_test.py | 79 ++++-- setup.py | 12 +- tox.ini | 19 +- trun | 55 +++- 22 files changed, 794 insertions(+), 171 deletions(-) create mode 100644 .lsan-ignore.txt create mode 160000 3rdparty/ratas diff --git a/.gitmodules b/.gitmodules index c279e31..0be964e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ +[submodule "3rdparty/ratas"] + path = 3rdparty/ratas + url = https://github.com/jsnell/ratas.git [submodule "3rdparty/funchook"] path = 3rdparty/funchook url = https://github.com/kubo/funchook.git diff --git a/.lsan-ignore.txt b/.lsan-ignore.txt new file mode 100644 index 0000000..5e46ba3 --- /dev/null +++ 
b/.lsan-ignore.txt @@ -0,0 +1,124 @@ +# .lsan-ignore.txt lists memory leak events that LeakSanitizer should not +# report when running pygolang tests. +# +# Many python allocations, whose lifetime coincides with python interpreter +# lifetime, and which are not explicitly freed on python shutdown, are +# reported as leaks by default. Disable leak reporting for those to avoid +# non-pygolang related printouts. + + +# >>> Everything created when initializing python, e.g. sys.stderr +# #0 0x7f21e74f3bd7 in malloc .../asan_malloc_linux.cpp:69 +# #1 0x555f361ff9a4 in PyThread_allocate_lock Python/thread_pthread.h:385 +# #2 0x555f3623f72a in _buffered_init Modules/_io/bufferedio.c:725 +# #3 0x555f3623ff7e in _io_BufferedWriter___init___impl Modules/_io/bufferedio.c:1803 +# #4 0x555f3623ff7e in _io_BufferedWriter___init__ Modules/_io/clinic/bufferedio.c.h:489 +# #5 0x555f3610c086 in type_call Objects/typeobject.c:1103 +# #6 0x555f3609cdcc in _PyObject_MakeTpCall Objects/call.c:214 +# #7 0x555f3609d6a8 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:90 +# #8 0x555f3609d6a8 in _PyObject_VectorcallTstate Include/internal/pycore_call.h:77 +# #9 0x555f3609d6a8 in _PyObject_CallFunctionVa Objects/call.c:536 +# #10 0x555f3609e89c in _PyObject_CallFunction_SizeT Objects/call.c:590 +# #11 0x555f3623a0df in _io_open_impl Modules/_io/_iomodule.c:407 +# #12 0x555f3623a0df in _io_open Modules/_io/clinic/_iomodule.c.h:264 +# #13 0x555f360f17da in cfunction_vectorcall_FASTCALL_KEYWORDS Objects/methodobject.c:443 +# #14 0x555f3609d54c in _PyObject_VectorcallTstate Include/internal/pycore_call.h:92 +# #15 0x555f3609d54c in _PyObject_CallFunctionVa Objects/call.c:536 +# #16 0x555f3609ec34 in callmethod Objects/call.c:608 +# #17 0x555f3609ec34 in _PyObject_CallMethod Objects/call.c:677 +# #18 0x555f361e60cf in create_stdio Python/pylifecycle.c:2244 +# #19 0x555f361e6523 in init_sys_streams Python/pylifecycle.c:2431 +# #20 0x555f361e6523 in init_interp_main 
Python/pylifecycle.c:1154 +# #21 0x555f361e7204 in pyinit_main Python/pylifecycle.c:1230 +# #22 0x555f361e85ba in Py_InitializeFromConfig Python/pylifecycle.c:1261 +# #23 0x555f3621010a in pymain_init Modules/main.c:67 +# #24 0x555f362113de in pymain_main Modules/main.c:701 +# #25 0x555f362113de in Py_BytesMain Modules/main.c:734 +leak:^pymain_init$ + +# >>> Everything created when importing py modules, e.g. +# #0 0x7f18c86f3bd7 in malloc .../asan_malloc_linux.cpp:69 +# #1 0x55b971430acf in PyMem_RawMalloc Objects/obmalloc.c:586 +# #2 0x55b971430acf in _PyObject_Malloc Objects/obmalloc.c:2003 +# #3 0x55b971430acf in _PyObject_Malloc Objects/obmalloc.c:1996 +# #4 0x55b971415696 in new_keys_object Objects/dictobject.c:632 +# #5 0x55b971415716 in dictresize Objects/dictobject.c:1429 +# #6 0x55b97141961a in insertion_resize Objects/dictobject.c:1183 +# #7 0x55b97141961a in insertdict Objects/dictobject.c:1248 +# #8 0x55b97143eb7b in add_subclass Objects/typeobject.c:6547 +# #9 0x55b97144ca52 in type_ready_add_subclasses Objects/typeobject.c:6345 +# #10 0x55b97144ca52 in type_ready Objects/typeobject.c:6476 +# #11 0x55b971451a1f in PyType_Ready Objects/typeobject.c:6508 +# #12 0x55b971451a1f in type_new_impl Objects/typeobject.c:3189 +# #13 0x55b971451a1f in type_new Objects/typeobject.c:3323 +# #14 0x55b971443014 in type_call Objects/typeobject.c:1091 +# #15 0x55b9713d3dcc in _PyObject_MakeTpCall Objects/call.c:214 +# #16 0x55b9713d47bd in _PyObject_FastCallDictTstate Objects/call.c:141 +# #17 0x55b9713d47bd in PyObject_VectorcallDict Objects/call.c:165 +# #18 0x55b9714d14c2 in builtin___build_class__ Python/bltinmodule.c:209 +# #19 0x55b9714287da in cfunction_vectorcall_FASTCALL_KEYWORDS Objects/methodobject.c:443 +# #20 0x55b9713d4a7b in _PyObject_VectorcallTstate Include/internal/pycore_call.h:92 +# #21 0x55b9713d4a7b in PyObject_Vectorcall Objects/call.c:299 +# #22 0x55b97137666e in _PyEval_EvalFrameDefault Python/ceval.c:4769 +# #23 0x55b9714d7e6b in 
_PyEval_EvalFrame Include/internal/pycore_ceval.h:73 +# #24 0x55b9714d7e6b in _PyEval_Vector Python/ceval.c:6434 +# #25 0x55b9714d7e6b in PyEval_EvalCode Python/ceval.c:1148 +# #26 0x55b9714d2e1f in builtin_exec_impl Python/bltinmodule.c:1077 +# #27 0x55b9714d2e1f in builtin_exec Python/clinic/bltinmodule.c.h:465 +# #28 0x55b9714287da in cfunction_vectorcall_FASTCALL_KEYWORDS Objects/methodobject.c:443 +# #29 0x55b971376dcb in do_call_core Python/ceval.c:7349 +# #30 0x55b971376dcb in _PyEval_EvalFrameDefault Python/ceval.c:5376 +# #31 0x55b9714d7faf in _PyEval_EvalFrame Include/internal/pycore_ceval.h:73 +# #32 0x55b9714d7faf in _PyEval_Vector Python/ceval.c:6434 +# #33 0x55b9713d436e in _PyObject_VectorcallTstate Include/internal/pycore_call.h:92 +# #34 0x55b9713d436e in object_vacall Objects/call.c:819 +# #35 0x55b9713d63cf in PyObject_CallMethodObjArgs Objects/call.c:879 +# #36 0x55b9715080e1 in import_find_and_load Python/import.c:1748 +# #37 0x55b9715080e1 in PyImport_ImportModuleLevelObject Python/import.c:1847 +# #38 0x55b97137de9c in import_name Python/ceval.c:7422 +# #39 0x55b97137de9c in _PyEval_EvalFrameDefault Python/ceval.c:3946 +# #40 0x55b9714d7e6b in _PyEval_EvalFrame Include/internal/pycore_ceval.h:73 +# #41 0x55b9714d7e6b in _PyEval_Vector Python/ceval.c:6434 +# #42 0x55b9714d7e6b in PyEval_EvalCode Python/ceval.c:1148 +# #43 0x55b9714d2e1f in builtin_exec_impl Python/bltinmodule.c:1077 +# #44 0x55b9714d2e1f in builtin_exec Python/clinic/bltinmodule.c.h:465 +# #45 0x55b9714287da in cfunction_vectorcall_FASTCALL_KEYWORDS Objects/methodobject.c:443 +# #46 0x55b971376dcb in do_call_core Python/ceval.c:7349 +# #47 0x55b971376dcb in _PyEval_EvalFrameDefault Python/ceval.c:5376 +leak:^PyImport_Import +# importlib.import_module leads to +# #0 0x7f1951ef3bd7 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69 +# #1 0x55f399e8cacf in PyMem_RawMalloc Objects/obmalloc.c:586 +# #2 0x55f399e8cacf in _PyObject_Malloc Objects/obmalloc.c:2003 +# 
#3 0x55f399e8cacf in _PyObject_Malloc Objects/obmalloc.c:1996 +# #4 0x55f399e86344 in PyModule_ExecDef Objects/moduleobject.c:400 +# #5 0x55f399f6178a in exec_builtin_or_dynamic Python/import.c:2345 +# #6 0x55f399f6178a in _imp_exec_dynamic_impl Python/import.c:2419 +# #7 0x55f399f6178a in _imp_exec_dynamic Python/clinic/import.c.h:474 +# #8 0x55f399e8438a in cfunction_vectorcall_O Objects/methodobject.c:514 +leak:^_imp_exec_dynamic + + +# >>> Everything allocated at DSO initialization, e.g. +# #0 0x7f35d2af46c8 in operator new(unsigned long) .../asan_new_delete.cpp:95 +# #1 0x7f35ce897e9f in __static_initialization_and_destruction_0 golang/context.cpp:61 +# #2 0x7f35ce8982ef in _GLOBAL__sub_I_context.cpp golang/context.cpp:380 +# #3 0x7f35d32838bd in call_init elf/dl-init.c:90 +# #4 0x7f35d32838bd in call_init elf/dl-init.c:27 +# #5 0x7f35d32839a3 in _dl_init elf/dl-init.c:137 +# #6 0x7f35d256e023 in __GI__dl_catch_exception elf/dl-error-skeleton.c:182 +# #7 0x7f35d328a09d in dl_open_worker elf/dl-open.c:808 +# #8 0x7f35d256dfc9 in __GI__dl_catch_exception elf/dl-error-skeleton.c:208 +# #9 0x7f35d328a437 in _dl_open elf/dl-open.c:884 +# #10 0x7f35d24a4437 in dlopen_doit dlfcn/dlopen.c:56 +# #11 0x7f35d256dfc9 in __GI__dl_catch_exception elf/dl-error-skeleton.c:208 +# #12 0x7f35d256e07e in __GI__dl_catch_error elf/dl-error-skeleton.c:227 +# #13 0x7f35d24a3f26 in _dlerror_run dlfcn/dlerror.c:138 +# #14 0x7f35d24a44e8 in dlopen_implementation dlfcn/dlopen.c:71 +# #15 0x7f35d24a44e8 in ___dlopen dlfcn/dlopen.c:81 +# #16 0x7f35d2a77ff9 in dlopen .../sanitizer_common_interceptors.inc:6341 +leak:^_GLOBAL_ + + +# global<> does not deallocate its reference on purpose +leak:^_test_global()$ diff --git a/3rdparty/ratas b/3rdparty/ratas new file mode 160000 index 0000000..becd5fc --- /dev/null +++ b/3rdparty/ratas @@ -0,0 +1 @@ +Subproject commit becd5fc5c1e9ea600cd8b3b1c24d564794fedac4 diff --git a/MANIFEST.in b/MANIFEST.in index e2cae70..17a041e 100644 --- a/MANIFEST.in +++ 
b/MANIFEST.in @@ -1,4 +1,4 @@ -include COPYING README.rst CHANGELOG.rst tox.ini pyproject.toml trun .nxdtest +include COPYING README.rst CHANGELOG.rst tox.ini pyproject.toml trun .lsan-ignore.txt .nxdtest conftest.py include golang/libgolang.h include golang/runtime/libgolang.cpp include golang/runtime/libpyxruntime.cpp diff --git a/conftest.py b/conftest.py index 1ca5c1b..1f37bdd 100644 --- a/conftest.py +++ b/conftest.py @@ -1,3 +1,33 @@ +# pygolang | pytest config +# Copyright (C) 2021-2024 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. + +from __future__ import print_function, absolute_import + +import gc + + +# Do full GC before pytest exits, to avoid false positives in the leak detector. +def pytest_unconfigure(): + gc.collect() + + # ignore tests in distorm - else it breaks as e.g. 
# # 3rdparty/funchook/distorm/python/test_distorm3.py:15: in diff --git a/golang/_golang.pyx b/golang/_golang.pyx index 689d6a1..2a0ba56 100644 --- a/golang/_golang.pyx +++ b/golang/_golang.pyx @@ -173,11 +173,18 @@ cdef void __goviac(void *arg) nogil: # ---- channels ---- +# _frompyx indicates that a constructor is called from pyx code +cdef object _frompyx = object() + @final cdef class pychan: def __cinit__(pychan pych, size=0, dtype=object): - pych.dtype = parse_dtype(dtype) - pych._ch = _makechan_pyexc(dtypeRegistry[pych.dtype].size, size) + if dtype is _frompyx: + pych.dtype = DTYPE_STRUCTZ # anything + pych._ch = NULL + else: + pych.dtype = parse_dtype(dtype) + pych._ch = _makechan_pyexc(dtypeRegistry[pych.dtype].size, size) # pychan.nil(X) creates new nil pychan with specified dtype. # TODO try to avoid exposing .nil on pychan instances, and expose only pychan.nil @@ -370,7 +377,7 @@ cdef void pychan_asserttype(pychan pych, DType dtype) nogil: panic("pychan: channel type mismatch") cdef pychan pychan_from_raw(_chan *_ch, DType dtype): - cdef pychan pych = pychan.__new__(pychan) + cdef pychan pych = pychan.__new__(pychan, dtype=_frompyx) pych.dtype = dtype pych._ch = _ch; _chanxincref(_ch) return pych @@ -626,9 +633,7 @@ cdef object c_to_py(DType dtype, const chanElemBuf *cfrom): # mkpynil creates pychan instance that represents nil[dtype]. 
cdef PyObject *mkpynil(DType dtype): - cdef pychan pynil = pychan.__new__(pychan) - pynil.dtype = dtype - pynil._ch = NULL # should be already NULL + cdef pychan pynil = pychan_from_raw(NULL, dtype) Py_INCREF(pynil) return pynil @@ -818,9 +823,6 @@ from libcpp.typeinfo cimport type_info from cython.operator cimport typeid from libc.string cimport strcmp -# _frompyx indicates that a constructor is called from pyx code -cdef object _frompyx = object() - cdef class pyerror(Exception): # pyerror <- error @staticmethod diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 3e4a64f..6172711 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -580,6 +580,7 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 def title(self): return pyb(pyu(self).title()) def translate(self, table, delete=None): # bytes mode (compatibility with str/py2) + # XXX isinstance(zbytes) -> isinstance(bytes) ? if table is None or isinstance(table, zbytes) or delete is not None: if delete is None: delete = b'' return pyb(zbytes.translate(self, table, delete)) @@ -905,12 +906,7 @@ cdef class _pyustr(unicode): def translate(self, table): # unicode.translate does not accept bstr values - t = {} - for k,v in table.items(): - if not isinstance(v, int): # either unicode ordinal, - v = _xpyu_coerce(v) # character or None - t[k] = v - return pyu(zunicode.translate(self, t)) + return pyu(zunicode.translate(self, _pyustrTranslateTab(table))) def upper(self): return pyu(zunicode.upper(self)) def zfill(self, width): return pyu(zunicode.zfill(self, width)) @@ -983,6 +979,18 @@ cdef class _pyustrIter: x = next(self.uiter) return pyu(x) +# _pyustrTranslateTab wraps table for .translate to return bstr as unicode +# because unicode.translate does not accept bstr values. 
+cdef class _pyustrTranslateTab: + cdef object tab + def __init__(self, tab): + self.tab = tab + def __getitem__(self, k): + v = self.tab[k] + if not isinstance(v, int): # either unicode ordinal, + v = _xpyu_coerce(v) # character or None + return v + # _bdata/_udata retrieve raw data from bytes/unicode. def _bdata(obj): # -> bytes diff --git a/golang/_golang_test.pyx b/golang/_golang_test.pyx index d029ce0..3c9f60e 100644 --- a/golang/_golang_test.pyx +++ b/golang/_golang_test.pyx @@ -2,7 +2,7 @@ # cython: language_level=2 # distutils: language=c++ # -# Copyright (C) 2018-2020 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -344,6 +344,25 @@ cdef nogil: pych.chan_double().close() +# verify that pychan_from_raw is not leaking C channel. +def test_pychan_from_raw_noleak(): + # pychan_from_raw used to create another channel and leak it + # + # this test _implicitly_ verifies that it is no longer the case - if it is, + # LSAN will report a memory leak after running the test. + # + # TODO consider adding explicit verification effective even under regular + # builds. Possible options: + # + # * verify malloc totals before and after tested code + # see e.g. https://stackoverflow.com/q/1761125/9456786 + # * hook _makechan and verify that it is not invoked from under + # pychan_from_raw. Depends on funchook integration. 
+ cdef chan[int] ch = makechan[int]() + cdef pychan pych = pychan.from_chan_int(ch) # uses pychan_from_raw internally + # pych and ch are freed automatically + + # ---- benchmarks ---- # bench_go_nogil mirrors golang_test.py:bench_go diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 975584a..6f88ad5 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -1567,7 +1567,10 @@ def test_strings_methods(): # checkop verifies that `s.meth(*argv, **kw)` gives the same result for s, # argv and kw being various combinations of unicode,bstr,ustr, bytes/bytearray. def checkop(s, meth, *argv, **kw): - assert type(s) is str + if six.PY3: + assert type(s) is str + else: + assert type(s) in (str, unicode) # some tests use unicode because \u does not work in str literals ok = kw.pop('ok') if six.PY2: ok = deepReplaceStr(ok, xunicode) @@ -1738,7 +1741,7 @@ def _(*argv, **kw): _("123").isnumeric( ok=True) _("0x123").isnumeric( ok=False) _("мир").isprintable( ok=True, optional=True) # py3.0 - _("\u2009").isspace( ok=x32(True,False)) # thin space + _(u"\u2009").isspace( ok=True) # thin space _(" ").isspace( ok=True) _("мир").isspace( ok=False) _("мир").istitle( ok=False) @@ -1748,8 +1751,8 @@ def _(*argv, **kw): _("мир").ljust(10, ok="мир ") _("мир").ljust(10, 'ж', ok="миржжжжжжж") _("МиР").lower( ok="мир") - _("\u2009 мир").lstrip( ok=x32("мир", "\u2009 мир")) - _("\u2009 мир\u2009 ").lstrip( ok=x32("мир\u2009 ", "\u2009 мир\u2009 ")) + _(u"\u2009 мир").lstrip( ok="мир") + _(u"\u2009 мир\u2009 ").lstrip( ok=u"мир\u2009 ") _("мммир").lstrip('ми', ok="р") _("миру мир").partition('ру', ok=("ми", "ру", " мир")) _("миру мир").partition('ж', ok=("миру мир", "", "")) @@ -1764,15 +1767,15 @@ def _(*argv, **kw): _("миру мир").rpartition('ж', ok=("", "", "миру мир")) _("мир").rsplit( ok=["мир"]) _("привет мир").rsplit( ok=["привет", "мир"]) - _("привет\u2009мир").rsplit( ok=x32(["привет", "мир"], ["привет\u2009мир"])) + _(u"привет\u2009мир").rsplit( 
ok=["привет", "мир"]) _("привет мир").rsplit("и", ok=["пр", "вет м", "р"]) _("привет мир").rsplit("и", 1, ok=["привет м", "р"]) - _("мир \u2009").rstrip( ok=x32("мир", "мир \u2009")) - _(" мир \u2009").rstrip( ok=x32(" мир", " мир \u2009")) + _(u"мир \u2009").rstrip( ok="мир") + _(u" мир \u2009").rstrip( ok=" мир") _("мируу").rstrip('ру', ok="ми") _("мир").split( ok=["мир"]) _("привет мир").split( ok=["привет", "мир"]) - _("привет\u2009мир").split( ok=x32(['привет', 'мир'], ["привет\u2009мир"])) + _(u"привет\u2009мир").split( ok=['привет', 'мир']) _("привет мир").split("и", ok=["пр", "вет м", "р"]) _("привет мир").split("и", 1, ok=["пр", "вет мир"]) _("мир").splitlines( ok=["мир"]) @@ -1782,11 +1785,12 @@ def _(*argv, **kw): _("мир\nтруд\nмай\n").splitlines( ok=["мир", "труд", "май"]) _("мир\nтруд\nмай\n").splitlines(True, ok=["мир\n", "труд\n", "май\n"]) # startswith - tested in test_strings_index - _("\u2009 мир \u2009").strip( ok=x32("мир", "\u2009 мир \u2009")) + _(u"\u2009 мир \u2009").strip( ok="мир") _("миру мир").strip('мир', ok="у ") _("МиР").swapcase( ok="мИр") _("МиР").title( ok="Мир") _("мир").translate({ord(u'м'):ord(u'и'), ord(u'и'):'я', ord(u'р'):None}, ok="ия") + _(u"\u0000\u0001\u0002.").translate([u'м', ord(u'и'), None], ok="ми.") _("МиР").upper( ok="МИР") _("мир").zfill(10, ok="0000000мир") _("123").zfill(10, ok="0000000123") diff --git a/golang/golang_test.py b/golang/golang_test.py index 46d6e95..ce9bd0f 100644 --- a/golang/golang_test.py +++ b/golang/golang_test.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -74,7 +74,8 @@ def _(b, func=getattr(mod, f)): # leaked goroutine behaviour check: done in separate process because we need # to test process termination exit there. 
def test_go_leaked(): - pyrun([dir_testprog + "/golang_test_goleaked.py"]) + pyrun([dir_testprog + "/golang_test_goleaked.py"], + lsan=False) # there are on-purpose leaks in this test # benchmark go+join a thread/coroutine. # pyx/nogil mirror is in _golang_test.pyx @@ -1756,6 +1757,11 @@ def _pyrun(argv, stdin=None, stdout=None, stderr=None, **kw): # -> retcode, st assert len(enc) == 1 env['PYTHONIOENCODING'] = enc.pop() + # disable LeakSanitizer if requested, e.g. when test is known to leak something on purpose + lsan = kw.pop('lsan', True) + if not lsan: + env['ASAN_OPTIONS'] = env.get('ASAN_OPTIONS', '') + ',detect_leaks=0' + p = Popen(argv, stdin=(PIPE if stdin else None), stdout=stdout, stderr=stderr, env=env, **kw) stdout, stderr = p.communicate(stdin) diff --git a/golang/libgolang.h b/golang/libgolang.h index 4131a84..b606bc5 100644 --- a/golang/libgolang.h +++ b/golang/libgolang.h @@ -345,8 +345,13 @@ typedef struct _libgolang_runtime_ops { // previously successfully allocated via sema_alloc. void (*sema_free) (_libgolang_sema*); - // sema_acquire/sema_release should acquire/release live semaphore allocated via sema_alloc. - void (*sema_acquire)(_libgolang_sema*); + // sema_acquire should try to acquire live semaphore allocated via sema_alloc during given time. + // it returns whether acquisition succeeded or timed out. + // the timeout is specified in nanoseconds. + // UINT64_MAX means no timeout. + bool (*sema_acquire)(_libgolang_sema*, uint64_t timeout_ns); + + // sema_release should release live semaphore allocated via sema_alloc. void (*sema_release)(_libgolang_sema*); // nanosleep should pause current goroutine for at least dt nanoseconds. diff --git a/golang/pyx/build_test.py b/golang/pyx/build_test.py index af03136..503aa17 100644 --- a/golang/pyx/build_test.py +++ b/golang/pyx/build_test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2019 Nexedi SA and Contributors. 
-# Kirill Smelkov +# Copyright (C) 2019-2024 Nexedi SA and Contributors. +# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your @@ -28,7 +28,8 @@ # verify that we can build/run external package that uses pygolang in pyx mode. def test_pyx_build(): pyxuser = testprog + "/golang_pyx_user" - pyrun(["setup.py", "build_ext", "-i"], cwd=pyxuser) + pyrun(["setup.py", "build_ext", "-i"], cwd=pyxuser, + lsan=False) # gcc leaks # run built test. _ = pyout(["-c", @@ -44,8 +45,8 @@ def test_pyx_build(): # verify that we can build/run external dso that uses libgolang. def test_dso_build(): dsouser = testprog + "/golang_dso_user" - pyrun(["setup.py", "build_dso", "-i"], cwd=dsouser) - pyrun(["setup.py", "build_ext", "-i"], cwd=dsouser) + pyrun(["setup.py", "build_dso", "-i"], cwd=dsouser, lsan=False) # gcc leaks + pyrun(["setup.py", "build_ext", "-i"], cwd=dsouser, lsan=False) # gcc leaks # run built test. _ = pyout(["-c", diff --git a/golang/runtime/_libgolang.pxd b/golang/runtime/_libgolang.pxd index 958395a..edbc5eb 100644 --- a/golang/runtime/_libgolang.pxd +++ b/golang/runtime/_libgolang.pxd @@ -1,5 +1,5 @@ # cython: language_level=2 -# Copyright (C) 2019-2022 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. 
# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -36,7 +36,7 @@ cdef extern from "golang/libgolang.h" namespace "golang" nogil: _libgolang_sema* (*sema_alloc) () void (*sema_free) (_libgolang_sema*) - void (*sema_acquire)(_libgolang_sema*) + bint (*sema_acquire)(_libgolang_sema*, uint64_t timeout_ns) void (*sema_release)(_libgolang_sema*) void (*nanosleep)(uint64_t) diff --git a/golang/runtime/_runtime_gevent.pyx b/golang/runtime/_runtime_gevent.pyx index dcf4f33..b05ae68 100644 --- a/golang/runtime/_runtime_gevent.pyx +++ b/golang/runtime/_runtime_gevent.pyx @@ -1,5 +1,5 @@ # cython: language_level=2 -# Copyright (C) 2019-2023 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -40,7 +40,10 @@ ELSE: from gevent import sleep as pygsleep -from libc.stdint cimport uint64_t +from libc.stdint cimport uint8_t, uint64_t, UINT64_MAX +cdef extern from *: + ctypedef bint cbool "bool" + from cpython cimport PyObject, Py_INCREF, Py_DECREF from cython cimport final @@ -95,9 +98,12 @@ cdef: Py_DECREF(pygsema) return True - bint _sema_acquire(_libgolang_sema *gsema): + bint _sema_acquire(_libgolang_sema *gsema, uint64_t timeout_ns, cbool* pacq): pygsema = gsema - pygsema.acquire() + timeout = None + if timeout_ns != UINT64_MAX: + timeout = float(timeout_ns) * 1e-9 + pacq[0] = pygsema.acquire(timeout=timeout) return True bint _sema_release(_libgolang_sema *gsema): @@ -142,14 +148,16 @@ cdef nogil: if not ok: panic("pyxgo: gevent: sema: free: failed") - void sema_acquire(_libgolang_sema *gsema): + cbool sema_acquire(_libgolang_sema *gsema, uint64_t timeout_ns): cdef PyExc exc + cdef cbool acq with gil: pyexc_fetch(&exc) - ok = _sema_acquire(gsema) + ok = _sema_acquire(gsema, timeout_ns, &acq) pyexc_restore(exc) if not ok: panic("pyxgo: gevent: sema: acquire: failed") + return acq void 
sema_release(_libgolang_sema *gsema): cdef PyExc exc diff --git a/golang/runtime/_runtime_thread.pyx b/golang/runtime/_runtime_thread.pyx index 288de3d..4325e89 100644 --- a/golang/runtime/_runtime_thread.pyx +++ b/golang/runtime/_runtime_thread.pyx @@ -1,5 +1,5 @@ # cython: language_level=2 -# Copyright (C) 2019-2022 Nexedi SA and Contributors. +# Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -35,7 +35,12 @@ from __future__ import print_function, absolute_import # # NOTE Cython declares PyThread_acquire_lock/PyThread_release_lock as nogil from cpython.pythread cimport PyThread_acquire_lock, PyThread_release_lock, \ - PyThread_type_lock, WAIT_LOCK + PyThread_type_lock, WAIT_LOCK, NOWAIT_LOCK, PyLockStatus, PY_LOCK_ACQUIRED, PY_LOCK_FAILURE + +cdef extern from * nogil: + ctypedef int PY_TIMEOUT_T # long long there + PyLockStatus PyThread_acquire_lock_timed(PyThread_type_lock, PY_TIMEOUT_T timeout_us, int intr_flag) + # NOTE On Darwin, even though this is considered as POSIX, Python uses # mutex+condition variable to implement its lock, and, as of 20190828, Py2.7 @@ -98,6 +103,9 @@ from libc.errno cimport errno, EINTR, EBADF from posix.fcntl cimport mode_t from posix.stat cimport struct_stat from posix.strings cimport bzero +cdef extern from *: + ctypedef bint cbool "bool" + IF POSIX: from posix.time cimport clock_gettime, nanosleep as posix_nanosleep, timespec, CLOCK_REALTIME ELSE: @@ -138,11 +146,46 @@ cdef nogil: pysema = gsema PyThread_free_lock(pysema) - void sema_acquire(_libgolang_sema *gsema): + cbool sema_acquire(_libgolang_sema *gsema, uint64_t timeout_ns): pysema = gsema - ok = PyThread_acquire_lock(pysema, WAIT_LOCK) - if ok == 0: - panic("pyxgo: thread: sema_acquire: PyThread_acquire_lock failed") + IF PY3: + cdef PY_TIMEOUT_T timeout_us + ELSE: + cdef uint64_t tprev, t, tsleep + if timeout_ns == UINT64_MAX: + ok = PyThread_acquire_lock(pysema, 
WAIT_LOCK) + if ok == 0: + panic("pyxgo: thread: sema_acquire: PyThread_acquire_lock failed") + return 1 + else: + IF PY3: + timeout_us = timeout_ns // 1000 + lkok = PyThread_acquire_lock_timed(pysema, timeout_us, 0) + if lkok == PY_LOCK_FAILURE: + return 0 + elif lkok == PY_LOCK_ACQUIRED: + return 1 + else: + panic("pyxgo: thread: sema_acquire: PyThread_acquire_lock_timed failed") + ELSE: + # py2 misses PyThread_acquire_lock_timed - provide fallback ourselves + tprev = nanotime() + while 1: + ok = PyThread_acquire_lock(pysema, NOWAIT_LOCK) + if ok: + return 1 + tsleep = min(timeout_ns, 50*1000) # poll every 50 μs = 20 Hz + if tsleep == 0: + break + nanosleep(tsleep) + t = nanotime() + if t < tprev: + break # clock skew + if t - tprev >= timeout_ns: + break + timeout_ns -= t - tprev + tprev = t + return 0 void sema_release(_libgolang_sema *gsema): pysema = gsema diff --git a/golang/runtime/libgolang.cpp b/golang/runtime/libgolang.cpp index f91772a..a6a288a 100644 --- a/golang/runtime/libgolang.cpp +++ b/golang/runtime/libgolang.cpp @@ -131,6 +131,7 @@ using internal::_runtime; namespace internal { namespace atomic { extern void _init(); } } namespace os { namespace signal { extern void _init(); } } +namespace time { extern void _init(); } void _libgolang_init(const _libgolang_runtime_ops *runtime_ops) { if (_runtime != nil) // XXX better check atomically panic("libgolang: double init"); @@ -138,6 +139,7 @@ void _libgolang_init(const _libgolang_runtime_ops *runtime_ops) { internal::atomic::_init(); os::signal::_init(); + time::_init(); } void _taskgo(void (*f)(void *), void *arg) { @@ -166,7 +168,15 @@ void _semafree(_sema *sema) { } void _semaacquire(_sema *sema) { - _runtime->sema_acquire((_libgolang_sema *)sema); + bool ok; + ok = _runtime->sema_acquire((_libgolang_sema *)sema, UINT64_MAX); + if (!ok) + panic("semaacquire: failed"); +} + +// NOTE not currently exposed in public API +bool _semaacquire_timed(_sema *sema, uint64_t timeout_ns) { + return 
_runtime->sema_acquire((_libgolang_sema *)sema, timeout_ns); } void _semarelease(_sema *sema) { diff --git a/golang/time.cpp b/golang/time.cpp index b644b4c..6e893f9 100644 --- a/golang/time.cpp +++ b/golang/time.cpp @@ -1,4 +1,4 @@ -// Copyright (C) 2019-2020 Nexedi SA and Contributors. +// Copyright (C) 2019-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute @@ -21,8 +21,24 @@ // See time.h for package overview. #include "golang/time.h" +#include "timer-wheel.h" -#include + +#define DEBUG 0 +#if DEBUG +# define debugf(format, ...) fprintf(stderr, format, ##__VA_ARGS__) +#else +# define debugf(format, ...) do {} while (0) +#endif + + +// golang::sync:: (private imports) +namespace golang { +namespace sync { + +bool _semaacquire_timed(_sema *sema, uint64_t timeout_ns); + +}} // golang::sync:: // golang::time:: (except sleep and now) @@ -30,7 +46,6 @@ namespace golang { namespace time { // ---- timers ---- -// FIXME timers are implemented very inefficiently - each timer currently consumes a goroutine. 
Ticker new_ticker(double dt); Timer new_timer (double dt); @@ -51,7 +66,12 @@ Timer after_func(double dt, func f) { return _new_timer(dt, f); } -// Ticker +Timer new_timer(double dt) { + return _new_timer(dt, nil); +} + + +// Ticker (small wrapper around Timer) _Ticker::_Ticker() {} _Ticker::~_Ticker() {} void _Ticker::decref() { @@ -67,9 +87,7 @@ Ticker new_ticker(double dt) { tx->c = makechan(1); // 1-buffer -- same as in Go tx->_dt = dt; tx->_stop = false; - go([tx]() { - tx->_tick(); - }); + tx->_timer = after_func(dt, [tx]() { tx ->_tick(); }); return tx; } @@ -78,6 +96,10 @@ void _Ticker::stop() { tx._mu.lock(); tx._stop = true; + if (tx._timer != nil) { + tx._timer->stop(); + tx._timer = nil; // break Ticker -> Timer -> _tick -> Ticker cycle + } // drain what _tick could have been queued already while (tx.c.len() > 0) @@ -88,113 +110,379 @@ void _Ticker::stop() { void _Ticker::_tick() { _Ticker &tx = *this; - while (1) { - // XXX adjust for accumulated error δ? - sleep(tx._dt); - - tx._mu.lock(); - if (tx._stop) { - tx._mu.unlock(); - return; - } - - // send from under ._mu so that .stop can be sure there is no - // ongoing send while it drains the channel. - double t = now(); - select({ - _default, - tx.c.sends(&t), - }); + tx._mu.lock(); + if (tx._stop) { tx._mu.unlock(); + return; } + + // XXX adjust for accumulated error δ? + tx._timer->reset(tx._dt); + + // send from under ._mu so that .stop can be sure there is no + // ongoing send while it drains the channel. + double t = now(); + select({ + _default, + tx.c.sends(&t), + }); + tx._mu.unlock(); } -// Timer +// Timers +// +// Timers are implemented via Timer Wheel. +// For this time arrow is divided into equal periods named ticks, and Ratas +// library[1] is used to manage timers with granularity of ticks. We employ +// ticks to avoid unnecessary overhead of managing timeout-style timers with +// nanosecond precision. +// +// Let g denote tick granularity. 
+// +// The timers are provided with guaranty that their expiration happens after +// requested expiration time. In other words the following invariant is always true: +// +// t(exp) ≤ t(fire) +// +// we also want that firing _ideally_ happens not much far away from requested +// expiration time, meaning that the following property is aimed for, but not guaranteed: +// +// t(fire) < t(exp) + g +// +// a tick Ti is associated with [i-1,i)·g time range. It is said that tick Ti +// "happens" at i·g point in time. Firing of timers associated with tick Ti is +// done when Ti happens - ideally at i·g time or strictly speaking ≥ that point. +// +// When timers are armed their expiration tick is set as Texp = ⌊t(exp)/g+1⌋ to +// be in time range that tick Texp covers. +// +// +// A special goroutine, _timer_loop, is dedicated to advance time of the +// timer-wheel as ticks happen, and to run expired timers. When there is +// nothing to do that goroutine pauses itself and goes to sleep until either +// next expiration moment, or until new timer with earlier expiration time is +// armed. To be able to simultaneously select on those two condition a +// semaphore with acquisition timeout is employed. Please see _tSema for +// details. +// +// +// [1] Ratas - A hierarchical timer wheel. +// https://www.snellman.net/blog/archive/2016-07-27-ratas-hierarchical-timer-wheel, +// https://github.com/jsnell/ratas + +// Tns indicates time measured in nanoseconds. +// It is used for documentation purposes mainly to distinguish from the time measured in ticks. +typedef uint64_t Tns; + +// _tick_g is ticks granularity in nanoseconds. +static const Tns _tick_g = 1024; // 1 tick is ~ 1 μs + + +// timer-wheel holds registry of all timers and manages them. 
+static sync::Mutex* _tWheelMu; // lock for timer wheel + sleep/wakeup channel (see _tSema & co below)
+static TimerWheel* _tWheel; // for each timer the wheel holds 1 reference to _TimerImpl object
+
+// _TimerImpl amends _Timer with timer-wheel entry and implementation-specific state.
+enum _TimerState {
+ _TimerDisarmed, // timer is not registered to timer wheel and is not firing
+ _TimerArmed, // timer is registered to timer wheel and is not firing
+ _TimerFiring // timer is currently firing (and not on the timer wheel)
+};
+struct _TimerImpl : _Timer {
+ void _fire();
+ void _queue_fire();
+ MemberTimerEvent<_TimerImpl, &_TimerImpl::_queue_fire> _tWheelEntry;
+
+ func _f;
+
+ sync::Mutex _mu;
+ _TimerState _state;
+
+ // entry on "firing" list; see _tFiring for details
+ _TimerImpl* _tFiringNext; // TODO could reuse _tWheelEntry.{next_,prev_} for "firing" list
+
+ _TimerImpl();
+ ~_TimerImpl();
+};
+
+_TimerImpl::_TimerImpl() : _tWheelEntry(this) {}
+_TimerImpl::~_TimerImpl() {}
+
 _Timer::_Timer() {}
 _Timer::~_Timer() {}
 void _Timer::decref() {
 if (__decref())
- delete this;
+ delete static_cast<_TimerImpl*>(this);
+}
+
+
+// _tSema and _tSleeping + _tWaking organize sleep/wakeup channel.
+//
+// Timer loop uses wakeup sema to both:
+// * sleep until next timer expires, and
+// * become woken up earlier if new timer with earlier expiration time is armed
+//
+// _tSleeping + _tWaking are used by the timer loop and clients to coordinate
+// _tSema operations, so that the value of sema is always 0 or 1, and that
+// every new loop cycle starts with sema=0, meaning that sema.Acquire will block.
+//
+// Besides sema.Acquire, all operations on the sleep/wakeup channel are done under _tWheelMu.
+static sync::_sema* _tSema; +static bool _tSleeping; // 1 iff timer loop: + // \/ decided to go to sleep on wakeup sema + // \/ sleeps on wakeup sema via Acquire + // \/ woken up after Acquire before setting _tSleeping=0 back +static bool _tWaking; // 1 iff client timer arm: + // /\ saw _tSleeping=1 && _tWaking=0 and decided to do wakeup + // /\ (did Release \/ will do Release) + // /\ until timer loop set back _tWaking=0 +static Tns _tSleeping_until; // until when timer loop is sleeping if _tSleeping=1 + + +// _timer_loop implements timer loop: it runs in dedicated goroutine ticking the +// timer-wheel and sleeping in between ticks. +static void _timer_loop(); +static void _timer_loop_fire_queued(); +void _init() { + _tWheelMu = new sync::Mutex(); + _tWheel = new TimerWheel(_nanotime() / _tick_g); + _tSema = sync::_makesema(); sync::_semaacquire(_tSema); // 1 -> 0 + _tSleeping = false; + _tWaking = false; + _tSleeping_until = 0; + go(_timer_loop); +} + +static void _timer_loop() { + while (1) { + // tick the wheel. This puts expired timers on firing list but delays + // really firing them until we release _tWheelMu. + _tWheelMu->lock(); + Tick now_t = _nanotime() / _tick_g; + Tick wnow_t = _tWheel->now(); + Tick wdt_t = now_t - wnow_t; + debugf("LOOP: now_t: %lu wnow_t: %lu δ_t %lu ...\n", now_t, wnow_t, wdt_t); + if (now_t > wnow_t) // advance(0) panics. Avoid that if we wake up earlier + _tWheel->advance(wdt_t); // inside the same tick, e.g. due to signal. + _tWheelMu->unlock(); + + // fire the timers queued on the firing list + _timer_loop_fire_queued(); + + + // go to sleep until next timer expires or wakeup comes from new arming. + // + // limit max sleeping time because contrary to other wheel operations - + // - e.g. insert and delete which are O(1), the complexity of + // ticks_to_next_event is O(time till next expiry). 
+ Tns tsleep_max = 1*1E9; // 1s + bool sleeping = false; + + _tWheelMu->lock(); + Tick wsleep_t = _tWheel->ticks_to_next_event(tsleep_max / _tick_g); + Tick wnext_t = _tWheel->now() + wsleep_t; + + Tns tnext = wnext_t * _tick_g; + Tns tnow = _nanotime(); + + if (tnext > tnow) { + _tSleeping = sleeping = true; + _tSleeping_until = tnext; + } + _tWheelMu->unlock(); + + if (!sleeping) + continue; + + Tns tsleep = tnext - tnow; + debugf("LOOP: sleeping %.3f μs ...\n", tsleep / 1e3); + + bool acq = sync::_semaacquire_timed(_tSema, tsleep); + + // bring sleep/wakeup channel back into reset state with S=0 + _tWheelMu->lock(); + // acq ^ waking Release was done while Acquire was blocked S=0 + // acq ^ !waking impossible + // !acq ^ waking Acquire finished due to timeout; Release was done after that S=1 + // !acq ^ !waking Acquire finished due to timeout; no Release was done at all S=0 + + debugf("LOOP: woken up acq=%d waking=%d\n", acq, _tWaking); + + if ( acq && !_tWaking) { + _tWheelMu->unlock(); + panic("BUG: timer loop: woken up with acq ^ !waking"); + } + if (!acq && _tWaking) { + acq = sync::_semaacquire_timed(_tSema, 0); // S=1 -> acquire should be immediate + if (!acq) { + _tWheelMu->unlock(); + panic("BUG: timer loop: reacquire after acq ^ waking failed"); + } + } + + _tSleeping = false; + _tWaking = false; + _tSleeping_until = 0; + _tWheelMu->unlock(); + } } Timer _new_timer(double dt, func f) { - Timer t = adoptref(new _Timer()); - t->c = (f == nil ? makechan(1) : nil); - t->_f = f; - t->_dt = INFINITY; - t->_ver = 0; + _TimerImpl* _t = new _TimerImpl(); + + _t->c = (f == nil ? 
makechan(1) : nil); + _t->_f = f; + _t->_state = _TimerDisarmed; + _t->_tFiringNext = nil; + + Timer t = adoptref(static_cast<_Timer*>(_t)); t->reset(dt); return t; } -Timer new_timer(double dt) { - return _new_timer(dt, nil); +void _Timer::reset(double dt) { + _TimerImpl& t = *static_cast<_TimerImpl*>(this); + + if (dt <= 0) + dt = 0; + + Tns when = _nanotime() + Tns(dt*1e9); + Tick when_t = when / _tick_g + 1; // Ti covers [i-1,i)·g + + _tWheelMu->lock(); + t._mu.lock(); + if (t._state != _TimerDisarmed) { + t._mu.unlock(); + _tWheelMu->unlock(); + panic("Timer.reset: the timer is armed; must be stopped or expired"); + } + t._state = _TimerArmed; + + Tick wnow_t = _tWheel->now(); + Tick wdt_t; + if (when_t > wnow_t) + wdt_t = when_t - wnow_t; + else + wdt_t = 1; // schedule(0) panics + + // the wheel will keep a reference to the timer + t.incref(); + + _tWheel->schedule(&t._tWheelEntry, wdt_t); + t._mu.unlock(); + + // wakeup timer loop if it is sleeping until later than new timer expiry + if (_tSleeping) { + if ((when < _tSleeping_until) && !_tWaking) { + debugf("USER: waking up loop\n"); + _tWaking = true; + sync::_semarelease(_tSema); + } + } + + _tWheelMu->unlock(); } bool _Timer::stop() { - _Timer &t = *this; + _TimerImpl& t = *static_cast<_TimerImpl*>(this); bool canceled; + _tWheelMu->lock(); t._mu.lock(); - if (t._dt == INFINITY) { + switch (t._state) { + case _TimerDisarmed: canceled = false; - } - else { - t._dt = INFINITY; - t._ver += 1; + break; + + case _TimerArmed: + // timer wheel is holding this timer entry. Remove it from there. + t._tWheelEntry.cancel(); + t.decref(); + canceled = true; + break; + + case _TimerFiring: + // the timer is on "firing" list. Timer loop will process it and skip + // upon seeing ._state = _TimerDisarmed. It will also be the timer loop + // to drop the reference to the timer that timer-wheel was holding. 
canceled = true; + break; + + default: + panic("invalid timer state"); + } + if (canceled) + t._state = _TimerDisarmed; + // drain what _fire could have been queued already while (t.c.len() > 0) t.c.recv(); t._mu.unlock(); + _tWheelMu->unlock(); + return canceled; } -void _Timer::reset(double dt) { - _Timer &t = *this; +// when timers are fired by _tWheel.advance(), they are first popped from _tWheel and put on +// _tFiring list, so that the real firing could be done without holding _tWheelMu. +static _TimerImpl* _tFiring = nil; +static _TimerImpl* _tFiringLast = nil; + +void _TimerImpl::_queue_fire() { + _TimerImpl& t = *this; t._mu.lock(); - if (t._dt != INFINITY) { - t._mu.unlock(); - panic("Timer.reset: the timer is armed; must be stopped or expired"); - } - t._dt = dt; - t._ver += 1; - // TODO rework timers so that new timer does not spawn new goroutine. - Timer tref = newref(&t); // pass t reference to spawned goroutine - go([tref, dt](int ver) { - tref->_fire(dt, ver); - }, t._ver); + assert(t._state == _TimerArmed); + t._state = _TimerFiring; t._mu.unlock(); + + t._tFiringNext = nil; + if (_tFiring == nil) + _tFiring = &t; + if (_tFiringLast != nil) + _tFiringLast->_tFiringNext = &t; + _tFiringLast = &t; } -void _Timer::_fire(double dt, int ver) { - _Timer &t = *this; +static void _timer_loop_fire_queued() { + for (_TimerImpl* t = _tFiring; t != nil;) { + _TimerImpl* fnext = t->_tFiringNext; + t->_tFiringNext = nil; + t->_fire(); - sleep(dt); - t._mu.lock(); - if (t._ver != ver) { - t._mu.unlock(); - return; // the timer was stopped/resetted - don't fire it + t->decref(); // wheel was holding a reference to the timer + t = fnext; } - t._dt = INFINITY; + _tFiring = nil; + _tFiringLast = nil; +} - // send under ._mu so that .stop can be sure that if it sees - // ._dt = INFINITY, there is no ongoing .c send. 
- if (t._f == nil) { - t.c.send(now()); - t._mu.unlock(); - return; +void _TimerImpl::_fire() { + _TimerImpl& t = *this; + + bool fire = false; + t._mu.lock(); + if (t._state == _TimerFiring) { // stop could disarm the timer in the meantime + t._state = _TimerDisarmed; + fire = true; + + debugf("LOOP: firing @ %lu ...\n", t._tWheelEntry.scheduled_at()); + + // send under ._mu so that .stop can be sure that if it sees + // ._state = _TimerDisarmed, there is no ongoing .c send. + if (t._f == nil) + t.c.send(now()); } t._mu.unlock(); // call ._f not from under ._mu not to deadlock e.g. if ._f wants to reset the timer. - t._f(); + if (fire && t._f != nil) + t._f(); } }} // golang::time:: diff --git a/golang/time.h b/golang/time.h index 2e687f2..18e8b93 100644 --- a/golang/time.h +++ b/golang/time.h @@ -1,7 +1,7 @@ #ifndef _NXD_LIBGOLANG_TIME_H #define _NXD_LIBGOLANG_TIME_H -// Copyright (C) 2019-2023 Nexedi SA and Contributors. +// Copyright (C) 2019-2024 Nexedi SA and Contributors. // Kirill Smelkov // // This program is free software: you can Use, Study, Modify and Redistribute @@ -118,6 +118,7 @@ struct _Ticker : object { double _dt; sync::Mutex _mu; bool _stop; + Timer _timer; // don't new - create only via new_ticker() private: @@ -147,18 +148,12 @@ LIBGOLANG_API Timer new_timer(double dt); struct _Timer : object { chan c; -private: - func _f; - - sync::Mutex _mu; - double _dt; // +inf - stopped, otherwise - armed - int _ver; // current timer was armed by n'th reset - // don't new - create only via new_timer() & co private: _Timer(); ~_Timer(); friend Timer _new_timer(double dt, func f); + friend class _TimerImpl; public: LIBGOLANG_API void decref(); @@ -182,9 +177,6 @@ struct _Timer : object { // // the timer must be either already stopped or expired. 
LIBGOLANG_API void reset(double dt); - -private: - void _fire(double dt, int ver); }; diff --git a/golang/time_test.py b/golang/time_test.py index e72c597..056367a 100644 --- a/golang/time_test.py +++ b/golang/time_test.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2019 Nexedi SA and Contributors. -# Kirill Smelkov +# Copyright (C) 2019-2024 Nexedi SA and Contributors. +# Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute # it under the terms of the GNU General Public License version 3, or (at your @@ -20,9 +20,10 @@ from __future__ import print_function, absolute_import -from golang import select -from golang import time +from golang import select, func, defer +from golang import time, sync from golang.golang_test import panics +from six.moves import range as xrange # all timer tests operate in dt units dt = 10*time.millisecond @@ -65,6 +66,7 @@ def test_ticker_time(): # test_timer verifies that Timer/Ticker fire as expected. +@func def test_timer(): # start timers at x5, x7 and x11 intervals an verify that the timers fire # in expected sequence. The times when the timers fire do not overlap in @@ -73,15 +75,15 @@ def test_timer(): tv = [] # timer events Tstart = time.now() - t23 = time.Timer(23*dt) - t5 = time.Timer( 5*dt) + t23 = time.Timer(23*dt); defer(t23.stop) + t5 = time.Timer( 5*dt); defer(t5 .stop) def _(): tv.append(7) t7f.reset(7*dt) - t7f = time.Timer( 7*dt, f=_) + t7f = time.Timer( 7*dt, f=_); defer(t7f.stop) - tx11 = time.Ticker(11*dt) + tx11 = time.Ticker(11*dt); defer(tx11.stop) while 1: _, _rx = select( @@ -108,19 +110,20 @@ def _(): # test_timer_misc, similarly to test_timer, verifies misc timer convenience functions. 
+@func def test_timer_misc(): tv = [] Tstart = time.now() - c23 = time.after(23*dt) - c5 = time.after( 5*dt) + c23 = time.after(23*dt) # cannot stop + c5 = time.after( 5*dt) # cannot stop def _(): tv.append(7) t7f.reset(7*dt) - t7f = time.after_func(7*dt, _) + t7f = time.after_func(7*dt, _); defer(t7f.stop) - cx11 = time.tick(11*dt) + cx11 = time.tick(11*dt) # cannot stop while 1: _, _rx = select( @@ -148,13 +151,14 @@ def _(): # test_timer_stop verifies that .stop() cancels Timer or Ticker. +@func def test_timer_stop(): tv = [] - t10 = time.Timer (10*dt) - t2 = time.Timer ( 2*dt) # will fire and cancel t3, tx5 - t3 = time.Timer ( 3*dt) # will be canceled - tx5 = time.Ticker( 5*dt) # will be canceled + t10 = time.Timer (10*dt); defer(t10.stop) + t2 = time.Timer ( 2*dt); defer(t2 .stop) # will fire and cancel t3, tx5 + t3 = time.Timer ( 3*dt); defer(t3 .stop) # will be canceled + tx5 = time.Ticker( 5*dt); defer(tx5.stop) # will be canceled while 1: _, _rx = select( @@ -180,9 +184,10 @@ def test_timer_stop(): # test_timer_stop_drain verifies that Timer/Ticker .stop() drains timer channel. +@func def test_timer_stop_drain(): - t = time.Timer (1*dt) - tx = time.Ticker(1*dt) + t = time.Timer (1*dt); defer(t.stop) + tx = time.Ticker(1*dt); defer(tx.stop) time.sleep(2*dt) assert len(t.c) == 1 @@ -195,9 +200,45 @@ def test_timer_stop_drain(): assert len(tx.c) == 0 +# test_timer_stop_vs_func verifies that Timer .stop() works correctly with func-timer. +@func +def test_timer_stop_vs_func(): + tv = [] + def _1(): tv.append(1) + def _2(): tv.append(2) + + t1 = time.after_func(1e6*dt, _1); defer(t1.stop) + t2 = time.after_func( 1*dt, _2); defer(t2.stop) + + time.sleep(2*dt) + assert t1.stop() == True + assert t2.stop() == False + assert tv == [2] + + # test_timer_reset_armed verifies that .reset() panics if called on armed timer. 
+@func def test_timer_reset_armed(): # reset while armed - t = time.Timer(10*dt) + t = time.Timer(10*dt); defer(t.stop) with panics("Timer.reset: the timer is armed; must be stopped or expired"): t.reset(5*dt) + + +# bench_timer_arm_cancel benchmarks arming timers that do not fire. +# it shows how cheap or expensive it is to use timers to implement timeouts. +def bench_timer_arm_cancel(b): + for i in xrange(b.N): + t = time.Timer(10*time.second) + _ = t.stop() + assert _ is True + + +# bench_timer_arm_fire benchmarks arming timers that do fire. +# it shows what it costs to go through all steps related to timer loop and firing timers. +def bench_timer_arm_fire(b): + wg = sync.WaitGroup() + wg.add(b.N) + for i in xrange(b.N): + t = time.after_func(1*time.millisecond, wg.done) + wg.wait() diff --git a/setup.py b/setup.py index 9ef79dc..8d675b8 100644 --- a/setup.py +++ b/setup.py @@ -188,7 +188,7 @@ def install_egg_scripts(self, dist): # requirements of packages under "golang." namespace R = { - 'cmd.pybench': {'pytest', 'py'}, + 'cmd.pybench': {'pytest', 'py ; python_version >= "3"'}, 'pyx.build': {'setuptools', 'wheel', 'cython < 3', 'setuptools_dso >= 2.8'}, 'x.perf.benchlib': {'numpy'}, } @@ -467,8 +467,11 @@ def defif(name, ok): 'golang/os/signal.h', 'golang/strings.h', 'golang/sync.h', - 'golang/time.h'], - include_dirs = ['3rdparty/include'], + 'golang/time.h', + '3rdparty/ratas/src/timer-wheel.h'], + include_dirs = [ + '3rdparty/include', + '3rdparty/ratas/src'], define_macros = [('BUILDING_LIBGOLANG', None)], soversion = '0.1'), @@ -604,9 +607,6 @@ def defif(name, ok): Programming Language :: Python :: 2 Programming Language :: Python :: 2.7 Programming Language :: Python :: 3 - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 diff --git a/tox.ini b/tox.ini index 
6833dce..dcd823d 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] envlist = - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread,gevent} + {py27d,py27,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread,gevent} # ThreadSanitizer @@ -10,24 +10,23 @@ envlist = # (*) PyPy locks its GIL (see RPyGilAcquire) by manually doing atomic cmpxchg # and other games, which TSAN cannot see if PyPy itself was not compiled with # -fsanitize=thread. - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{thread }-tsan + {py27d,py27,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{thread }-tsan # XXX py*-gevent-tsan would be nice to have, but at present TSAN is not # effective with gevent, because it does not understand greenlet "thread" # switching and so perceives the program as having only one thread where races # are impossible. Disabled to save time. -# {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{ gevent}-tsan +# {py27d,py27,py38,py39d,py39,py310d,py310,py311d,py311,py312 }-{ gevent}-tsan # AddressSanitizer # XXX asan does not work with gevent: https://github.com/python-greenlet/greenlet/issues/113 - {py27d,py27,py37,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread }-asan + {py27d,py27,py38,py39d,py39,py310d,py310,py311d,py311,py312,pypy,pypy3}-{thread }-asan [testenv] basepython = py27d: python2.7-dbg py27: python2.7 - py37: python3.7 py38: python3.8 py39d: python3.9-dbg py39: python3.9 @@ -43,16 +42,16 @@ basepython = setenv = # distutils take CFLAGS for both C and C++. # distutils use CFLAGS also at link stage -> we don't need to set LDFLAGS separately. 
- tsan: CFLAGS=-g -fsanitize=thread - asan: CFLAGS=-g -fsanitize=address + tsan: CFLAGS=-g -fsanitize=thread -fno-omit-frame-pointer + asan: CFLAGS=-g -fsanitize=address -fno-omit-frame-pointer # XXX however distutils' try_link, which is used by numpy.distutils use only CC # as linker without CFLAGS and _without_ LDFLAGS, which fails if *.o were # compiled with -fsanitize=X and linked without that option. Work it around # with also adjusting CC. # XXX better arrange to pass CFLAGS to pygolang only, e.g. by adding --race or # --sanitize=thread to `setup.py build_ext`. - tsan: CC=cc -fsanitize=thread - asan: CC=cc -fsanitize=address + tsan: CC=cc -fsanitize=thread -fno-omit-frame-pointer + asan: CC=cc -fsanitize=address -fno-omit-frame-pointer # always compile pygolang from source and don't reuse binary pygolang wheels as # we compile each case with different CFLAGS. @@ -76,3 +75,5 @@ commands= # likewise for python debug builds. asan,tsan,py{27,39,310,311,312}d: -s \ gpython/ golang/ + +allowlist_externals={toxinidir}/trun diff --git a/trun b/trun index 727e063..9d490fa 100755 --- a/trun +++ b/trun @@ -1,4 +1,5 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # Copyright (C) 2019-2024 Nexedi SA and Contributors. # Kirill Smelkov # @@ -34,7 +35,7 @@ trun cares to run python with LD_PRELOAD set appropriately to /path/to/libtsan.s from __future__ import print_function, absolute_import -import os, sys, re, subprocess, types +import os, os.path, sys, re, subprocess, platform, types PY3 = (bytes is not str) if PY3: from importlib import machinery as imp_machinery @@ -87,6 +88,7 @@ def main(): # determine if _golang.so is linked to a sanitizer, and if yes, to which # particular sanitizer DSO. Set LD_PRELOAD appropriately. 
+ libxsan = None ld_preload = None if 'linux' in sys.platform: p = subprocess.Popen(["ldd", _golang_so.path], stdout=subprocess.PIPE) @@ -127,7 +129,8 @@ def main(): _ = grep1("DYLD_INSERT_LIBRARIES=(.*)$", err) if _ is not None: - ld_preload = ("DYLD_INSERT_LIBRARIES", _.group(1)) + libxsan = _.group(1) + ld_preload = ("DYLD_INSERT_LIBRARIES", libxsan) else: print("trun %r: `import golang` failed with unexpected error:" % sys.argv[1:], file=sys.stderr) print(err, file=sys.stderr) @@ -144,7 +147,7 @@ def main(): env_prepend("TSAN_OPTIONS", "halt_on_error=1") env_prepend("ASAN_OPTIONS", "halt_on_error=1") - # tweak TSAN/ASAN defaults: + # tweak TSAN/ASAN/LSAN defaults: # enable TSAN deadlock detector # (unfortunately it caughts only few _potential_ deadlocks and actually @@ -152,15 +155,49 @@ def main(): env_prepend("TSAN_OPTIONS", "detect_deadlocks=1") env_prepend("TSAN_OPTIONS", "second_deadlock_stack=1") - # many python allocations, whose lifetime coincides with python interpreter - # lifetime and which are not explicitly freed on python shutdown, are - # reported as leaks. Disable leak reporting to avoid huge non-pygolang - # related printouts. - env_prepend("ASAN_OPTIONS", "detect_leaks=0") - # tune ASAN to check more aggressively by default env_prepend("ASAN_OPTIONS", "detect_stack_use_after_return=1") + # enable ASAN/LSAN leak detector. + # + # Do it only on CPython ≥ 3.11 because on py2 and on earlier py3 versions + # there are many many python allocations, whose lifetime coincide with + # python interpreter lifetime, and which are not explicitly freed on python + # shutdown. For py3 they significantly improved this step by step and + # starting from 3.11 it becomes practical to silence some still-leaks with + # suppressions, while for earlier py3 versions and especially for py2 it + # is, unfortunately, not manageable. 
Do not spend engineering time with + # activating LSAN on PyPy as that is tier 2 platform and bug tail history + # of memory leaks is very long even only on cpython. + if sys.version_info < (3,11): + env_prepend("ASAN_OPTIONS", "detect_leaks=0") + if libxsan is not None: + if 'asan' in libxsan.lower(): + print("W: trun %r: asan: leak detection deactivated on %s %s" % ( + sys.argv[1:], platform.python_implementation(), platform.python_version()), + file=sys.stderr) + else: + env_prepend("ASAN_OPTIONS", "detect_leaks=1") + env_prepend("LSAN_OPTIONS", "suppressions=%s" % os.path.abspath(os.path.join( + os.path.dirname(__file__), ".lsan-ignore.txt"))) + # do not print statistics for suppressed leaks - else it breaks tests that verify program output + env_prepend("LSAN_OPTIONS", "print_suppressions=0") + + # enable DWARF-based unwinding. + # else, if python is not compiled with -fno-omit-frame-pointer, it can show + # the whole traceback as e.g. just + # Direct leak of 32 byte(s) in 1 object(s) allocated from: + # #0 0x7f88522f3bd7 in malloc ../../../../src/libsanitizer/asan/asan_malloc_linux.cpp:69 + # #1 0x55f910a3d9a4 in PyThread_allocate_lock Python/thread_pthread.h:385 + # and our leak suppressions won't work. + # this is slower compared to default frame-pointer based unwinding, but + # still works reasonably timely when run with just tests. + env_prepend("ASAN_OPTIONS", "fast_unwind_on_malloc=0") + # leak suppression also needs full tracebacks to work correctly, since with + # python there are many levels of call nesting at C level, and to filter-out e.g. + # top-level PyImport_Import we need to go really deep. 
+ env_prepend("ASAN_OPTIONS", "malloc_context_size=255") + # exec `...` os.execvp(sys.argv[1], sys.argv[1:]) From 2bb971ba618fc3fbfdbbbd6f855a606c961bf6d9 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 7 May 2024 14:34:34 +0300 Subject: [PATCH 23/29] X golang_str: Adjust bstr/ustr .encode() and .__bytes__ to leave string domain into bytes Initially I implemented things in such a way that (b|u)str.__bytes__ were giving bstr and ustr.encode() was giving bstr as well. My logic here was that bstr is based on bytes and it is ok to give that. However this logic did not pass backward compatibility test: for example when LXML is imported it does cdef bytes _FILENAME_ENCODING = (sys.getfilesystemencoding() or sys.getdefaultencoding() or 'ascii').encode("UTF-8") and under gpython it breaks with File "/srv/slapgrid/slappart47/srv/runner/software/7f1663e8148f227ce3c6a38fc52796e2/bin/runwsgi", line 4, in from Products.ERP5.bin.zopewsgi import runwsgi; sys.exit(runwsgi()) File "/srv/slapgrid/slappart47/srv/runner/software/7f1663e8148f227ce3c6a38fc52796e2/parts/erp5/product/ERP5/__init__.py", line 36, in from Products.ERP5Type.Utils import initializeProduct, updateGlobals File "/srv/slapgrid/slappart47/srv/runner/software/7f1663e8148f227ce3c6a38fc52796e2/parts/erp5/product/ERP5Type/__init__.py", line 42, in from .patches import pylint File "/srv/slapgrid/slappart47/srv/runner/software/7f1663e8148f227ce3c6a38fc52796e2/parts/erp5/product/ERP5Type/patches/pylint.py", line 524, in __import__(module_name, fromlist=[module_name], level=0)) File "src/lxml/sax.py", line 18, in init lxml.sax File "src/lxml/etree.pyx", line 154, in init lxml.etree TypeError: Expected bytes, got golang.bstr The breakage highlights a thinko in my previous reasoning: yes bstr is based on bytes, but bstr has different semantics compared to bytes: even though e.g. __getitem__ works the same way for bytes on py2, it works differently compared to py3. 
This way if on py3 a program is doing bytes(x) or x.encode() it then expects the result to have bytes semantics of current python which is not the case if the result is bstr. -> Fix that by adjusting .encode() and .__bytes__() to produce bytes type of current python and leave string domain. I initially was contemplating for some time to introduce a third type, e.g. bvec also based on bytes, but having bytes semantic and that bvec.decode would return back to pygolang strings domain. But due to the fact that bytes semantic is different in between py2 and py3, it would mean that bvec provided by pygolang would need to have different behaviours dependent on current python version which is undesirable. In the end with leaving into native bytes the "bytes inconsistency" problem is left to remain under std python with pygolang targeting only to fix strings inconsistency in between py2 and py3 and providing the same semantic for bstr and ustr on all python versions. It also does not harm that bytes.decode() returns std unicode instead of str: for programs that run under unpatched python we have u() to convert the result to ustr, while under gpython std unicode is actually ustr which makes bytes.decode() behaviour still quite ok. P.S. 
we enable bstr.encode for consistency and because under py2, if not enabled, it will break when running pytest under gpython in File ".../_pytest/assertion/rewrite.py", line 352, in RN = "\r\n".encode("utf-8") AttributeError: unreadable attribute --- golang/_golang_str.pyx | 78 ++++++++++++++++++++++++++------------- golang/golang_str_test.py | 54 +++++++++++++++++---------- 2 files changed, 86 insertions(+), 46 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 6172711..46b0a45 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -106,6 +106,7 @@ from cython cimport no_gc from libc.stdio cimport FILE from golang cimport strconv +import codecs as pycodecs import string as pystring import types as pytypes import functools as pyfunctools @@ -343,9 +344,12 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 # _pybstr.__new__ is hand-made in _pybstr_tp_new which invokes ↑ _pybstr__new__() . - def __bytes__(self): return pyb(self) # see __str__ - def __unicode__(self): return pyu(self) + # __bytes__ converts string to bytes leaving string domain. + # NOTE __bytes__ and encode are the only operations that leave string domain. + # NOTE __bytes__ is used only by py3 and only for `bytes(obj)` and `b'%s/%b' % obj`. + def __bytes__(self): return _bdata(self) # -> bytes + def __unicode__(self): return pyu(self) def __str__(self): if PY_MAJOR_VERSION >= 3: return pyu(self) @@ -482,13 +486,32 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 # encode/decode - def decode(self, encoding=None, errors=None): - if encoding is None and errors is None: - encoding = 'utf-8' # NOTE always UTF-8, not sys.getdefaultencoding - errors = 'surrogateescape' - else: - if encoding is None: encoding = 'utf-8' - if errors is None: errors = 'strict' + # + # Encoding strings - both bstr and ustr - convert type to bytes leaving string domain. 
+ # + # Encode treats bstr and ustr as string, encoding unicode representation of + # the string to bytes. For bstr it means that the string representation is + # first converted to unicode and encoded to bytes from there. For ustr + # unicode representation of the string is directly encoded. + # + # Decoding strings is not provided. However for bstr the decode is provided + # treating input data as raw bytes and producing ustr as the result. + # + # NOTE __bytes__ and encode are the only operations that leave string domain. + def encode(self, encoding=None, errors=None): # -> bytes + encoding, errors = _encoding_with_defaults(encoding, errors) + + # on py2 e.g. bytes.encode('string-escape') works on bytes directly + if PY_MAJOR_VERSION < 3: + codec = pycodecs.lookup(encoding) + if not codec._is_text_encoding or \ + encoding in ('string-escape',): # string-escape also works on bytes + return codec.encode(self, errors)[0] + + return pyu(self).encode(encoding, errors) + + def decode(self, encoding=None, errors=None): # -> ustr | bstr on py2 for encodings like string-escape + encoding, errors = _encoding_with_defaults(encoding, errors) if encoding == 'utf-8' and errors == 'surrogateescape': x = _utf8_decode_surrogateescape(self) @@ -499,11 +522,6 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 return pyb(x) return pyu(x) - if PY_MAJOR_VERSION < 3: - # whiteout encode inherited from bytes - # TODO ideally whiteout it in such a way that bstr.encode also raises AttributeError - encode = property(doc='bstr has no encode') - # all other string methods @@ -667,9 +685,11 @@ cdef class _pyustr(unicode): # _pyustr.__new__ is hand-made in _pyustr_tp_new which invokes ↑ _pyustr__new__() . - def __bytes__(self): return pyb(self) - def __unicode__(self): return pyu(self) # see __str__ + # __bytes__ converts string to bytes leaving string domain. + # see bstr.__bytes__ for more details. 
+ def __bytes__(self): return _bdata(pyb(self)) # -> bytes + def __unicode__(self): return pyu(self) # see __str__ def __str__(self): if PY_MAJOR_VERSION >= 3: return pyu(self) # = self or pyustr if it was subclass @@ -793,20 +813,15 @@ cdef class _pyustr(unicode): return pyu(zunicode.__format__(self, format_spec)) - # encode/decode - def encode(self, encoding=None, errors=None): - if encoding is None and errors is None: - encoding = 'utf-8' # NOTE always UTF-8, not sys.getdefaultencoding - errors = 'surrogateescape' - else: - if encoding is None: encoding = 'utf-8' - if errors is None: errors = 'strict' + # encode/decode (see bstr for details) + def encode(self, encoding=None, errors=None): # -> bytes + encoding, errors = _encoding_with_defaults(encoding, errors) if encoding == 'utf-8' and errors == 'surrogateescape': x = _utf8_encode_surrogateescape(self) else: x = zunicode.encode(self, encoding, errors) - return pyb(x) + return x if PY_MAJOR_VERSION < 3: # whiteout decode inherited from unicode @@ -1987,6 +2002,18 @@ cdef extern from "Python.h": # ---- UTF-8 encode/decode ---- +# _encoding_with_defaults returns encoding and errors substituted with defaults +# as needed for functions like ustr.encode and bstr.decode . 
+cdef _encoding_with_defaults(encoding, errors): # -> (encoding, errors) + if encoding is None and errors is None: + encoding = 'utf-8' # NOTE always UTF-8, not sys.getdefaultencoding + errors = 'surrogateescape' + else: + if encoding is None: encoding = 'utf-8' + if errors is None: errors = 'strict' + return (encoding, errors) + + # TODO(kirr) adjust UTF-8 encode/decode surrogateescape(*) a bit so that not # only bytes -> unicode -> bytes is always identity for any bytes (this is # already true), but also that unicode -> bytes -> unicode is also always true @@ -2238,7 +2265,6 @@ cdef _patch_str(): # XXX explain bpreserve_slots = upreserve_slots = ("maketrans",) if PY_MAJOR_VERSION < 3: - bpreserve_slots += ("encode",) # @property'ies upreserve_slots += ("decode",) # patch unicode to be pyustr. This patches diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 6f88ad5..ea7456b 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -231,13 +231,15 @@ def test_strings_basic(): assert b(bs) is bs; assert bstr(bs) is bs assert u(us) is us; assert ustr(us) is us - # bytes(b(·)) = identity, unicode(u(·)) = identity - assert bytes (bs) is bs + # unicode(u(·)) = identity assert unicode(us) is us - # unicode(b) -> u, bytes(u) -> b + # unicode(b) -> u _ = unicode(bs); assert type(_) is ustr; assert _ == "мир" - _ = bytes (us); assert type(_) is bstr; assert _ == "мир" + + # bytes(b|u) -> bytes + _ = bytes(bs); assert type(_) is x32(bytes, bstr); assert _ == b'\xd0\xbc\xd0\xb8\xd1\x80' + _ = bytes(us); assert type(_) is x32(bytes, bstr); assert _ == b'\xd0\xbc\xd0\xb8\xd1\x80' # bytearray(b|u) -> bytearray _ = bytearray(bs); assert type(_) is bytearray; assert _ == b'\xd0\xbc\xd0\xb8\xd1\x80' @@ -651,14 +653,13 @@ def test_strings_encodedecode(): us = u('мир') bs = b('май') + _ = us.encode(); assert type(_) is bytes; assert _ == xbytes('мир') + _ = us.encode('utf-8'); assert type(_) is bytes; assert _ == xbytes('мир') + _ = bs.encode(); 
assert type(_) is bytes; assert _ == xbytes('май') + _ = bs.encode('utf-8'); assert type(_) is bytes; assert _ == xbytes('май') + # TODO also raise AttributeError on .encode/.decode lookup on classes - assert hasattr(us, 'encode') ; assert hasattr(ustr, 'encode') - assert not hasattr(bs, 'encode') #; assert not hasattr(bstr, 'encode') assert not hasattr(us, 'decode') #; assert not hasattr(ustr, 'decode') - assert hasattr(bs, 'decode') ; assert hasattr(bstr, 'decode') - - _ = us.encode(); assert type(_) is bstr; assert _bdata(_) == xbytes('мир') - _ = us.encode('utf-8'); assert type(_) is bstr; assert _bdata(_) == xbytes('мир') _ = bs.decode(); assert type(_) is ustr; assert _udata(_) == u'май' _ = bs.decode('utf-8'); assert type(_) is ustr; assert _udata(_) == u'май' @@ -673,10 +674,10 @@ def test_strings_encodedecode(): assert type(_) is ustr assert _udata(_) == u'мир' - b_cpmir = us.encode('cp1251') - assert type(b_cpmir) is bstr - assert _bdata(b_cpmir) == u'мир'.encode('cp1251') - assert _bdata(b_cpmir) == b'\xec\xe8\xf0' + cpmir = us.encode('cp1251') + assert type(cpmir) is bytes + assert cpmir == u'мир'.encode('cp1251') + assert cpmir == b'\xec\xe8\xf0' # decode/encode errors u_k8mir = b_k8mir.decode() # no decode error with @@ -697,11 +698,14 @@ def test_strings_encodedecode(): us.encode('ascii') _ = u_k8mir.encode() # no encode error with - assert type(_) is bstr # default parameters - assert _bdata(_) == k8mir + assert type(_) is bytes # default parameters + assert _ == k8mir _ = u_k8mir.encode('utf-8', 'surrogateescape') # no encode error with - assert type(_) is bstr # explicit utf-8/surrogateescape - assert _bdata(_) == k8mir + assert type(_) is bytes # explicit utf-8/surrogateescape + assert _ == k8mir + _ = b_k8mir.encode() # bstr.encode = bstr -> ustr -> encode + assert type(_) is bytes + assert _ == k8mir # on py2 unicode.encode accepts surrogate pairs and does not complain # TODO(?) 
manually implement encode/py2 and reject surrogate pairs by default @@ -724,6 +728,14 @@ def test_strings_encodedecode(): _ = b(r'x\'y').decode('string-escape'); assert type(_) is bstr; assert _bdata(_) == b"x'y" _ = b('616263').decode('hex'); assert type(_) is bstr; assert _bdata(_) == b"abc" + # similarly for bytes.encode + if six.PY3: + with raises(LookupError): bs.encode('hex') + with raises(LookupError): bs.encode('string-escape') + else: + _ = bs.encode('hex'); assert type(_) is bytes; assert _ == b'd0bcd0b0d0b9' + _ = bs.encode('string-escape'); assert type(_) is bytes; assert _ == br'\xd0\xbc\xd0\xb0\xd0\xb9' + # verify string operations like `x * 3` for all cases from bytes, bytearray, unicode, bstr and ustr. @mark.parametrize('tx', (bytes, unicode, bytearray, bstr, ustr)) @@ -1418,6 +1430,8 @@ def M(fmt, args, ok): M("α %s π", BB(xbytes('мир2')) , "α байты π") # not мир2 # vvv does not work on py3 as b'' % b'' does not consult __str__ nor __bytes__ of the argument # even though it is not 100% we are ok here, because customizing bytes or unicode is very exotic + # + # XXX the code in bytesobject.c::format_obj tells different -> recheck. 
if six.PY2: M("α %s π", (BB(xbytes('мир2')),) , "α байты π") # not мир2 M("α %s π", [BB(xbytes('мир2'))] , "α [BB(байты)] π") # not [мир2] @@ -1884,8 +1898,8 @@ class MyStr(tx): # for bstr/ustr __bytes__/__unicode__ return *str, never MyStr # (builtin unicode has no __bytes__/__unicode__) if tx is not unicode: - _ = xx.__bytes__(); assert type(_) is bstr; assert _ == 'мир' - _ = xx.__unicode__(); assert type(_) is ustr; assert _ == 'мир' + _ = xx.__bytes__(); assert type(_) is bytes; assert _ == xbytes('мир') + _ = xx.__unicode__(); assert type(_) is ustr; assert _ == 'мир' # subclass with __str__ From cb0e6055712b967ef45ef7f607f31f2e8c083420 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Tue, 7 May 2024 14:56:14 +0300 Subject: [PATCH 24/29] X golang_str: Fix iter(bstr) to yield byte instead of unicode character Things were initially implemented to follow Go semantic exactly with bytestring iteration yielding unicode characters as explained in https://blog.golang.org/strings. However this makes bstr not a 100% drop-in compatible replacement for std str under py2, and even though my initial testing was saying this change does not affect programs in practice it turned out to be not the case. For example with bstr.__iter__ yielding unicode characters running gpython on py2 will break sometimes when importing uuid: There uuid reads 16 bytes from /dev/random and then wants to iterate those 16 bytes as single bytes and then expects that the length of the resulting sequence is exactly 16: int = long(('%02x'*16) % tuple(map(ord, bytes)), 16) ( https://github.com/python/cpython/blob/2.7-0-g8d21aa21f2c/Lib/uuid.py#L147 ) which breaks if some of the read bytes are higher than 0x7f. Even though this particular problem could be worked-around with patching uuid, there is no evidence that there will be no similar problems later, which could be many. 
-> So adjust bstr semantic instead to follow semantic of str under py2 and introduce uiter() primitive to still be able to iterate bytestrings as unicode characters. This makes bstr, hopefully, to be fully compatible with str on py2 while still providing reasonably good approach for strings processing the Go-way when needed. Add biter as well for symmetry. --- README.rst | 16 +++++---- golang/__init__.py | 11 ++++--- golang/_golang_str.pyx | 69 +++++++++++++++++++++++++++------------ golang/golang_str_test.py | 49 ++++++++++++++------------- gpython/gpython_test.py | 2 ++ 5 files changed, 93 insertions(+), 54 deletions(-) diff --git a/README.rst b/README.rst index 80846e7..03d19c0 100644 --- a/README.rst +++ b/README.rst @@ -241,12 +241,16 @@ The conversion, in both encoding and decoding, never fails and never looses information: `bstr→ustr→bstr` and `ustr→bstr→ustr` are always identity even if bytes data is not valid UTF-8. +Both `bstr` and `ustr` represent stings. They are two different *representations* of the same entity. + Semantically `bstr` is array of bytes, while `ustr` is array of -unicode-characters. Accessing their elements by `[index]` yields byte and -unicode character correspondingly [*]_. Iterating them, however, yields unicode -characters for both `bstr` and `ustr`. In practice `bstr` is enough 99% of the -time, and `ustr` only needs to be used for random access to string characters. -See `Strings, bytes, runes and characters in Go`__ for overview of this approach. +unicode-characters. Accessing their elements by `[index]` and iterating them yield byte and +unicode character correspondingly [*]_. However it is possible to yield unicode +character when iterating `bstr` via `uiter`, and to yield byte character when +iterating `ustr` via `biter`. In practice `bstr` + `uiter` is enough 99% of +the time, and `ustr` only needs to be used for random access to string +characters. 
See `Strings, bytes, runes and characters in Go`__ for overview of +this approach. __ https://blog.golang.org/strings @@ -267,7 +271,7 @@ Usage example:: s = b('привет') # s is bstr corresponding to UTF-8 encoding of 'привет'. s += ' мир' # s is b('привет мир') - for c in s: # c will iterate through + for c in uiter(s): # c will iterate through ... # [u(_) for _ in ('п','р','и','в','е','т',' ','м','и','р')] # the following gives b('привет мир труд май') diff --git a/golang/__init__.py b/golang/__init__.py index 00babf6..9b90797 100644 --- a/golang/__init__.py +++ b/golang/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -# Copyright (C) 2018-2023 Nexedi SA and Contributors. +# Copyright (C) 2018-2024 Nexedi SA and Contributors. # Kirill Smelkov # # This program is free software: you can Use, Study, Modify and Redistribute @@ -24,7 +24,7 @@ - `func` allows to define methods separate from class. - `defer` allows to schedule a cleanup from the main control flow. - `error` and package `errors` provide error chaining. -- `b`, `u` and `bstr`/`ustr` provide uniform UTF8-based approach to strings. +- `b`, `u`, `bstr`/`ustr` and `biter`/`uiter` provide uniform UTF8-based approach to strings. - `gimport` allows to import python modules by full path in a Go workspace. See README for thorough overview. 
@@ -36,7 +36,8 @@ __version__ = "0.1" __all__ = ['go', 'chan', 'select', 'default', 'nilchan', 'defer', 'panic', - 'recover', 'func', 'error', 'b', 'u', 'bstr', 'ustr', 'bbyte', 'uchr', 'gimport'] + 'recover', 'func', 'error', 'b', 'u', 'bstr', 'ustr', 'biter', 'uiter', 'bbyte', 'uchr', + 'gimport'] import setuptools_dso setuptools_dso.dylink_prepare_dso('golang.runtime.libgolang') @@ -323,4 +324,6 @@ def _emit_exc_context(exc, emitf, recursef): pybbyte as bbyte, \ pyu as u, \ pyustr as ustr, \ - pyuchr as uchr + pyuchr as uchr, \ + pybiter as biter, \ + pyuiter as uiter diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 46b0a45..137e302 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -141,7 +141,7 @@ cpdef pyb(s): # -> bstr b(u(bytes_input)) is bstr with the same data as bytes_input. - See also: u, bstr/ustr. + See also: u, bstr/ustr, biter/uiter. """ bs = _pyb(pybstr, s) if bs is None: @@ -164,7 +164,7 @@ cpdef pyu(s): # -> ustr u(b(unicode_input)) is ustr with the same data as unicode_input. - See also: b, bstr/ustr. + See also: b, bstr/ustr, biter/uiter. """ us = _pyu(pyustr, s) if us is None: @@ -280,8 +280,6 @@ cdef __pystr(object obj): # -> ~str return pyb(obj) -# XXX -> bchr ? (not good as "character" means "unicode character") -# -> bstr.chr ? def pybbyte(int i): # -> 1-byte bstr """bbyte(i) returns 1-byte bstr with ordinal i.""" return pyb(bytearray([i])) @@ -318,11 +316,11 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 is always identity even if bytes data is not valid UTF-8. - Semantically bstr is array of bytes. Accessing its elements by [index] - yields byte character. Iterating through bstr, however, yields unicode - characters. In practice bstr is enough 99% of the time, and ustr only - needs to be used for random access to string characters. See - https://blog.golang.org/strings for overview of this approach. + Semantically bstr is array of bytes. 
Accessing its elements by [index] and + iterating it yield byte character. However it is possible to yield unicode + character when iterating bstr via uiter. In practice bstr + uiter is enough + 99% of the time, and ustr only needs to be used for random access to string + characters. See https://blog.golang.org/strings for overview of this approach. Operations in between bstr and ustr/unicode / bytes/bytearray coerce to bstr. When the coercion happens, bytes and bytearray, similarly to bstr, are also @@ -337,7 +335,7 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 to bstr. See b for details. - otherwise bstr will have string representation of the object. - See also: b, ustr/u. + See also: b, ustr/u, biter/uiter. """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) @@ -414,10 +412,13 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 else: return pyb(x) - # __iter__ - yields unicode characters + # __iter__ def __iter__(self): - # TODO iterate without converting self to u - return pyu(self).__iter__() + if PY_MAJOR_VERSION >= 3: + return _pybstrIter(zbytes.__iter__(self)) + else: + # on python 2 str does not have .__iter__ + return PySeqIter_New(self) # __contains__ @@ -668,8 +669,8 @@ cdef class _pyustr(unicode): elements by [index] yields unicode characters. ustr complements bstr and is meant to be used only in situations when - random access to string characters is needed. Otherwise bstr is more - preferable and should be enough 99% of the time. + random access to string characters is needed. Otherwise bstr + uiter is + more preferable and should be enough 99% of the time. Operations in between ustr and bstr/bytes/bytearray / unicode coerce to ustr. When the coercion happens, bytes and bytearray, similarly to bstr, are also @@ -678,7 +679,7 @@ cdef class _pyustr(unicode): ustr constructor, similarly to the one in bstr, accepts arbitrary objects and stringify them. 
Please refer to bstr and u documentation for details. - See also: u, bstr/b. + See also: u, bstr/b, biter/uiter. """ # XXX due to "cannot `cdef class` with __new__" (https://github.com/cython/cython/issues/799) @@ -983,17 +984,43 @@ cdef PyObject* _pyustr_tp_new(PyTypeObject* _cls, PyObject* _argv, PyObject* _kw assert sizeof(_pyustr) == sizeof(PyUnicodeObject) -# _pyustrIter wraps unicode iterator to return pyustr for each yielded character. +# _pybstrIter wraps bytes iterator to return pybstr for each yielded byte. +cdef class _pybstrIter: + cdef object zbiter + def __init__(self, zbiter): + self.zbiter = zbiter + def __iter__(self): + return self + def __next__(self): + x = next(self.zbiter) + if PY_MAJOR_VERSION >= 3: + return pybbyte(x) + else: + return pyb(x) + +# _pyustrIter wraps zunicode iterator to return pyustr for each yielded character. cdef class _pyustrIter: - cdef object uiter - def __init__(self, uiter): - self.uiter = uiter + cdef object zuiter + def __init__(self, zuiter): + self.zuiter = zuiter def __iter__(self): return self def __next__(self): - x = next(self.uiter) + x = next(self.zuiter) return pyu(x) + +def pybiter(obj): + """biter(obj) is like iter(b(obj)) but TODO: iterates object incrementally + without doing full convertion to bstr.""" + return iter(pyb(obj)) # TODO iterate obj directly + +def pyuiter(obj): + """uiter(obj) is like iter(u(obj)) but TODO: iterates object incrementally + without doing full convertion to ustr.""" + return iter(pyu(obj)) # TODO iterate obj directly + + # _pyustrTranslateTab wraps table for .translate to return bstr as unicode # because unicode.translate does not accept bstr values. 
cdef class _pyustrTranslateTab: diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index ea7456b..0c5df23 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -21,7 +21,7 @@ from __future__ import print_function, absolute_import import golang -from golang import b, u, bstr, ustr, bbyte, uchr, func, defer, panic +from golang import b, u, bstr, ustr, biter, uiter, bbyte, uchr, func, defer, panic from golang._golang import _udata, _bdata from golang.gcompat import qq from golang.strconv_test import byterange @@ -617,35 +617,38 @@ def test_strings_index2(): # verify strings iteration. def test_strings_iter(): + # iter(u/unicode) + uiter(*) -> iterate unicode characters + # iter(b/bytes) + biter(*) -> iterate byte characters us = u("миру мир"); u_ = u"миру мир" - bs = b("миру мир") - - # iter( b/u/unicode ) -> iterate unicode characters - # NOTE that iter(b) too yields unicode characters - not integers or bytes - #bi = iter(bs) # XXX temp disabled - bi = iter(us) - ui = iter(us) - ui_ = iter(u_) + bs = b("миру мир"); b_ = xbytes("миру мир"); a_ = xbytearray(b_) + + # XIter verifies that going through all given iterators produces the same type and results. 
+ missing=object() class XIter: + def __init__(self, typok, *viter): + self.typok = typok + self.viter = viter def __iter__(self): return self - def __next__(self, missing=object): - x = next(bi, missing) - y = next(ui, missing) - z = next(ui_, missing) - assert type(x) is type(y) - if x is not missing: - assert type(x) is ustr - if z is not missing: - assert type(z) is unicode - assert x == y - assert y == z - if x is missing: + def __next__(self): + vnext = [] + for it in self.viter: + obj = next(it, missing) + vnext.append(obj) + if missing in vnext: + assert vnext == [missing]*len(self.viter) raise StopIteration - return x + for obj in vnext: + assert type(obj) is self.typok + assert obj == vnext[0] + return vnext[0] next = __next__ # py2 - assert list(XIter()) == ['м','и','р','у',' ','м','и','р'] + assert list(XIter(ustr, iter(us), uiter(us), uiter(u_), uiter(bs), uiter(b_), uiter(a_))) == \ + ['м','и','р','у',' ','м','и','р'] + assert list(XIter(bstr, iter(bs), biter(us), biter(u_), biter(bs), biter(b_), biter(a_))) == \ + [b'\xd0',b'\xbc',b'\xd0',b'\xb8',b'\xd1',b'\x80',b'\xd1',b'\x83',b' ', + b'\xd0',b'\xbc',b'\xd0',b'\xb8',b'\xd1',b'\x80'] # verify .encode/.decode . 
diff --git a/gpython/gpython_test.py b/gpython/gpython_test.py index 85b97fb..a4775b1 100644 --- a/gpython/gpython_test.py +++ b/gpython/gpython_test.py @@ -87,6 +87,8 @@ def test_golang_builtins(): assert u is golang.u assert bstr is golang.bstr assert ustr is golang.ustr + assert biter is golang.biter + assert uiter is golang.uiter assert bbyte is golang.bbyte assert uchr is golang.uchr From a341f7612402510ece95e02d308b7a88ef66cde0 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 8 May 2024 13:21:55 +0300 Subject: [PATCH 25/29] X golang_str: Fix bstr/ustr __eq__ and friends to return NotImplemented wrt non-string types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In 54c2a3cf (golang_str: Teach bstr/ustr to compare wrt any string with automatic coercion) I've added __eq__, __ne__, __lt__ etc methods to our strings, but __lt__ and other comparison to raise TypeError against any non-string type. My idea was to mimic user-visible py3 behaviour such as >>> "abc" > 1 Traceback (most recent call last): File "", line 1, in TypeError: '>' not supported between instances of 'str' and 'int' However it turned out that the implementation was not exactly matching what Python is doing internally which lead to incorrect behaviour when bstr or ustr is compared wrt another type with its own __cmp__. In the general case for `a op b` Python first queries a.__op__(b) and b.__op'__(a) and sometimes other methods before going to .__cmp__. This relies on the methods to return NotImplemented instead of raising an exception and if a trial raises TypeError everything is stopped and that TypeError is returned to the caller. Jérome reports a real breakage due to this when bstr is compared wrt distutils.version.LooseVersion . 
LooseVersion is basically class LooseVersion(Version): def __cmp__ (self, other): if isinstance(other, StringType): other = LooseVersion(other) return cmp(self.version, other.version) but due to my thinko on `LooseVersion < bstr` the control flow was not getting into that LooseVersion.__cmp__ because bstr.__gt__ was tried first and raised TypeError. -> Fix all comparison operations to return NotImplemented instead of raising TypeError and make sure in the tests that this behaviour exactly matches what native str type does. The fix is needed not only for py2 because added test_strings_cmp_wrt_distutils_LooseVersion was failing on py3 as well without the fix. /reported-by @jerome /reported-on https://lab.nexedi.com/nexedi/slapos/-/merge_requests/1575#note_206080 --- golang/_golang_str.pyx | 86 ++++++++++++++++++++++++++------ golang/golang_str_test.py | 102 +++++++++++++++++++++++++++++++++----- 2 files changed, 160 insertions(+), 28 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 137e302..ac6233d 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -383,19 +383,48 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 return zbytes.__hash__(self) # == != < > <= >= - # NOTE == and != are special: they must succeed against any type so that - # bstr could be used as dict key. + # NOTE all operations must succeed against any type so that bstr could be + # used as dict key and arbitrary three-way comparisons, done by python, + # work correctly. This means that on py2 e.g. `bstr > int` will behave + # exactly as builtin str and won't raise TypeError. On py3 TypeError is + # raised for such operations by python itself when it receives + # NotImplemented from all tried methods. 
def __eq__(a, b): try: b = _pyb_coerce(b) except TypeError: - return False + return NotImplemented return zbytes.__eq__(a, b) - def __ne__(a, b): return not a.__eq__(b) - def __lt__(a, b): return zbytes.__lt__(a, _pyb_coerce(b)) - def __gt__(a, b): return zbytes.__gt__(a, _pyb_coerce(b)) - def __le__(a, b): return zbytes.__le__(a, _pyb_coerce(b)) - def __ge__(a, b): return zbytes.__ge__(a, _pyb_coerce(b)) + def __ne__(a, b): + try: + b = _pyb_coerce(b) + except TypeError: + return NotImplemented + return zbytes.__ne__(a, b) + def __lt__(a, b): + try: + b = _pyb_coerce(b) + except TypeError: + return NotImplemented + return zbytes.__lt__(a, _pyb_coerce(b)) + def __gt__(a, b): + try: + b = _pyb_coerce(b) + except TypeError: + return NotImplemented + return zbytes.__gt__(a, _pyb_coerce(b)) + def __le__(a, b): + try: + b = _pyb_coerce(b) + except TypeError: + return NotImplemented + return zbytes.__le__(a, _pyb_coerce(b)) + def __ge__(a, b): + try: + b = _pyb_coerce(b) + except TypeError: + return NotImplemented + return zbytes.__ge__(a, _pyb_coerce(b)) # len - no need to override @@ -724,19 +753,44 @@ cdef class _pyustr(unicode): return hash(pyb(self)) # == != < > <= >= - # NOTE == and != are special: they must succeed against any type so that - # ustr could be used as dict key. + # NOTE all operations must succeed against any type. + # See bstr for details. 
def __eq__(a, b): try: b = _pyu_coerce(b) except TypeError: - return False + return NotImplemented return zunicode.__eq__(a, b) - def __ne__(a, b): return not a.__eq__(b) - def __lt__(a, b): return zunicode.__lt__(a, _pyu_coerce(b)) - def __gt__(a, b): return zunicode.__gt__(a, _pyu_coerce(b)) - def __le__(a, b): return zunicode.__le__(a, _pyu_coerce(b)) - def __ge__(a, b): return zunicode.__ge__(a, _pyu_coerce(b)) + def __ne__(a, b): + try: + b = _pyu_coerce(b) + except TypeError: + return NotImplemented + return zunicode.__ne__(a, b) + def __lt__(a, b): + try: + b = _pyu_coerce(b) + except TypeError: + return NotImplemented + return zunicode.__lt__(a, _pyu_coerce(b)) + def __gt__(a, b): + try: + b = _pyu_coerce(b) + except TypeError: + return NotImplemented + return zunicode.__gt__(a, _pyu_coerce(b)) + def __le__(a, b): + try: + b = _pyu_coerce(b) + except TypeError: + return NotImplemented + return zunicode.__le__(a, _pyu_coerce(b)) + def __ge__(a, b): + try: + b = _pyu_coerce(b) + except TypeError: + return NotImplemented + return zunicode.__ge__(a, _pyu_coerce(b)) # len - no need to override diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 0c5df23..dfc2c92 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -942,10 +942,18 @@ def test_strings_ops2_bufreject(tx, ty): assert (x == y) is False # see test_strings_ops2_eq_any assert (x != y) is True - with raises(TypeError): x >= y - with raises(TypeError): x <= y - with raises(TypeError): x > y - with raises(TypeError): x < y + if six.PY3: + with raises(TypeError): "abc" >= y # x.__op__(y) and y.__op'__(x) both return + with raises(TypeError): x >= y # NotImplemented which leads py3 to raise TypeError + with raises(TypeError): x <= y + with raises(TypeError): x > y + with raises(TypeError): x < y + else: + "abc" >= y # does not raise but undefined + x >= y # ----//---- + x <= y + x > y + x < y # reverse operations, e.g. 
memoryview + bstr with raises(TypeError): y + x @@ -959,10 +967,18 @@ def test_strings_ops2_bufreject(tx, ty): y == x # not raises TypeError - see test_strings_ops2_eq_any y != x # if tx is not bstr: - with raises(TypeError): y >= x - with raises(TypeError): y <= x - with raises(TypeError): y > x - with raises(TypeError): y < x + if six.PY3: + with raises(TypeError): y >= "abc" # see ^^^ + with raises(TypeError): y >= x + with raises(TypeError): y <= x + with raises(TypeError): y > x + with raises(TypeError): y < x + else: + y >= "abc" + y >= x + y <= x + y > x + y < x # verify string operations like `x == *` for x being bstr/ustr. @@ -982,10 +998,19 @@ def test_strings_ops2_eq_any(tx): def assertNE(y): assert (x == y) is False assert (x != y) is True - with raises(TypeError): x >= y - with raises(TypeError): x <= y - with raises(TypeError): x > y - with raises(TypeError): x < y + if six.PY3: + with raises(TypeError): "abc" >= y # py3: NotImplemented -> raise + with raises(TypeError): x >= y + with raises(TypeError): x <= y + with raises(TypeError): x > y + with raises(TypeError): x < y + else: + "abc" >= y # py2: no raise on NotImplemented; result is undefined + x >= y + x <= y + x > y + x < y + _ = assertNE _(None) @@ -1009,6 +1034,21 @@ def assertNE(y): with raises(TypeError): hash(l) _(l) + # also verify that internally x.__op__(y of non-string-type) returns + # NotImplemented - exactly the same way as builtin str type does. Even + # though `x op y` gives proper answer internally python counts on x.__op__(y) + # to return NotImplemented so that arbitrary three-way comparison works properly. 
+ s = xstr(u'мир', str) + for op in ('eq', 'ne', 'lt', 'gt', 'le', 'ge'): + sop = getattr(s, '__%s__' % op) + xop = getattr(x, '__%s__' % op) + assert sop(None) is NotImplemented + assert xop(None) is NotImplemented + assert sop(0) is NotImplemented + assert xop(0) is NotImplemented + assert sop(hx) is NotImplemented + assert xop(hx) is NotImplemented + # verify logic in `bstr % ...` and `bstr.format(...)` . def test_strings_mod_and_format(): @@ -2624,6 +2664,44 @@ def tests_strings_early_str_subclass(): # XXX more... +# ---- issues hit by users ---- +# fixes for below issues have their corresponding tests in the main part above, but +# we also add tests with original code where problems were hit. + +# three-way comparison wrt class with __cmp__ was working incorrectly because +# bstr.__op__ were not returning NotImplemented wrt non-string types. +# https://lab.nexedi.com/nexedi/slapos/-/merge_requests/1575#note_206080 +@mark.parametrize('tx', (str, bstr if str is bytes else ustr)) # LooseVersion does not handle unicode on py2 +def test_strings_cmp_wrt_distutils_LooseVersion(tx): + from distutils.version import LooseVersion + + l = LooseVersion('1.16.2') + + x = xstr('1.12', tx) + assert not (x == l) + assert not (l == x) + assert x != l + assert l != x + assert not (x >= l) + assert l >= x + assert x <= l + assert not (l <= x) + assert x < l + assert not (l < x) + + x = xstr('1.16.2', tx) + assert x == l + assert l == x + assert not (x != l) + assert not (l != x) + assert x >= l + assert l >= x + assert x <= l + assert l <= x + assert not (x < l) + assert not (l < x) + + # ---- benchmarks ---- # utf-8 decoding From 84ed3e79b9ceb72c3c7fe4505c2ef1a4af747403 Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 8 May 2024 16:24:38 +0300 Subject: [PATCH 26/29] X golang_str: More fixes for bstr to be accepted as name of an attribute MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This time we hit that builtin getattr was 
rejecting it. Fix it via patching _PyObject_LookupAttr, that builtin getattr uses, and by adding tests for this functionality. Reported by Jérome at https://lab.nexedi.com/nexedi/slapos/-/merge_requests/1575#note_206080 --- golang/.gitignore | 1 + golang/_golang_str.pyx | 57 +++++++++++++++++++++++++++-------- golang/_golang_str_test.pyx | 47 +++++++++++++++++++++++++++++ golang/_golang_test.pyx | 5 ++++ golang/golang_str_test.py | 59 ++++++++++++++++++++++++++++++++++++- setup.py | 4 ++- 6 files changed, 158 insertions(+), 15 deletions(-) create mode 100644 golang/_golang_str_test.pyx diff --git a/golang/.gitignore b/golang/.gitignore index 7492664..e532690 100644 --- a/golang/.gitignore +++ b/golang/.gitignore @@ -6,6 +6,7 @@ /_fmt_test.cpp /_golang.cpp /_golang_test.cpp +/_golang_str_test.cpp /_io.cpp /_os.cpp /_os_test.cpp diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index ac6233d..890ab9a 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -36,7 +36,8 @@ from cpython cimport PyObject_CheckBuffer from cpython cimport Py_TPFLAGS_HAVE_GC, Py_TPFLAGS_HEAPTYPE, Py_TPFLAGS_READY, PyType_Ready from cpython cimport Py_TPFLAGS_VALID_VERSION_TAG from cpython cimport PyBytes_Format, PyUnicode_Format, PyObject_Str -from cpython cimport PyObject_GetAttr, PyObject_SetAttr +from cpython cimport PyObject_GetAttr, PyObject_SetAttr, PyObject_HasAttr +from cpython cimport PyBytes_Check cdef extern from "Python.h": PyTypeObject PyBytes_Type @@ -1997,34 +1998,64 @@ cdef _patch_capi_object_str(): # XXX place, comments, test # on py3 PyObject_GetAttr & co insist on name to be unicode -# XXX _PyObject_LookupAttr # XXX _PyObject_GenericGetAttrWithDict # XXX _PyObject_GenericSetAttrWithDict # XXX type_getattro IF PY3: + cdef extern from "Python.h": + int _PyObject_LookupAttr(object obj, object attr, PyObject** pres) except -1 + ctypedef object obj_getattr_func(object, object) ctypedef int obj_setattr_func(object, object, object) except -1 - - cdef 
obj_getattr_func* _pobject_GetAttr = PyObject_GetAttr - cdef obj_setattr_func* _pobject_SetAttr = PyObject_SetAttr + # delattr is implemented via setattr(v=NULL) + ctypedef bint obj_hasattr_func(object, object) # no except + ctypedef int obj_lookupattr_func(object, object, PyObject**) except -1 + + cdef obj_getattr_func* _pobject_GetAttr = PyObject_GetAttr + cdef obj_setattr_func* _pobject_SetAttr = PyObject_SetAttr + cdef obj_hasattr_func* _pobject_HasAttr = PyObject_HasAttr + cdef obj_lookupattr_func* _pobject_LookupAttr = _PyObject_LookupAttr + + # isbstr returns whether obj is bstr instance or not. + # it avoids going to isinstance unless really needed because isinstance, + # internally, uses _PyObject_LookupAttr and we need to patch that function + # with using isbstr in the hook. + cdef bint isbstr(obj) except -1: + if not PyBytes_Check(obj): + return False + if Py_TYPE(obj) == pybstr: + return True + # it might be also a pybstr subclass + return isinstance(obj, pybstr) cdef object _object_xGetAttr(object obj, object name): -# fprintf(stderr, "xgetattr...\n") - if isinstance(name, pybstr): + if isbstr(name): name = pyustr(name) return _pobject_GetAttr(obj, name) - cdef int _object_xSetAttr(object obj, object name, object v) except -1: -# fprintf(stderr, "xsetattr...\n") - if isinstance(name, pybstr): + cdef int _object_xSetAttr(object obj, object name, object v) except -1: # XXX v=NULL on del + if isbstr(name): name = pyustr(name) return _pobject_SetAttr(obj, name, v) + cdef bint _object_xHasAttr(object obj, object name): # no except + if isbstr(name): + name = pyustr(name) + return _pobject_HasAttr(obj, name) + + + cdef int _object_xLookupAttr(object obj, object name, PyObject** pres) except -1: + if isbstr(name): + name = pyustr(name) + return _pobject_LookupAttr(obj, name, pres) + cdef _patch_capi_object_attr_bstr(): IF PY3: - cpatch(&_pobject_GetAttr, _object_xGetAttr) - cpatch(&_pobject_SetAttr, _object_xSetAttr) + cpatch(&_pobject_GetAttr, 
_object_xGetAttr) + cpatch(&_pobject_SetAttr, _object_xSetAttr) + cpatch(&_pobject_HasAttr, _object_xHasAttr) + cpatch(&_pobject_LookupAttr, _object_xLookupAttr) # ---- misc ---- @@ -2397,7 +2428,7 @@ cdef _patch_str(): _patch_capi_str_format() _patch_capi_object_str() - _patch_capi_object_attr_bstr() + _patch_capi_object_attr_bstr() # XXX activate under plain py as well _patch_capi_unicode_decode_as_bstr() _patch_str_pickle() # ... diff --git a/golang/_golang_str_test.pyx b/golang/_golang_str_test.pyx new file mode 100644 index 0000000..906a532 --- /dev/null +++ b/golang/_golang_str_test.pyx @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# cython: language_level=2 +# distutils: language=c++ +# +# Copyright (C) 2024 Nexedi SA and Contributors. +# Kirill Smelkov +# +# This program is free software: you can Use, Study, Modify and Redistribute +# it under the terms of the GNU General Public License version 3, or (at your +# option) any later version, as published by the Free Software Foundation. +# +# You can also Link and Combine this program with other software covered by +# the terms of any of the Free Software licenses or any of the Open Source +# Initiative approved licenses and Convey the resulting work. Corresponding +# source of such a combination shall include the source code for all other +# software used. +# +# This program is distributed WITHOUT ANY WARRANTY; without even the implied +# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# +# See COPYING file for full licensing terms. +# See https://www.nexedi.com/licensing for rationale and options. + +# helpers for golang_str_test.py that need C-level access. 
+ +from cpython cimport PyObject_GetAttr, PyObject_SetAttr, PyObject_DelAttr, PyObject_HasAttr + +def CPyObject_GetAttr(obj, attr): return PyObject_GetAttr(obj, attr) +def CPyObject_SetAttr(obj, attr, v): PyObject_SetAttr(obj, attr, v) +def CPyObject_DelAttr(obj, attr): PyObject_DelAttr(obj, attr) +def CPyObject_HasAttr(obj, attr): return PyObject_HasAttr(obj, attr) + + +IF PY3: + cdef extern from "Python.h": + int _PyObject_LookupAttr(object obj, object attr, PyObject** pres) except -1 + + def CPyObject_LookupAttr(obj, attr): + cdef PyObject* res + _PyObject_LookupAttr(obj, attr, &res) + if res == NULL: + raise AttributeError((obj, attr)) + return res + +# XXX +more capi func +#def CPyObject_GenericGetAttr(obj, attr): return PyObject_GenericGetAttr(obj, attr) +#def CPyObject_GenericSetAttr(obj, attr, v): PyObject_GenericSetAttr(obj, attr, v) diff --git a/golang/_golang_test.pyx b/golang/_golang_test.pyx index 3c9f60e..11fe7c3 100644 --- a/golang/_golang_test.pyx +++ b/golang/_golang_test.pyx @@ -435,3 +435,8 @@ cdef void _bench_select_nogil__func1(chan[int] ch1, chan[int] ch2, chan[structZ] if not ok: done.close() return + + +# ---- strings ----- + +include "_golang_str_test.pyx" diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index dfc2c92..9eac585 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -26,6 +26,7 @@ from golang.gcompat import qq from golang.strconv_test import byterange from golang.golang_test import readfile, assertDoc, _pyrun, dir_testprog, PIPE +from golang import _golang_test from gpython import _tEarlyStrSubclass from pytest import raises, mark, skip import sys @@ -2631,7 +2632,7 @@ def _(delta): # # XXX note !gpystr_only ... # XXX also test bytes? -def tests_strings_early_str_subclass(): +def test_strings_early_str_subclass(): xstr = _tEarlyStrSubclass # .tp_new should be adjusted to point to current str @@ -2664,6 +2665,62 @@ def tests_strings_early_str_subclass(): # XXX more... 
+# verify that all string types are accepted by getattr/setattr/delattr/hasattr & co. +@mark.parametrize('tx', (str, bstr, ustr)) +def test_strings_wrt_xxxattr(tx): + x = xstr(u'мир', tx) + assert type(x) is tx + + class C: pass + obj = C() + + t = _golang_test + vgetattr = [getattr, t.CPyObject_GetAttr] + [t.CPyObject_LookupAttr] if six.PY3 else [] + vsetattr = [setattr, t.CPyObject_SetAttr] + vdelattr = [delattr, t.CPyObject_DelAttr] + vhasattr = [hasattr, t.CPyObject_HasAttr] + + value = object() + + # run runs f on each element of v. + def run(f, v): + for e in v: + f(e) + + # attr is initially missing + def _(ga): + with raises(AttributeError): ga(obj, x) + run(_, vgetattr) + + def _(ha): + assert ha(obj, x) is False + run(_, vhasattr) + + def _(da): + with raises(AttributeError): da(obj, x) + run(_, vdelattr) + + # set attr -> make sure it is there -> del + for sa in vsetattr: + for da in vdelattr: + def _(ha): + assert ha(obj, x) is False + run(_, vhasattr) + sa(obj, x, value) + def _(ha): + assert ha(obj, x) is True + run(_, vhasattr) + def _(ga): + assert ga(obj, x) is value + da(obj, x) + def _(ha): + assert ha(obj, x) is False + run(_, vhasattr) + def _(ga): + with raises(AttributeError): ga(obj, x) + run(_, vgetattr) + + # ---- issues hit by users ---- # fixes for below issues have their corresponding tests in the main part above, but # we also add tests with original code where problems were hit. 
diff --git a/setup.py b/setup.py index 8d675b8..db7910e 100644 --- a/setup.py +++ b/setup.py @@ -510,7 +510,9 @@ def defif(name, ok): Ext('golang._golang_test', ['golang/_golang_test.pyx', 'golang/runtime/libgolang_test_c.c', - 'golang/runtime/libgolang_test.cpp']), + 'golang/runtime/libgolang_test.cpp'], + depends = [ + 'golang/_golang_str_test.pyx']), Ext('golang.pyx._runtime_test', ['golang/pyx/_runtime_test.pyx'], From a69d44dda5c83b8bd8664f36c9e7b775fed6af3a Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Wed, 8 May 2024 17:30:37 +0300 Subject: [PATCH 27/29] fixup! X golang_str: More fixes for bstr to be accepted as name of an attribute Contrary to py3.11, py3.9 also explicitly checks for unicode inside builtin getattr. -> Patch that explicitly as well. --- golang/_golang_str.pyx | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 890ab9a..94bcad3 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -2057,6 +2057,36 @@ cdef _patch_capi_object_attr_bstr(): cpatch(&_pobject_HasAttr, _object_xHasAttr) cpatch(&_pobject_LookupAttr, _object_xLookupAttr) + # py3 < 3.11 also verifies name to be unicode + # XXX move out of _patch_capi* ? 
+ import builtins + cdef object builtins_getattr = builtins.getattr + cdef object builtins_setattr = builtins.setattr + cdef object builtins_delattr = builtins.delattr + cdef object builtins_hasattr = builtins.hasattr + + def xgetattr(obj, name, *argv): + if isbstr(name): + name = pyustr(name) + return builtins_getattr(obj, name, *argv) + def xsetattr(obj, name, value): + if isbstr(name): + name = pyustr(name) + return builtins_setattr(obj, name, value) + def xdelattr(obj, name): + if isbstr(name): + name = pyustr(name) + return builtins_delattr(obj, name) + def xhasattr(obj, name): + if isbstr(name): + name = pyustr(name) + return builtins_hasattr(obj, name) + + builtins.getattr = xgetattr + builtins.setattr = xsetattr + builtins.delattr = xdelattr + builtins.hasattr = xhasattr + # ---- misc ---- From abf3dcec0309f0072093ec769158a16f668247cc Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Fri, 10 May 2024 12:22:23 +0300 Subject: [PATCH 28/29] X golang_str: Fix bstr/ustr __add__ and friends to return NotImplemented wrt unsupported types In bbbb58f0 (golang_str: bstr/ustr support for + and *) I've added support for binary string operations, but similarly to __eq__ did not handle correctly the case for arbitrary arguments that potentially define __radd__ and similar. As the result it breaks when running e.g. bstr + pyparsing.Regex File ".../pyparsing-2.4.7-py2.7.egg/pyparsing.py", line 6591, in pyparsing_common _full_ipv6_address = (_ipv6_part + (':' + _ipv6_part) * 7).setName("full IPv6 address") File "golang/_golang_str.pyx", line 469, in golang._golang._pybstr.__add__ return pyb(zbytes.__add__(a, _pyb_coerce(b))) File "golang/_golang_str.pyx", line 243, in golang._golang._pyb_coerce raise TypeError("b: coerce: invalid type %s" % type(x)) TypeError: b: coerce: invalid type because pyparsing.Regex is a type, that does not inherit from str, but defines its own __radd__ to handle str + Regex as Regex. 
-> Fix it by returning NotImplemented from under __add__ and other operations where it is needed so that bstr and ustr behave in the same way as builtin str wrt third types, but care to handle bstr/ustr promise that only explicit conversion through `b` and `u` accept objects with buffer interface. Automatic coercion does not. --- golang/_golang_str.pyx | 28 ++++++++++++++++++--- golang/golang_str_test.py | 51 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 4 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index 94bcad3..b6d8640 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -466,7 +466,13 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 if type(a) is not pybstr: assert type(b) is pybstr return b.__radd__(a) - return pyb(zbytes.__add__(a, _pyb_coerce(b))) + try: + b = _pyb_coerce(b) + except TypeError: + if not hasattr(b, '__radd__'): + raise # don't let python to handle e.g. bstr + memoryview automatically + return NotImplemented + return pyb(zbytes.__add__(a, b)) def __radd__(b, a): # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) @@ -484,7 +490,11 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 if type(a) is not pybstr: assert type(b) is pybstr return b.__rmul__(a) - return pyb(zbytes.__mul__(a, b)) + try: + _ = zbytes.__mul__(a, b) + except TypeError: # TypeError: `b` cannot be interpreted as an integer + return NotImplemented + return pyb(_) def __rmul__(b, a): return b.__mul__(a) @@ -821,7 +831,13 @@ cdef class _pyustr(unicode): if type(a) is not pyustr: assert type(b) is pyustr, type(b) return b.__radd__(a) - return pyu(zunicode.__add__(a, _pyu_coerce(b))) + try: + b = _pyu_coerce(b) + except TypeError: + if not hasattr(b, '__radd__'): + raise # don't let py2 to handle e.g. 
unicode + buffer automatically + return NotImplemented + return pyu(zunicode.__add__(a, b)) def __radd__(b, a): # a.__add__(b) returned NotImplementedError, e.g. for unicode.__add__(bstr) @@ -841,7 +857,11 @@ cdef class _pyustr(unicode): if type(a) is not pyustr: assert type(b) is pyustr, type(b) return b.__rmul__(a) - return pyu(zunicode.__mul__(a, b)) + try: + _ = zunicode.__mul__(a, b) + except TypeError: # TypeError: `b` cannot be interpreted as an integer + return NotImplemented + return pyu(_) def __rmul__(b, a): return b.__mul__(a) diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index 9eac585..f9ae6c7 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -982,6 +982,57 @@ def test_strings_ops2_bufreject(tx, ty): y < x +# verify string operations like `x + y` for x being str/bstr/ustr and y being +# arbitrary type that defines __rop__. +@mark.parametrize('tx', (str, bstr, ustr)) +def test_strings_ops2_rop_any(tx): + # ROp(rop, x, y) represents call to y.__rop__(x) + class ROp: + def __init__(r, rop, x, y): + r.rop, r.x, r.y = rop, x, y + def __repr__(r): + return 'ROp(%r, %r, %r)' % (r.rop, r.x, r.y) + def __eq__(a, b): + return isinstance(b, ROp) and a.rop == b.rop and a.x is b.x and a.y is b.y + def __ne__(a, b): + return not (a == b) + + class C: + def __radd__(b, a): return ROp('radd', a, b) + def __rsub__(b, a): return ROp('rsub', a, b) + def __rmul__(b, a): return ROp('rmul', a, b) + def __rdiv__(b, a): return ROp('rdiv', a, b) + def __rtruediv__(b, a): return ROp('rtruediv', a, b) + def __rfloordiv__(b, a): return ROp('rfloordiv', a, b) + def __rmod__(b, a): return ROp('rmod', a, b) + def __rdivmod__(b, a): return ROp('rdivmod', a, b) + def __rpow__(b, a): return ROp('rpow', a, b) + def __rlshift__(b, a): return ROp('rlshift', a, b) + def __rrshift__(b, a): return ROp('rrshift', a, b) + def __rand__(b, a): return ROp('rand', a, b) + def __rxor__(b, a): return ROp('rxor', a, b) + def __ror__(b, a): return 
ROp('ror', a, b) + + + x = xstr(u'мир', tx) + y = C() + R = lambda rop: ROp(rop, x, y) + + assert x + y == R('radd') + assert x - y == R('rsub') + assert x * y == R('rmul') + assert x / y == R(x32('rtruediv', 'rdiv')) + assert x // y == R('rfloordiv') + # x % y is always handled by str and verified in test_strings_mod_and_format + assert divmod(x,y) == R('rdivmod') + assert x ** y == R('rpow') + assert x << y == R('rlshift') + assert x >> y == R('rrshift') + assert x & y == R('rand') + assert x ^ y == R('rxor') + assert x | y == R('ror') + + # verify string operations like `x == *` for x being bstr/ustr. # Those operations must succeed for any hashable type or else bstr/ustr could # not be used as dict keys. From 93e9c25a6f527a5aa26cab92f3c0cdc57271b68d Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Fri, 10 May 2024 17:14:38 +0300 Subject: [PATCH 29/29] X golang_str: Add ustr.decode for symmetry with bstr.decode and because gpy2 breaks without it Without working unicode.decode gpy2 fails when running ERP5 as follows: $ /srv/slapgrid/slappart49/t/ekg/i/5/bin/runTestSuite --help No handlers could be found for logger "SecurityInfo" Traceback (most recent call last): File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/bin/.runTestSuite.pyexe", line 296, in main() File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/pygolang/gpython/__init__.py", line 484, in main pymain(argv, init) File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/pygolang/gpython/__init__.py", line 292, in pymain run(mmain) File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/pygolang/gpython/__init__.py", line 192, in run _execfile(filepath, mmain.__dict__) File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/pygolang/gpython/__init__.py", line 339, in _execfile six.exec_(code, globals, locals) File 
"/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/eggs/six-1.16.0-py2.7.egg/six.py", line 735, in exec_ exec("""exec _code_ in _globs_, _locs_""") File "", line 1, in File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/bin/runTestSuite", line 10, in from Products.ERP5Type.tests.runTestSuite import main; sys.exit(main()) File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/erp5/product/ERP5Type/__init__.py", line 96, in from . import ZopePatch File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/erp5/product/ERP5Type/ZopePatch.py", line 75, in from Products.ERP5Type.patches import ZopePageTemplateUtils File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/parts/erp5/product/ERP5Type/patches/ZopePageTemplateUtils.py", line 58, in convertToUnicode(u'', 'text/xml', ()) File "/srv/slapgrid/slappart49/t/ekg/soft/b5048b47894a7612651c7fe81c2c8636/eggs/Zope-4.8.9+slapospatched002-py2.7.egg/Products/PageTemplates/utils.py", line 73, in convertToUnicode return source.decode(encoding), encoding AttributeError: unreadable attribute and in general if we treat both bstr ans ustr being two different representations of the same entity, if we have bstr.decode, having ustr.decode is also needed for symmetry with both operations converting bytes representation of the string into unicode. Now there is full symmetry in between bstr/ustr and encode/decode. Quoting updated encode/decode text: Encode encodes unicode representation of the string into bytes, leaving string domain. Decode decodes bytes representation of the string into ustr, staying inside string domain. Both bstr and ustr are accepted by encode and decode treating them as two different representations of the same entity. On encoding, for bstr, the string representation is first converted to unicode and encoded to bytes from there. For ustr unicode representation of the string is directly encoded. 
On decoding, for ustr, the string representation is first converted to bytes and decoded to unicode from there. For bstr bytes representation of the string is directly decoded. --- golang/_golang_str.pyx | 61 +++++++++++++++------- golang/golang_str_test.py | 106 +++++++++++++++++++++----------------- 2 files changed, 100 insertions(+), 67 deletions(-) diff --git a/golang/_golang_str.pyx b/golang/_golang_str.pyx index b6d8640..0e43f48 100644 --- a/golang/_golang_str.pyx +++ b/golang/_golang_str.pyx @@ -528,25 +528,31 @@ cdef class _pybstr(bytes): # https://github.com/cython/cython/issues/711 # encode/decode # - # Encoding strings - both bstr and ustr - convert type to bytes leaving string domain. + # Encode encodes unicode representation of the string into bytes, leaving string domain. + # Decode decodes bytes representation of the string into ustr, staying inside string domain. # - # Encode treats bstr and ustr as string, encoding unicode representation of - # the string to bytes. For bstr it means that the string representation is - # first converted to unicode and encoded to bytes from there. For ustr - # unicode representation of the string is directly encoded. + # Both bstr and ustr are accepted by encode and decode treating them as two + # different representations of the same entity. # - # Decoding strings is not provided. However for bstr the decode is provided - # treating input data as raw bytes and producing ustr as the result. + # On encoding, for bstr, the string representation is first converted to + # unicode and encoded to bytes from there. For ustr unicode representation + # of the string is directly encoded. + # + # On decoding, for ustr, the string representation is first converted to + # bytes and decoded to unicode from there. For bstr bytes representation of + # the string is directly decoded. # # NOTE __bytes__ and encode are the only operations that leave string domain. 
def encode(self, encoding=None, errors=None): # -> bytes encoding, errors = _encoding_with_defaults(encoding, errors) + if encoding == 'utf-8' and errors == 'surrogateescape': + return _bdata(self) + # on py2 e.g. bytes.encode('string-escape') works on bytes directly if PY_MAJOR_VERSION < 3: - codec = pycodecs.lookup(encoding) - if not codec._is_text_encoding or \ - encoding in ('string-escape',): # string-escape also works on bytes + codec = _pycodecs_lookup_binary(encoding) + if codec is not None: return codec.encode(self, errors)[0] return pyu(self).encode(encoding, errors) @@ -894,15 +900,23 @@ cdef class _pyustr(unicode): encoding, errors = _encoding_with_defaults(encoding, errors) if encoding == 'utf-8' and errors == 'surrogateescape': - x = _utf8_encode_surrogateescape(self) - else: - x = zunicode.encode(self, encoding, errors) - return x + return _utf8_encode_surrogateescape(self) - if PY_MAJOR_VERSION < 3: - # whiteout decode inherited from unicode - # TODO ideally whiteout it in such a way that ustr.decode also raises AttributeError - decode = property(doc='ustr has no decode') + # on py2 e.g. 'string-escape' works on bytes + if PY_MAJOR_VERSION < 3: + codec = _pycodecs_lookup_binary(encoding) + if codec is not None: + return codec.encode(pyb(self), errors)[0] + + return zunicode.encode(self, encoding, errors) + + def decode(self, encoding=None, errors=None): # -> ustr | bstr for encodings like string-escape + encoding, errors = _encoding_with_defaults(encoding, errors) + + if encoding == 'utf-8' and errors == 'surrogateescape': + return pyu(self) + + return pyb(self).decode(encoding, errors) # all other string methods @@ -2161,6 +2175,15 @@ cdef extern from "Python.h": """ bint _XPyMapping_Check(object o) +# _pycodecs_lookup_binary returns codec corresponding to encoding if the codec works on binary input. +# example of such codecs are string-escape and hex encodings. 
+cdef _pycodecs_lookup_binary(encoding): # -> codec | None (text) | LookupError (no such encoding) + codec = pycodecs.lookup(encoding) + if not codec._is_text_encoding or \ + encoding in ('string-escape',): # string-escape also works on bytes + return codec + return None + # ---- UTF-8 encode/decode ---- @@ -2426,8 +2449,6 @@ cdef _patch_str(): # XXX explain bpreserve_slots = upreserve_slots = ("maketrans",) - if PY_MAJOR_VERSION < 3: - upreserve_slots += ("decode",) # patch unicode to be pyustr. This patches # - unicode (py2) diff --git a/golang/golang_str_test.py b/golang/golang_str_test.py index f9ae6c7..0896032 100644 --- a/golang/golang_str_test.py +++ b/golang/golang_str_test.py @@ -657,58 +657,61 @@ def test_strings_encodedecode(): us = u('мир') bs = b('май') - _ = us.encode(); assert type(_) is bytes; assert _ == xbytes('мир') - _ = us.encode('utf-8'); assert type(_) is bytes; assert _ == xbytes('мир') - _ = bs.encode(); assert type(_) is bytes; assert _ == xbytes('май') - _ = bs.encode('utf-8'); assert type(_) is bytes; assert _ == xbytes('май') - - # TODO also raise AttributeError on .encode/.decode lookup on classes - assert not hasattr(us, 'decode') #; assert not hasattr(ustr, 'decode') - _ = bs.decode(); assert type(_) is ustr; assert _udata(_) == u'май' - _ = bs.decode('utf-8'); assert type(_) is ustr; assert _udata(_) == u'май' + # encode does obj.encode and makes sure result type is bytes + def encode(obj, *argv): + _ = obj.encode(*argv) + assert type(_) is bytes + return _ + + # decode does obj.decode and makes sure result type is ustr + def decode(obj, *argv): + _ = obj.decode(*argv) + assert type(_) is ustr + return _ + + _ = encode(us); assert _ == xbytes('мир') + _ = encode(us, 'utf-8'); assert _ == xbytes('мир') + _ = encode(bs); assert _ == xbytes('май') + _ = encode(bs, 'utf-8'); assert _ == xbytes('май') + + _ = decode(us); assert _udata(_) == u'мир' + _ = decode(us, 'utf-8'); assert _udata(_) == u'мир' + _ = decode(bs); assert _udata(_) == 
u'май' + _ = decode(bs, 'utf-8'); assert _udata(_) == u'май' # !utf-8 - k8mir = u'мир'.encode('koi8-r') - b_k8mir = b(k8mir) - assert type(b_k8mir) is bstr - assert _bdata(b_k8mir) == k8mir - assert _bdata(b_k8mir) == b'\xcd\xc9\xd2' + k8mir = u'мир'.encode('koi8-r'); assert k8mir == b'\xcd\xc9\xd2' + b_k8mir = b(k8mir); assert type(b_k8mir) is bstr; assert _bdata(b_k8mir) == b'\xcd\xc9\xd2' + u_k8mir = u(k8mir); assert type(u_k8mir) is ustr; assert _udata(u_k8mir) == u'\udccd\udcc9\udcd2' - _ = b_k8mir.decode('koi8-r') - assert type(_) is ustr - assert _udata(_) == u'мир' + _ = decode(b_k8mir, 'koi8-r'); assert _udata(_) == u'мир' + _ = decode(u_k8mir, 'koi8-r'); assert _udata(_) == u'мир' - cpmir = us.encode('cp1251') - assert type(cpmir) is bytes - assert cpmir == u'мир'.encode('cp1251') - assert cpmir == b'\xec\xe8\xf0' + _ = encode(us, 'cp1251'); assert _ == u'мир'.encode('cp1251'); assert _ == b'\xec\xe8\xf0' + _ = encode(bs, 'cp1251'); assert _ == u'май'.encode('cp1251'); assert _ == b'\xec\xe0\xe9' # decode/encode errors - u_k8mir = b_k8mir.decode() # no decode error with - assert type(u_k8mir) is ustr # default parameters - assert _udata(u_k8mir) == u'\udccd\udcc9\udcd2' - _ = b_k8mir.decode('utf-8', 'surrogateescape') # no decode error with - assert type(_) is ustr # explicit utf-8/surrogateescape - assert _udata(_) == _udata(u_k8mir) - - with raises(UnicodeDecodeError): # decode error if encoding is explicitly specified - b_k8mir.decode('utf-8') - with raises(UnicodeDecodeError): - b_k8mir.decode('utf-8', 'strict') - with raises(UnicodeDecodeError): - b_k8mir.decode('ascii') - - with raises(UnicodeEncodeError): - us.encode('ascii') - - _ = u_k8mir.encode() # no encode error with - assert type(_) is bytes # default parameters - assert _ == k8mir - _ = u_k8mir.encode('utf-8', 'surrogateescape') # no encode error with - assert type(_) is bytes # explicit utf-8/surrogateescape + _ = decode(b_k8mir); assert _ == u_k8mir # no decode error with default 
parameters + _ = decode(b_k8mir, 'utf-8', 'surrogateescape') # or with explicit utf-8/surrogateescape + assert _ == u_k8mir + _ = decode(u_k8mir); assert _ == u_k8mir + _ = decode(u_k8mir, 'utf-8', 'surrogateescape'); assert _ == u_k8mir + + with raises(UnicodeDecodeError): b_k8mir.decode('utf-8') # decode error on unmatching explicit encoding + with raises(UnicodeDecodeError): u_k8mir.decode('utf-8') + with raises(UnicodeDecodeError): b_k8mir.decode('utf-8', 'strict') + with raises(UnicodeDecodeError): u_k8mir.decode('utf-8', 'strict') + with raises(UnicodeDecodeError): b_k8mir.decode('ascii') + with raises(UnicodeDecodeError): u_k8mir.decode('ascii') + + with raises(UnicodeEncodeError): us.encode('ascii') # encode error if target encoding cannot represent string + with raises(UnicodeEncodeError): bs.encode('ascii') + + _ = encode(u_k8mir); assert _ == k8mir # no encode error with default parameters + _ = encode(u_k8mir, 'utf-8', 'surrogateescape') # or with explicit utf-8/surrogateescape assert _ == k8mir - _ = b_k8mir.encode() # bstr.encode = bstr -> ustr -> encode - assert type(_) is bytes + _ = encode(b_k8mir); assert _ == k8mir # bstr.encode = bstr -> ustr -> encode + _ = encode(b_k8mir, 'utf-8', 'surrogateescape') assert _ == k8mir # on py2 unicode.encode accepts surrogate pairs and does not complain @@ -726,19 +729,28 @@ def test_strings_encodedecode(): # verify that this exact semantic is preserved if six.PY3: with raises(LookupError): bs.decode('hex') + with raises(LookupError): us.decode('hex') with raises(LookupError): bs.decode('string-escape') + with raises(LookupError): us.decode('string-escape') else: _ = bs.decode('string-escape'); assert type(_) is bstr; assert _ == bs + _ = us.decode('string-escape'); assert type(_) is bstr; assert _ == us _ = b(r'x\'y').decode('string-escape'); assert type(_) is bstr; assert _bdata(_) == b"x'y" + _ = u(r'x\'y').decode('string-escape'); assert type(_) is bstr; assert _bdata(_) == b"x'y" _ = 
b('616263').decode('hex'); assert type(_) is bstr; assert _bdata(_) == b"abc" + _ = u('616263').decode('hex'); assert type(_) is bstr; assert _bdata(_) == b"abc" # similarly for bytes.encode if six.PY3: with raises(LookupError): bs.encode('hex') + with raises(LookupError): us.encode('hex') with raises(LookupError): bs.encode('string-escape') + with raises(LookupError): us.encode('string-escape') else: - _ = bs.encode('hex'); assert type(_) is bytes; assert _ == b'd0bcd0b0d0b9' - _ = bs.encode('string-escape'); assert type(_) is bytes; assert _ == br'\xd0\xbc\xd0\xb0\xd0\xb9' + _ = encode(bs, 'hex'); assert _ == b'd0bcd0b0d0b9' + _ = encode(us, 'hex'); assert _ == b'd0bcd0b8d180' + _ = encode(bs, 'string-escape'); assert _ == br'\xd0\xbc\xd0\xb0\xd0\xb9' + _ = encode(us, 'string-escape'); assert _ == br'\xd0\xbc\xd0\xb8\xd1\x80' # verify string operations like `x * 3` for all cases from bytes, bytearray, unicode, bstr and ustr.