@@ -872,16 +872,30 @@ def test_ignore_case(self):
872
872
self .assertEqual (re .match (r"((a)\s(abc|a))" , "a a" , re .I ).group (1 ), "a a" )
873
873
self .assertEqual (re .match (r"((a)\s(abc|a)*)" , "a aa" , re .I ).group (1 ), "a aa" )
874
874
875
- assert '\u212a ' .lower () == 'k' # 'K'
875
+ # Two different characters have the same lowercase.
876
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
876
877
self .assertTrue (re .match (r'K' , '\u212a ' , re .I ))
877
878
self .assertTrue (re .match (r'k' , '\u212a ' , re .I ))
878
879
self .assertTrue (re .match (r'\u212a' , 'K' , re .I ))
879
880
self .assertTrue (re .match (r'\u212a' , 'k' , re .I ))
880
- assert '\u017f ' .upper () == 'S' # 'ſ'
881
+
882
+ # Two different characters have the same uppercase.
883
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
881
884
self .assertTrue (re .match (r'S' , '\u017f ' , re .I ))
882
885
self .assertTrue (re .match (r's' , '\u017f ' , re .I ))
883
886
self .assertTrue (re .match (r'\u017f' , 'S' , re .I ))
884
887
self .assertTrue (re .match (r'\u017f' , 's' , re .I ))
888
+
889
+ # Two different characters have the same uppercase. Unicode 9.0+.
890
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
891
+ self .assertTrue (re .match (r'\u0412' , '\u0432 ' , re .I ))
892
+ self .assertTrue (re .match (r'\u0412' , '\u1c80 ' , re .I ))
893
+ self .assertTrue (re .match (r'\u0432' , '\u0412 ' , re .I ))
894
+ self .assertTrue (re .match (r'\u0432' , '\u1c80 ' , re .I ))
895
+ self .assertTrue (re .match (r'\u1c80' , '\u0412 ' , re .I ))
896
+ self .assertTrue (re .match (r'\u1c80' , '\u0432 ' , re .I ))
897
+
898
+ # Two different characters have the same multicharacter uppercase.
885
899
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
886
900
self .assertTrue (re .match (r'\ufb05' , '\ufb06 ' , re .I ))
887
901
self .assertTrue (re .match (r'\ufb06' , '\ufb05 ' , re .I ))
@@ -895,16 +909,31 @@ def test_ignore_case_set(self):
895
909
self .assertTrue (re .match (br'[19a]' , b'a' , re .I ))
896
910
self .assertTrue (re .match (br'[19a]' , b'A' , re .I ))
897
911
self .assertTrue (re .match (br'[19A]' , b'a' , re .I ))
898
- assert '\u212a ' .lower () == 'k' # 'K'
912
+
913
+ # Two different characters have the same lowercase.
914
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
899
915
self .assertTrue (re .match (r'[19K]' , '\u212a ' , re .I ))
900
916
self .assertTrue (re .match (r'[19k]' , '\u212a ' , re .I ))
901
917
self .assertTrue (re .match (r'[19\u212a]' , 'K' , re .I ))
902
918
self .assertTrue (re .match (r'[19\u212a]' , 'k' , re .I ))
903
- assert '\u017f ' .upper () == 'S' # 'ſ'
919
+
920
+ # Two different characters have the same uppercase.
921
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
904
922
self .assertTrue (re .match (r'[19S]' , '\u017f ' , re .I ))
905
923
self .assertTrue (re .match (r'[19s]' , '\u017f ' , re .I ))
906
924
self .assertTrue (re .match (r'[19\u017f]' , 'S' , re .I ))
907
925
self .assertTrue (re .match (r'[19\u017f]' , 's' , re .I ))
926
+
927
+ # Two different characters have the same uppercase. Unicode 9.0+.
928
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
929
+ self .assertTrue (re .match (r'[19\u0412]' , '\u0432 ' , re .I ))
930
+ self .assertTrue (re .match (r'[19\u0412]' , '\u1c80 ' , re .I ))
931
+ self .assertTrue (re .match (r'[19\u0432]' , '\u0412 ' , re .I ))
932
+ self .assertTrue (re .match (r'[19\u0432]' , '\u1c80 ' , re .I ))
933
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0412 ' , re .I ))
934
+ self .assertTrue (re .match (r'[19\u1c80]' , '\u0432 ' , re .I ))
935
+
936
+ # Two different characters have the same multicharacter uppercase.
908
937
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
909
938
self .assertTrue (re .match (r'[19\ufb05]' , '\ufb06 ' , re .I ))
910
939
self .assertTrue (re .match (r'[19\ufb06]' , '\ufb05 ' , re .I ))
@@ -928,16 +957,30 @@ def test_ignore_case_range(self):
928
957
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010428 ' , re .I ))
929
958
self .assertTrue (re .match (r'[\U00010400-\U00010427]' , '\U00010400 ' , re .I ))
930
959
931
- assert '\u212a ' .lower () == 'k' # 'K'
960
+ # Two different characters have the same lowercase.
961
+ assert 'K' .lower () == '\u212a ' .lower () == 'k' # 'K'
932
962
self .assertTrue (re .match (r'[J-M]' , '\u212a ' , re .I ))
933
963
self .assertTrue (re .match (r'[j-m]' , '\u212a ' , re .I ))
934
964
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'K' , re .I ))
935
965
self .assertTrue (re .match (r'[\u2129-\u212b]' , 'k' , re .I ))
936
- assert '\u017f ' .upper () == 'S' # 'ſ'
966
+
967
+ # Two different characters have the same uppercase.
968
+ assert 's' .upper () == '\u017f ' .upper () == 'S' # 'ſ'
937
969
self .assertTrue (re .match (r'[R-T]' , '\u017f ' , re .I ))
938
970
self .assertTrue (re .match (r'[r-t]' , '\u017f ' , re .I ))
939
971
self .assertTrue (re .match (r'[\u017e-\u0180]' , 'S' , re .I ))
940
972
self .assertTrue (re .match (r'[\u017e-\u0180]' , 's' , re .I ))
973
+
974
+ # Two different characters have the same uppercase. Unicode 9.0+.
975
+ assert '\u0432 ' .upper () == '\u1c80 ' .upper () == '\u0412 ' # 'в', 'ᲀ', 'В'
976
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u0432 ' , re .I ))
977
+ self .assertTrue (re .match (r'[\u0411-\u0413]' , '\u1c80 ' , re .I ))
978
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u0412 ' , re .I ))
979
+ self .assertTrue (re .match (r'[\u0431-\u0433]' , '\u1c80 ' , re .I ))
980
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0412 ' , re .I ))
981
+ self .assertTrue (re .match (r'[\u1c80-\u1c82]' , '\u0432 ' , re .I ))
982
+
983
+ # Two different characters have the same multicharacter uppercase.
941
984
assert '\ufb05 ' .upper () == '\ufb06 ' .upper () == 'ST' # 'ſt', 'st'
942
985
self .assertTrue (re .match (r'[\ufb04-\ufb05]' , '\ufb06 ' , re .I ))
943
986
self .assertTrue (re .match (r'[\ufb06-\ufb07]' , '\ufb05 ' , re .I ))
0 commit comments