To: vim_dev@googlegroups.com
Subject: Patch 9.0.1629
Fcc: outbox
From: Bram Moolenaar
Mime-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
------------

Patch 9.0.1629
Problem:    Having utf16idx() rounding up is inconvenient.
Solution:   Make utf16idx() round down. (Yegappan Lakshmanan, closes #12523)
Files:      runtime/doc/builtin.txt, src/strings.c,
            src/testdir/test_functions.vim


*** ../vim-9.0.1628/runtime/doc/builtin.txt	2023-06-08 17:09:40.188768829 +0100
--- runtime/doc/builtin.txt	2023-06-14 13:05:21.862735424 +0100
***************
*** 10114,10121 ****
  		When {charidx} is present and TRUE, {idx} is used as the
  		character index in the String {string} instead of as the byte
  		index.
! 		An {idx} in the middle of a UTF-8 sequence is rounded upwards
! 		to the end of that sequence.
  
  		Returns -1 if the arguments are invalid or if there are less
  		than {idx} bytes in {string}. If there are exactly {idx} bytes
--- 10127,10134 ----
  		When {charidx} is present and TRUE, {idx} is used as the
  		character index in the String {string} instead of as the byte
  		index.
! 		An {idx} in the middle of a UTF-8 sequence is rounded
! 		downwards to the beginning of that sequence.
  
  		Returns -1 if the arguments are invalid or if there are less
  		than {idx} bytes in {string}. If there are exactly {idx} bytes
*** ../vim-9.0.1628/src/strings.c	2023-06-08 17:09:40.188768829 +0100
--- src/strings.c	2023-06-14 13:05:21.862735424 +0100
***************
*** 1743,1750 ****
  
  
  /*
-  *
   * "utf16idx()" function
   */
      void
  f_utf16idx(typval_T *argvars, typval_T *rettv)
--- 1743,1752 ----
  
  
  /*
   * "utf16idx()" function
+  *
+  * Converts a byte or character offset in a string to the corresponding UTF-16
+  * code unit offset.
   */
      void
  f_utf16idx(typval_T *argvars, typval_T *rettv)
***************
*** 1780,1785 ****
--- 1782,1788 ----
  
      char_u	*p;
      int		len;
+     int		utf16idx = 0;
  
      for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++)
      {
  	if (*p == NUL)
***************
*** 1791,1796 ****
--- 1794,1800 ----
  		rettv->vval.v_number = len;
  	    return;
  	}
+ 	utf16idx = len;
  	int clen = ptr2len(p);
  	int c = (clen > 1) ? utf_ptr2char(p) : *p;
  	if (c > 0xFFFF)
***************
*** 1800,1806 ****
  	    idx--;
      }
  
!     rettv->vval.v_number = len > 0 ? len - 1 : 0;
  }
  
  /*
--- 1804,1810 ----
  	    idx--;
      }
  
!     rettv->vval.v_number = utf16idx;
  }
  
  /*
*** ../vim-9.0.1628/src/testdir/test_functions.vim	2023-06-08 17:09:40.192768840 +0100
--- src/testdir/test_functions.vim	2023-06-14 13:05:21.862735424 +0100
***************
*** 1518,1531 ****
    " UTF-16 index of a string with four byte characters
    let str = 'a😊😊b'
    call assert_equal(0, utf16idx(str, 0))
!   call assert_equal(2, utf16idx(str, 1))
!   call assert_equal(2, utf16idx(str, 2))
!   call assert_equal(2, utf16idx(str, 3))
!   call assert_equal(2, utf16idx(str, 4))
!   call assert_equal(4, utf16idx(str, 5))
!   call assert_equal(4, utf16idx(str, 6))
!   call assert_equal(4, utf16idx(str, 7))
!   call assert_equal(4, utf16idx(str, 8))
    call assert_equal(5, utf16idx(str, 9))
    call assert_equal(6, utf16idx(str, 10))
    call assert_equal(-1, utf16idx(str, 11))
--- 1518,1531 ----
    " UTF-16 index of a string with four byte characters
    let str = 'a😊😊b'
    call assert_equal(0, utf16idx(str, 0))
!   call assert_equal(1, utf16idx(str, 1))
!   call assert_equal(1, utf16idx(str, 2))
!   call assert_equal(1, utf16idx(str, 3))
!   call assert_equal(1, utf16idx(str, 4))
!   call assert_equal(3, utf16idx(str, 5))
!   call assert_equal(3, utf16idx(str, 6))
!   call assert_equal(3, utf16idx(str, 7))
!   call assert_equal(3, utf16idx(str, 8))
    call assert_equal(5, utf16idx(str, 9))
    call assert_equal(6, utf16idx(str, 10))
    call assert_equal(-1, utf16idx(str, 11))
***************
*** 1621,1628 ****
    " UTF-16 index of a string with four byte characters
    let str = "a😊😊b"
    call assert_equal(0, utf16idx(str, 0, v:false, v:true))
!   call assert_equal(2, utf16idx(str, 1, v:false, v:true))
!   call assert_equal(4, utf16idx(str, 2, v:false, v:true))
    call assert_equal(5, utf16idx(str, 3, v:false, v:true))
    call assert_equal(6, utf16idx(str, 4, v:false, v:true))
    call assert_equal(-1, utf16idx(str, 5, v:false, v:true))
--- 1621,1628 ----
    " UTF-16 index of a string with four byte characters
    let str = "a😊😊b"
    call assert_equal(0, utf16idx(str, 0, v:false, v:true))
!   call assert_equal(1, utf16idx(str, 1, v:false, v:true))
!   call assert_equal(3, utf16idx(str, 2, v:false, v:true))
    call assert_equal(5, utf16idx(str, 3, v:false, v:true))
    call assert_equal(6, utf16idx(str, 4, v:false, v:true))
    call assert_equal(-1, utf16idx(str, 5, v:false, v:true))
*** ../vim-9.0.1628/src/version.c	2023-06-13 22:44:53.538988256 +0100
--- src/version.c	2023-06-14 13:07:03.310895989 +0100
***************
*** 697,698 ****
--- 697,700 ----
  {   /* Add new patch number below this line */
+ /**/
+     1629,
  /**/

-- 
"Hit any key to continue" does _not_ mean you can hit the on/off button!

 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
///                                                                      \\\
\\\        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ ///
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///