From 300de1e98ac236bc887b49c0fbd7398266237b36 Mon Sep 17 00:00:00 2001 From: "Gregory P. Smith" <68491+gpshead@users.noreply.github.com> Date: Sun, 15 Feb 2026 17:43:39 -0800 Subject: [PATCH 1/4] gh-86519: Add prefixmatch APIs to the re module (GH-31137) Adds `prefixmatch` APIs to the re module as an alternate name for our long existing `match` APIs to help alleviate a common Python confusion for those coming from other languages regular expression libraries. These alleviate common confusion around what "match" means as Python is different than other popular languages regex libraries in our use of the term as an API name. The original `match` names are **NOT being deprecated**. Source tooling like linters, IDEs, and LLMs could suggest using `prefixmatch` instead of match to improve code health and reduce cognitive burden of understanding the intent of code when configured for a modern minimum Python version. See the documentation changes for a better description. Discussions took place in the PR, in the issue, and finally at https://discuss.python.org/t/add-re-prefixmatch-deprecate-re-match/105927 Co-Authored-By: Claude Opus 4.5 Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com> Co-Authored-By: Claude Opus 4.6 --- Doc/library/re.rst | 193 +++++++++++------- Doc/whatsnew/3.15.rst | 15 +- Lib/re/__init__.py | 38 ++-- Lib/test/test_inspect/test_inspect.py | 5 +- Lib/test/test_re.py | 22 +- .../2022-02-05-00-15-03.bpo-42353.0ebVGG.rst | 10 + Modules/_sre/clinic/sre.c.h | 38 ++-- Modules/_sre/sre.c | 53 ++++- 8 files changed, 251 insertions(+), 123 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst diff --git a/Doc/library/re.rst b/Doc/library/re.rst index 734301317283fb..df99acf354bf1b 100644 --- a/Doc/library/re.rst +++ b/Doc/library/re.rst @@ -837,7 +837,7 @@ Flags value:: def myfunc(text, flag=re.NOFLAG): - return re.match(text, flag) + return re.search(text, flag) .. versionadded:: 3.11 @@ -893,8 +893,8 @@ Functions Compile a regular expression pattern into a :ref:`regular expression object `, which can be used for matching using its - :func:`~Pattern.match`, :func:`~Pattern.search` and other methods, described - below. + :func:`~Pattern.prefixmatch`, + :func:`~Pattern.search`, and other methods, described below. The expression's behaviour can be modified by specifying a *flags* value. Values can be any of the `flags`_ variables, combined using bitwise OR @@ -903,11 +903,11 @@ Functions The sequence :: prog = re.compile(pattern) - result = prog.match(string) + result = prog.search(string) is equivalent to :: - result = re.match(pattern, string) + result = re.search(pattern, string) but using :func:`re.compile` and saving the resulting regular expression object for reuse is more efficient when the expression will be used several @@ -933,6 +933,7 @@ Functions (the ``|`` operator). +.. function:: prefixmatch(pattern, string, flags=0) .. function:: match(pattern, string, flags=0) If zero or more characters at the beginning of *string* match the regular @@ -940,8 +941,10 @@ Functions ``None`` if the string does not match the pattern; note that this is different from a zero-length match. - Note that even in :const:`MULTILINE` mode, :func:`re.match` will only match - at the beginning of the string and not at the beginning of each line. + .. note:: + + Even in :const:`MULTILINE` mode, this will only match at the + beginning of the string and not at the beginning of each line. If you want to locate a match anywhere in *string*, use :func:`search` instead (see also :ref:`search-vs-match`). @@ -950,6 +953,18 @@ Functions Values can be any of the `flags`_ variables, combined using bitwise OR (the ``|`` operator). + This function now has two names and has long been known as + :func:`~re.match`. Use that name when you need to retain compatibility with + older Python versions. + + .. versionchanged:: next + The alternate :func:`~re.prefixmatch` name of this API was added as a + more explicitly descriptive name than :func:`~re.match`. Use it to better + express intent. The norm in other languages and regular expression + implementations is to use the term *match* to refer to the behavior of + what Python has always called :func:`~re.search`. + See :ref:`prefixmatch-vs-match`. + .. function:: fullmatch(pattern, string, flags=0) @@ -1271,6 +1286,7 @@ Regular Expression Objects >>> pattern.search("dog", 1) # No match; search doesn't include the "d" +.. method:: Pattern.prefixmatch(string[, pos[, endpos]]) .. method:: Pattern.match(string[, pos[, endpos]]) If zero or more characters at the *beginning* of *string* match this regular @@ -1278,17 +1294,32 @@ Regular Expression Objects string does not match the pattern; note that this is different from a zero-length match. + Note that even in :const:`MULTILINE` mode, this will only match at the + beginning of the string and not at the beginning of each line. + The optional *pos* and *endpos* parameters have the same meaning as for the :meth:`~Pattern.search` method. :: >>> pattern = re.compile("o") - >>> pattern.match("dog") # No match as "o" is not at the start of "dog". - >>> pattern.match("dog", 1) # Match as "o" is the 2nd character of "dog". + >>> pattern.prefixmatch("dog") # No match as "o" is not at the start of "dog". + >>> pattern.prefixmatch("dog", 1) # Match as "o" is the 2nd character of "dog". If you want to locate a match anywhere in *string*, use :meth:`~Pattern.search` instead (see also :ref:`search-vs-match`). + This method now has two names and has long been known as + :meth:`~Pattern.match`. Use that name when you need to retain compatibility + with older Python versions. + + .. versionchanged:: next + The alternate :meth:`~Pattern.prefixmatch` name of this API was added as + a more explicitly descriptive name than :meth:`~Pattern.match`. Use it to + better express intent. The norm in other languages and regular expression + implementations is to use the term *match* to refer to the behavior of + what Python has always called :meth:`~Pattern.search`. + See :ref:`prefixmatch-vs-match`. + .. method:: Pattern.fullmatch(string[, pos[, endpos]]) @@ -1376,8 +1407,7 @@ Since :meth:`~Pattern.match` and :meth:`~Pattern.search` return ``None`` when there is no match, you can test whether there was a match with a simple ``if`` statement:: - match = re.search(pattern, string) - if match: + if match := re.search(pattern, string): process(match) .. class:: Match @@ -1415,15 +1445,15 @@ when there is no match, you can test whether there was a match with a simple If a group is contained in a part of the pattern that matched multiple times, the last match is returned. :: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"\A(\w+) (\w+)", "Norwegian Blue, pining for the fjords") >>> m.group(0) # The entire match - 'Isaac Newton' + 'Norwegian Blue' >>> m.group(1) # The first parenthesized subgroup. - 'Isaac' + 'Norwegian' >>> m.group(2) # The second parenthesized subgroup. - 'Newton' + 'Blue' >>> m.group(1, 2) # Multiple arguments give us a tuple. - ('Isaac', 'Newton') + ('Norwegian', 'Blue') If the regular expression uses the ``(?P...)`` syntax, the *groupN* arguments may also be strings identifying groups by their group name. If a @@ -1432,23 +1462,23 @@ when there is no match, you can test whether there was a match with a simple A moderately complicated example:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") - >>> m.group('first_name') - 'Malcolm' - >>> m.group('last_name') - 'Reynolds' + >>> m = re.search(r"(?P\w+) (?P\w+)", "killer rabbit") + >>> m.group('adjective') + 'killer' + >>> m.group('animal') + 'rabbit' Named groups can also be referred to by their index:: >>> m.group(1) - 'Malcolm' + 'killer' >>> m.group(2) - 'Reynolds' + 'rabbit' If a group matches multiple times, only the last match is accessible:: - >>> m = re.match(r"(..)+", "a1b2c3") # Matches 3 times. - >>> m.group(1) # Returns only the last match. + >>> m = re.search(r"(..)+", "a1b2c3") # Matches 3 times. + >>> m.group(1) # Returns only the last match. 'c3' @@ -1457,21 +1487,21 @@ when there is no match, you can test whether there was a match with a simple This is identical to ``m.group(g)``. This allows easier access to an individual group from a match:: - >>> m = re.match(r"(\w+) (\w+)", "Isaac Newton, physicist") + >>> m = re.search(r"(\w+) (\w+)", "Norwegian Blue, pining for the fjords") >>> m[0] # The entire match - 'Isaac Newton' + 'Norwegian Blue' >>> m[1] # The first parenthesized subgroup. - 'Isaac' + 'Norwegian' >>> m[2] # The second parenthesized subgroup. - 'Newton' + 'Blue' Named groups are supported as well:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Isaac Newton") - >>> m['first_name'] - 'Isaac' - >>> m['last_name'] - 'Newton' + >>> m = re.search(r"(?P\w+) (?P\w+)", "killer rabbit") + >>> m['adjective'] + 'killer' + >>> m['animal'] + 'rabbit' .. versionadded:: 3.6 @@ -1484,7 +1514,7 @@ when there is no match, you can test whether there was a match with a simple For example:: - >>> m = re.match(r"(\d+)\.(\d+)", "24.1632") + >>> m = re.search(r"(\d+)\.(\d+)", "24.1632") >>> m.groups() ('24', '1632') @@ -1492,7 +1522,7 @@ when there is no match, you can test whether there was a match with a simple might participate in the match. These groups will default to ``None`` unless the *default* argument is given:: - >>> m = re.match(r"(\d+)\.?(\d+)?", "24") + >>> m = re.search(r"(\d+)\.?(\d+)?", "24") >>> m.groups() # Second group defaults to None. ('24', None) >>> m.groups('0') # Now, the second group defaults to '0'. @@ -1505,9 +1535,9 @@ when there is no match, you can test whether there was a match with a simple the subgroup name. The *default* argument is used for groups that did not participate in the match; it defaults to ``None``. For example:: - >>> m = re.match(r"(?P\w+) (?P\w+)", "Malcolm Reynolds") + >>> m = re.search(r"(?P\w+) (?P\w+)", "killer rabbit") >>> m.groupdict() - {'first_name': 'Malcolm', 'last_name': 'Reynolds'} + {'adjective': 'killer', 'animal': 'rabbit'} .. method:: Match.start([group]) @@ -1610,42 +1640,41 @@ representing the card with that value. To see if a given string is a valid hand, one could do the following:: - >>> valid = re.compile(r"^[a2-9tjqk]{5}$") - >>> displaymatch(valid.match("akt5q")) # Valid. + >>> valid_hand_re = re.compile(r"^[a2-9tjqk]{5}$") + >>> displaymatch(valid_hand_re.search("akt5q")) # Valid. "" - >>> displaymatch(valid.match("akt5e")) # Invalid. - >>> displaymatch(valid.match("akt")) # Invalid. - >>> displaymatch(valid.match("727ak")) # Valid. + >>> displaymatch(valid_hand_re.search("akt5e")) # Invalid. + >>> displaymatch(valid_hand_re.search("akt")) # Invalid. + >>> displaymatch(valid_hand_re.search("727ak")) # Valid. "" That last hand, ``"727ak"``, contained a pair, or two of the same valued cards. To match this with a regular expression, one could use backreferences as such:: - >>> pair = re.compile(r".*(.).*\1") - >>> displaymatch(pair.match("717ak")) # Pair of 7s. + >>> pair_re = re.compile(r".*(.).*\1") + >>> displaymatch(pair_re.prefixmatch("717ak")) # Pair of 7s. "" - >>> displaymatch(pair.match("718ak")) # No pairs. - >>> displaymatch(pair.match("354aa")) # Pair of aces. + >>> displaymatch(pair_re.prefixmatch("718ak")) # No pairs. + >>> displaymatch(pair_re.prefixmatch("354aa")) # Pair of aces. "" To find out what card the pair consists of, one could use the :meth:`~Match.group` method of the match object in the following manner:: - >>> pair = re.compile(r".*(.).*\1") - >>> pair.match("717ak").group(1) + >>> pair_re = re.compile(r".*(.).*\1") + >>> pair_re.prefixmatch("717ak").group(1) '7' - # Error because re.match() returns None, which doesn't have a group() method: - >>> pair.match("718ak").group(1) + # Error because prefixmatch() returns None, which doesn't have a group() method: + >>> pair_re.prefixmatch("718ak").group(1) Traceback (most recent call last): File "", line 1, in - re.match(r".*(.).*\1", "718ak").group(1) + pair_re.prefixmatch("718ak").group(1) AttributeError: 'NoneType' object has no attribute 'group' - >>> pair.match("354aa").group(1) + >>> pair_re.prefixmatch("354aa").group(1) 'a' - Simulating scanf() ^^^^^^^^^^^^^^^^^^ @@ -1694,23 +1723,22 @@ The equivalent regular expression would be :: .. _search-vs-match: -search() vs. match() -^^^^^^^^^^^^^^^^^^^^ +search() vs. prefixmatch() +^^^^^^^^^^^^^^^^^^^^^^^^^^ .. sectionauthor:: Fred L. Drake, Jr. Python offers different primitive operations based on regular expressions: -+ :func:`re.match` checks for a match only at the beginning of the string ++ :func:`re.prefixmatch` checks for a match only at the beginning of the string + :func:`re.search` checks for a match anywhere in the string (this is what Perl does by default) + :func:`re.fullmatch` checks for entire string to be a match - For example:: - >>> re.match("c", "abcdef") # No match - >>> re.search("c", "abcdef") # Match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("c", "abcdef") # Match >>> re.fullmatch("p.*n", "python") # Match @@ -1719,19 +1747,46 @@ For example:: Regular expressions beginning with ``'^'`` can be used with :func:`search` to restrict the match at the beginning of the string:: - >>> re.match("c", "abcdef") # No match - >>> re.search("^c", "abcdef") # No match - >>> re.search("^a", "abcdef") # Match + >>> re.prefixmatch("c", "abcdef") # No match + >>> re.search("^c", "abcdef") # No match + >>> re.search("^a", "abcdef") # Match -Note however that in :const:`MULTILINE` mode :func:`match` only matches at the +Note however that in :const:`MULTILINE` mode :func:`prefixmatch` only matches at the beginning of the string, whereas using :func:`search` with a regular expression beginning with ``'^'`` will match at the beginning of each line. :: - >>> re.match("X", "A\nB\nX", re.MULTILINE) # No match + >>> re.prefixmatch("X", "A\nB\nX", re.MULTILINE) # No match >>> re.search("^X", "A\nB\nX", re.MULTILINE) # Match +.. _prefixmatch-vs-match: + +prefixmatch() vs. match() +^^^^^^^^^^^^^^^^^^^^^^^^^ + +Why is the :func:`~re.match` function and method discouraged in +favor of the longer :func:`~re.prefixmatch` spelling? + +Many other languages have gained regex support libraries since regular +expressions were added to Python. However in the most popular of those, they +use the term *match* in their APIs to mean the unanchored behavior provided in +Python by :func:`~re.search`. Thus use of the plain term *match* can be +unclear to those used to other languages when reading or writing code and +not familiar with the Python API's divergence from what otherwise become the +industry norm. + +Quoting from the Zen Of Python (``python3 -m this``): *"Explicit is better than +implicit"*. Anyone reading the name :func:`~re.prefixmatch` is likely to +understand the intended semantics. When reading :func:`~re.match` there remains +a seed of doubt about the intended behavior to anyone not already familiar with +this old Python gotcha. + +We **do not** plan to deprecate and remove the older *match* name, +as it has been used in code for over 30 years. +Code supporting older versions of Python should continue to use *match*. + +.. versionadded:: next Making a Phonebook ^^^^^^^^^^^^^^^^^^ @@ -1851,9 +1906,9 @@ every backslash (``'\'``) in a regular expression would have to be prefixed with another one to escape it. For example, the two following lines of code are functionally identical:: - >>> re.match(r"\W(.)\1\W", " ff ") + >>> re.search(r"\W(.)\1\W", " ff ") - >>> re.match("\\W(.)\\1\\W", " ff ") + >>> re.search("\\W(.)\\1\\W", " ff ") When one wants to match a literal backslash, it must be escaped in the regular @@ -1861,9 +1916,9 @@ expression. With raw string notation, this means ``r"\\"``. Without raw string notation, one must use ``"\\\\"``, making the following lines of code functionally identical:: - >>> re.match(r"\\", r"\\") + >>> re.search(r"\\", r"\\") - >>> re.match("\\\\", r"\\") + >>> re.search("\\\\", r"\\") diff --git a/Doc/whatsnew/3.15.rst b/Doc/whatsnew/3.15.rst index cd80947924684d..5dca1545b144ef 100644 --- a/Doc/whatsnew/3.15.rst +++ b/Doc/whatsnew/3.15.rst @@ -824,6 +824,19 @@ pickle (Contributed by Zackery Spytz and Serhiy Storchaka in :gh:`77188`.) +re +-- + +* :func:`re.prefixmatch` and a corresponding :meth:`~re.Pattern.prefixmatch` + have been added as alternate more explicit names for the existing + :func:`re.match` and :meth:`~re.Pattern.match` APIs. These are intended + to be used to alleviate confusion around what *match* means by following the + Zen of Python's *"Explicit is better than implicit"* mantra. Most other + language regular expression libraries use an API named *match* to mean what + Python has always called *search*. + (Contributed by Gregory P. Smith in :gh:`86519`.) + + resource -------- @@ -1285,7 +1298,7 @@ Diego Russo in :gh:`140683` and :gh:`142305`.) Removed -======= +======== ctypes ------ diff --git a/Lib/re/__init__.py b/Lib/re/__init__.py index ecec16e9005f3b..e6d29fd7a403b6 100644 --- a/Lib/re/__init__.py +++ b/Lib/re/__init__.py @@ -85,17 +85,18 @@ \\ Matches a literal backslash. This module exports the following functions: - match Match a regular expression pattern to the beginning of a string. - fullmatch Match a regular expression pattern to all of a string. - search Search a string for the presence of a pattern. - sub Substitute occurrences of a pattern found in a string. - subn Same as sub, but also return the number of substitutions made. - split Split a string by the occurrences of a pattern. - findall Find all occurrences of a pattern in a string. - finditer Return an iterator yielding a Match object for each match. - compile Compile a pattern into a Pattern object. - purge Clear the regular expression cache. - escape Backslash all non-alphanumerics in a string. + prefixmatch Match a regular expression pattern to the beginning of a str. + match The original name of prefixmatch prior to 3.15. + fullmatch Match a regular expression pattern to all of a string. + search Search a string for the presence of a pattern. + sub Substitute occurrences of a pattern found in a string. + subn Same as sub, but also return the number of substitutions made. + split Split a string by the occurrences of a pattern. + findall Find all occurrences of a pattern in a string. + finditer Return an iterator yielding a Match object for each match. + compile Compile a pattern into a Pattern object. + purge Clear the regular expression cache. + escape Backslash all non-alphanumerics in a string. Each function other than purge and escape can take an optional 'flags' argument consisting of one or more of the following module constants, joined by "|". @@ -130,7 +131,7 @@ # public symbols __all__ = [ - "match", "fullmatch", "search", "sub", "subn", "split", + "prefixmatch", "match", "fullmatch", "search", "sub", "subn", "split", "findall", "finditer", "compile", "purge", "escape", "error", "Pattern", "Match", "A", "I", "L", "M", "S", "X", "U", "ASCII", "IGNORECASE", "LOCALE", "MULTILINE", "DOTALL", "VERBOSE", @@ -159,10 +160,13 @@ class RegexFlag: # -------------------------------------------------------------------- # public interface -def match(pattern, string, flags=0): +def prefixmatch(pattern, string, flags=0): """Try to apply the pattern at the start of the string, returning a Match object, or None if no match was found.""" - return _compile(pattern, flags).match(string) + return _compile(pattern, flags).prefixmatch(string) + +# Our original name which was less explicitly clear about the behavior for prefixmatch. +match = prefixmatch def fullmatch(pattern, string, flags=0): """Try to apply the pattern to all of the string, returning @@ -311,7 +315,7 @@ def escape(pattern): return pattern.translate(_special_chars_map).encode('latin1') Pattern = type(_compiler.compile('', 0)) -Match = type(_compiler.compile('', 0).match('')) +Match = type(_compiler.compile('', 0).prefixmatch('')) # -------------------------------------------------------------------- # internals @@ -410,10 +414,10 @@ def __init__(self, lexicon, flags=0): def scan(self, string): result = [] append = result.append - match = self.scanner.scanner(string).match + _match = self.scanner.scanner(string).prefixmatch i = 0 while True: - m = match() + m = _match() if not m: break j = m.end() diff --git a/Lib/test/test_inspect/test_inspect.py b/Lib/test/test_inspect/test_inspect.py index e4a3a7d9add2c2..32bb47de04b113 100644 --- a/Lib/test/test_inspect/test_inspect.py +++ b/Lib/test/test_inspect/test_inspect.py @@ -6277,7 +6277,10 @@ def test_pwd_module_has_signatures(self): def test_re_module_has_signatures(self): import re - methods_no_signature = {'Match': {'group'}} + methods_no_signature = { + 'Match': {'group'}, + 'Pattern': {'match'}, # It is now an alias for prefixmatch + } self._test_module_has_signatures(re, methods_no_signature=methods_no_signature, good_exceptions={'error', 'PatternError'}) diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py index 9f6f04bf6b8347..5e19307b815a1b 100644 --- a/Lib/test/test_re.py +++ b/Lib/test/test_re.py @@ -90,10 +90,13 @@ def test_search_star_plus(self): self.assertEqual(re.search('x+', 'axx').span(), (1, 3)) self.assertIsNone(re.search('x', 'aaa')) self.assertEqual(re.match('a*', 'xxx').span(0), (0, 0)) + self.assertEqual(re.prefixmatch('a*', 'xxx').span(0), (0, 0)) self.assertEqual(re.match('a*', 'xxx').span(), (0, 0)) self.assertEqual(re.match('x*', 'xxxa').span(0), (0, 3)) + self.assertEqual(re.prefixmatch('x*', 'xxxa').span(0), (0, 3)) self.assertEqual(re.match('x*', 'xxxa').span(), (0, 3)) self.assertIsNone(re.match('a+', 'xxx')) + self.assertIsNone(re.prefixmatch('a+', 'xxx')) def test_branching(self): """Test Branching @@ -180,6 +183,7 @@ def test_bug_449000(self): def test_bug_1661(self): # Verify that flags do not get silently ignored with compiled patterns pattern = re.compile('.') + self.assertRaises(ValueError, re.prefixmatch, pattern, 'A', re.I) self.assertRaises(ValueError, re.match, pattern, 'A', re.I) self.assertRaises(ValueError, re.search, pattern, 'A', re.I) self.assertRaises(ValueError, re.findall, pattern, 'A', re.I) @@ -517,6 +521,8 @@ def test_re_match(self): self.assertEqual(re.match(b'(a)', string).group(0), b'a') self.assertEqual(re.match(b'(a)', string).group(1), b'a') self.assertEqual(re.match(b'(a)', string).group(1, 1), (b'a', b'a')) + self.assertEqual(re.prefixmatch(b'(a)', string).group(1, 1), + (b'a', b'a')) for a in ("\xe0", "\u0430", "\U0001d49c"): self.assertEqual(re.match(a, a).groups(), ()) self.assertEqual(re.match('(%s)' % a, a).groups(), (a,)) @@ -558,10 +564,8 @@ def __index__(self): self.assertEqual(m.group(2, 1), ('b', 'a')) self.assertEqual(m.group(Index(2), Index(1)), ('b', 'a')) - def test_match_getitem(self): - pat = re.compile('(?:(?Pa)|(?Pb))(?Pc)?') - - m = pat.match('a') + def do_test_match_getitem(self, match_fn): + m = match_fn('a') self.assertEqual(m['a1'], 'a') self.assertEqual(m['b2'], None) self.assertEqual(m['c3'], None) @@ -585,7 +589,7 @@ def test_match_getitem(self): with self.assertRaisesRegex(IndexError, 'no such group'): 'a1={a2}'.format_map(m) - m = pat.match('ac') + m = match_fn('ac') self.assertEqual(m['a1'], 'a') self.assertEqual(m['b2'], None) self.assertEqual(m['c3'], 'c') @@ -602,6 +606,14 @@ def test_match_getitem(self): # No len(). self.assertRaises(TypeError, len, m) + def test_match_getitem(self): + pat = re.compile('(?:(?Pa)|(?Pb))(?Pc)?') + self.do_test_match_getitem(pat.match) + + def test_prefixmatch_getitem(self): + pat = re.compile('(?:(?Pa)|(?Pb))(?Pc)?') + self.do_test_match_getitem(pat.prefixmatch) + def test_re_fullmatch(self): # Issue 16203: Proposal: add re.fullmatch() method. self.assertEqual(re.fullmatch(r"a", "a").span(), (0, 1)) diff --git a/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst new file mode 100644 index 00000000000000..a3e0a3e14af1fa --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-02-05-00-15-03.bpo-42353.0ebVGG.rst @@ -0,0 +1,10 @@ +The :mod:`re` module gains a new :func:`re.prefixmatch` function as an +explicit spelling of what has to date always been known as :func:`re.match`. +:class:`re.Pattern` similary gains a :meth:`re.Pattern.prefixmatch` method. + +Why? Explicit is better than implicit. Other widely used languages all use +the term "match" to mean what Python uses the term "search" for. The +unadorened "match" name in Python has been a frequent case of confusion and +coding bugs due to the inconsistency with the rest if the software industry. + +We do not plan to deprecate and remove the older ``match`` name. diff --git a/Modules/_sre/clinic/sre.c.h b/Modules/_sre/clinic/sre.c.h index d2f25a71495cda..b49bf4e058b69b 100644 --- a/Modules/_sre/clinic/sre.c.h +++ b/Modules/_sre/clinic/sre.c.h @@ -164,22 +164,22 @@ _sre_unicode_tolower(PyObject *module, PyObject *arg) return return_value; } -PyDoc_STRVAR(_sre_SRE_Pattern_match__doc__, -"match($self, /, string, pos=0, endpos=sys.maxsize)\n" +PyDoc_STRVAR(_sre_SRE_Pattern_prefixmatch__doc__, +"prefixmatch($self, /, string, pos=0, endpos=sys.maxsize)\n" "--\n" "\n" "Matches zero or more characters at the beginning of the string."); -#define _SRE_SRE_PATTERN_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Pattern_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_match__doc__}, +#define _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF \ + {"prefixmatch", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Pattern_prefixmatch__doc__}, static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos); +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos); static PyObject * -_sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_sre_SRE_Pattern_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -205,7 +205,7 @@ _sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, static const char * const _keywords[] = {"string", "pos", "endpos", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "match", + .fname = "prefixmatch", .kwtuple = KWTUPLE, }; #undef KWTUPLE @@ -254,7 +254,7 @@ _sre_SRE_Pattern_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, endpos = ival; } skip_optional_pos: - return_value = _sre_SRE_Pattern_match_impl((PatternObject *)self, cls, string, pos, endpos); + return_value = _sre_SRE_Pattern_prefixmatch_impl((PatternObject *)self, cls, string, pos, endpos); exit: return return_value; @@ -1523,25 +1523,25 @@ _sre_SRE_Match___deepcopy__(PyObject *self, PyObject *memo) return return_value; } -PyDoc_STRVAR(_sre_SRE_Scanner_match__doc__, -"match($self, /)\n" +PyDoc_STRVAR(_sre_SRE_Scanner_prefixmatch__doc__, +"prefixmatch($self, /)\n" "--\n" "\n"); -#define _SRE_SRE_SCANNER_MATCH_METHODDEF \ - {"match", _PyCFunction_CAST(_sre_SRE_Scanner_match), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_match__doc__}, +#define _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF \ + {"prefixmatch", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch), METH_METHOD|METH_FASTCALL|METH_KEYWORDS, _sre_SRE_Scanner_prefixmatch__doc__}, static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls); +_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls); static PyObject * -_sre_SRE_Scanner_match(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +_sre_SRE_Scanner_prefixmatch(PyObject *self, PyTypeObject *cls, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { if (nargs || (kwnames && PyTuple_GET_SIZE(kwnames))) { - PyErr_SetString(PyExc_TypeError, "match() takes no arguments"); + PyErr_SetString(PyExc_TypeError, "prefixmatch() takes no arguments"); return NULL; } - return _sre_SRE_Scanner_match_impl((ScannerObject *)self, cls); + return _sre_SRE_Scanner_prefixmatch_impl((ScannerObject *)self, cls); } PyDoc_STRVAR(_sre_SRE_Scanner_search__doc__, @@ -1568,4 +1568,4 @@ _sre_SRE_Scanner_search(PyObject *self, PyTypeObject *cls, PyObject *const *args #ifndef _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #define _SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF #endif /* !defined(_SRE_SRE_PATTERN__FAIL_AFTER_METHODDEF) */ -/*[clinic end generated code: output=bbf42e1de3bdd3ae input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0c867efb64e020aa input=a9049054013a1b77]*/ diff --git a/Modules/_sre/sre.c b/Modules/_sre/sre.c index 59ff9078e6cff4..d6cdd861fd85a2 100644 --- a/Modules/_sre/sre.c +++ b/Modules/_sre/sre.c @@ -766,7 +766,7 @@ sre_search(SRE_STATE* state, SRE_CODE* pattern) } /*[clinic input] -_sre.SRE_Pattern.match +_sre.SRE_Pattern.prefixmatch cls: defining_class / @@ -778,10 +778,10 @@ Matches zero or more characters at the beginning of the string. [clinic start generated code]*/ static PyObject * -_sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, - PyObject *string, Py_ssize_t pos, - Py_ssize_t endpos) -/*[clinic end generated code: output=ec6208ea58a0cca0 input=4bdb9c3e564d13ac]*/ +_sre_SRE_Pattern_prefixmatch_impl(PatternObject *self, PyTypeObject *cls, + PyObject *string, Py_ssize_t pos, + Py_ssize_t endpos) +/*[clinic end generated code: output=a0e079fb4f875240 input=e2a7e68ea47d048c]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE state; @@ -809,6 +809,7 @@ _sre_SRE_Pattern_match_impl(PatternObject *self, PyTypeObject *cls, return match; } + /*[clinic input] _sre.SRE_Pattern.fullmatch @@ -2671,7 +2672,7 @@ _sre_SRE_Match___deepcopy___impl(MatchObject *self, PyObject *memo) } PyDoc_STRVAR(match_doc, -"The result of re.match() and re.search().\n\ +"The result of re.search(), re.prefixmatch(), and re.fullmatch().\n\ Match objects always have a boolean value of True."); PyDoc_STRVAR(match_group_doc, @@ -2863,7 +2864,7 @@ scanner_end(ScannerObject* self) } /*[clinic input] -_sre.SRE_Scanner.match +_sre.SRE_Scanner.prefixmatch cls: defining_class / @@ -2871,8 +2872,8 @@ _sre.SRE_Scanner.match [clinic start generated code]*/ static PyObject * -_sre_SRE_Scanner_match_impl(ScannerObject *self, PyTypeObject *cls) -/*[clinic end generated code: output=6e22c149dc0f0325 input=b5146e1f30278cb7]*/ +_sre_SRE_Scanner_prefixmatch_impl(ScannerObject *self, PyTypeObject *cls) +/*[clinic end generated code: output=02b3b9d2954a2157 input=3049b20466c56a8e]*/ { _sremodulestate *module_state = get_sre_module_state_by_class(cls); SRE_STATE* state = &self->state; @@ -3170,7 +3171,12 @@ pattern_richcompare(PyObject *lefto, PyObject *righto, int op) #include "clinic/sre.c.h" static PyMethodDef pattern_methods[] = { - _SRE_SRE_PATTERN_MATCH_METHODDEF + _SRE_SRE_PATTERN_PREFIXMATCH_METHODDEF + /* "match" reuses the prefixmatch Clinic-generated parser and impl + * to avoid duplicating the argument parsing boilerplate code. */ + {"match", _PyCFunction_CAST(_sre_SRE_Pattern_prefixmatch), + METH_METHOD|METH_FASTCALL|METH_KEYWORDS, + _sre_SRE_Pattern_prefixmatch__doc__}, _SRE_SRE_PATTERN_FULLMATCH_METHODDEF _SRE_SRE_PATTERN_SEARCH_METHODDEF _SRE_SRE_PATTERN_SUB_METHODDEF @@ -3297,7 +3303,12 @@ static PyType_Spec match_spec = { }; static PyMethodDef scanner_methods[] = { - _SRE_SRE_SCANNER_MATCH_METHODDEF + _SRE_SRE_SCANNER_PREFIXMATCH_METHODDEF + /* "match" reuses the prefixmatch Clinic-generated parser and impl + * to avoid duplicating the argument parsing boilerplate code. */ + {"match", _PyCFunction_CAST(_sre_SRE_Scanner_prefixmatch), + METH_METHOD|METH_FASTCALL|METH_KEYWORDS, + _sre_SRE_Scanner_prefixmatch__doc__}, _SRE_SRE_SCANNER_SEARCH_METHODDEF {NULL, NULL} }; @@ -3401,11 +3412,31 @@ do { \ } \ } while (0) + +#ifdef Py_DEBUG +static void +_assert_match_aliases_prefixmatch(PyMethodDef *methods) +{ + PyMethodDef *prefixmatch_md = &methods[0]; + PyMethodDef *match_md = &methods[1]; + assert(strcmp(prefixmatch_md->ml_name, "prefixmatch") == 0); + assert(strcmp(match_md->ml_name, "match") == 0); + assert(match_md->ml_meth == prefixmatch_md->ml_meth); + assert(match_md->ml_flags == prefixmatch_md->ml_flags); + assert(match_md->ml_doc == prefixmatch_md->ml_doc); +} +#endif + static int sre_exec(PyObject *m) { _sremodulestate *state; +#ifdef Py_DEBUG + _assert_match_aliases_prefixmatch(pattern_methods); + _assert_match_aliases_prefixmatch(scanner_methods); +#endif + /* Create heap types */ state = get_sre_module_state(m); CREATE_TYPE(m, state->Pattern_Type, &pattern_spec); From c91638ca0671b8038831f963ed44e66cdda006a2 Mon Sep 17 00:00:00 2001 From: Ramin Farajpour Cami Date: Mon, 16 Feb 2026 06:13:07 +0330 Subject: [PATCH 2/4] gh-144833: Fix use-after-free in SSL module when SSL_new() fails (GH-144843) In newPySSLSocket(), when SSL_new() returns NULL, Py_DECREF(self) was called before _setSSLError(get_state_ctx(self), ...), causing a use-after-free. Additionally, get_state_ctx() was called with self (PySSLSocket*) instead of sslctx (PySSLContext*), which is a type confusion bug. Fix by calling _setSSLError() before Py_DECREF() and using sslctx instead of self for get_state_ctx(). --- .../Library/2026-02-15-00-00-00.gh-issue-144833.TUelo1.rst | 3 +++ Modules/_ssl.c | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 Misc/NEWS.d/next/Library/2026-02-15-00-00-00.gh-issue-144833.TUelo1.rst diff --git a/Misc/NEWS.d/next/Library/2026-02-15-00-00-00.gh-issue-144833.TUelo1.rst b/Misc/NEWS.d/next/Library/2026-02-15-00-00-00.gh-issue-144833.TUelo1.rst new file mode 100644 index 00000000000000..6d5b18f59ee7ea --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-02-15-00-00-00.gh-issue-144833.TUelo1.rst @@ -0,0 +1,3 @@ +Fixed a use-after-free in :mod:`ssl` when ``SSL_new()`` returns NULL in +``newPySSLSocket()``. The error was reported via a dangling pointer after the +object had already been freed. diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 66d699b4339ce3..b0c0d8deeecd23 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -917,8 +917,8 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, self->ssl = SSL_new(ctx); PySSL_END_ALLOW_THREADS(sslctx) if (self->ssl == NULL) { + _setSSLError(get_state_ctx(sslctx), NULL, 0, __FILE__, __LINE__); Py_DECREF(self); - _setSSLError(get_state_ctx(self), NULL, 0, __FILE__, __LINE__); return NULL; } From 87c7f193b8ea7be36f3ba5a66b5c223efde4c674 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Sun, 15 Feb 2026 23:02:07 -0600 Subject: [PATCH 3/4] gh-144551: Update Android builds to use OpenSSL 3.0.19 (GH-144864) --- Android/android.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Android/android.py b/Android/android.py index 629696be3db300..0a894a958a0165 100755 --- a/Android/android.py +++ b/Android/android.py @@ -208,7 +208,7 @@ def make_build_python(context): def unpack_deps(host, prefix_dir): os.chdir(prefix_dir) deps_url = "https://github.com/beeware/cpython-android-source-deps/releases/download" - for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.18-0", + for name_ver in ["bzip2-1.0.8-3", "libffi-3.4.4-3", "openssl-3.0.19-1", "sqlite-3.50.4-0", "xz-5.4.6-1", "zstd-1.5.7-1"]: filename = f"{name_ver}-{host}.tar.gz" download(f"{deps_url}/{name_ver}/{filename}") From ebe02e4f393bc0bd2263c43da313b28012f82af9 Mon Sep 17 00:00:00 2001 From: Zachary Ware Date: Sun, 15 Feb 2026 23:02:23 -0600 Subject: [PATCH 4/4] gh-144551: Update iOS builds to use OpenSSL 3.0.19 (GH-144865) --- Apple/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Apple/__main__.py b/Apple/__main__.py index 256966e76c2c97..253bdfaab5520c 100644 --- a/Apple/__main__.py +++ b/Apple/__main__.py @@ -316,7 +316,7 @@ def unpack_deps( for name_ver in [ "BZip2-1.0.8-2", "libFFI-3.4.7-2", - "OpenSSL-3.0.18-1", + "OpenSSL-3.0.19-1", "XZ-5.6.4-2", "mpdecimal-4.0.0-2", "zstd-1.5.7-1",