Login
[x]
Log in using an account from:
Fedora Account System
Red Hat Associate
Red Hat Customer
Or log in using a Red Hat Bugzilla account
Forgot Password
Login:
Hide Forgot
Create an Account
Red Hat Bugzilla – Attachment 952258 Details for
Bug 799863
inconsistent \w and [[:alnum:]] behaviour
[?]
New
Simple Search
Advanced Search
My Links
Browse
Requests
Reports
Current State
Search
Tabular reports
Graphical reports
Duplicates
Other Reports
User Changes
Plotly Reports
Bug Status
Bug Severity
Non-Defaults
|
Product Dashboard
Help
Page Help!
Bug Writing Guidelines
What's new
Browser Support Policy
5.0.4.rh83 Release notes
FAQ
Guides index
User guide
Web Services
Contact
Legal
This site requires JavaScript to be enabled to function correctly, please enable it.
[patch]
Backported fix
grep-2.6.3-w-multibyte-fix.patch (text/plain), 3.30 KB, created by
Jaroslav Škarvada
on 2014-10-30 16:57:42 UTC
(
hide
)
Description:
Backported fix
Filename:
MIME Type:
Creator:
Jaroslav Škarvada
Created:
2014-10-30 16:57:42 UTC
Size:
3.30 KB
patch
obsolete
>diff --git a/src/dfa.c b/src/dfa.c
>--- a/src/dfa.c
>+++ b/src/dfa.c
>@@ -770,6 +770,20 @@ parse_bracket_exp (void)
> /* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
> #define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_')
>
>+#define PUSH_LEX_STATE(s) \
>+  do \
>+    { \
>+      char const *lexptr_saved = lexptr; \
>+      size_t lexleft_saved = lexleft; \
>+      lexptr = (s); \
>+      lexleft = strlen (lexptr)
>+
>+#define POP_LEX_STATE() \
>+      lexptr = lexptr_saved; \
>+      lexleft = lexleft_saved; \
>+    } \
>+  while (0)
>+
> static token
> lex (void)
> {
>@@ -1057,14 +1071,33 @@ lex (void)
>         case 'W':
>           if (!backslash || (syntax_bits & RE_NO_GNU_OPS))
>             goto normal_char;
>-          zeroset(ccl);
>-          for (c2 = 0; c2 < NOTCHAR; ++c2)
>-            if (IS_WORD_CONSTITUENT(c2))
>-              setbit(c2, ccl);
>-          if (c == 'W')
>-            notset(ccl);
>+
>+          if (MB_CUR_MAX <= 1)
>+            {
>+              zeroset (ccl);
>+              for (c2 = 0; c2 < NOTCHAR; ++c2)
>+                if (IS_WORD_CONSTITUENT (c2))
>+                  setbit (c2, ccl);
>+              if (c == 'W')
>+                notset (ccl);
>+              laststart = false;
>+              return lasttok = CSET + charclass_index (ccl);
>+            }
>+
>+          /* FIXME: see if optimizing this, as is done with ANYCHAR and
>+             add_utf8_anychar, makes sense. */
>+
>+          /* \w and \W are documented to be equivalent to [_[:alnum:]] and
>+             [^_[:alnum:]] respectively, so tell the lexer to process those
>+             strings, each minus its "already processed" '['. */
>+          PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
>+
>+          lasttok = parse_bracket_exp ();
>+
>+          POP_LEX_STATE ();
>+
>           laststart = 0;
>-          return lasttok = CSET + charclass_index(ccl);
>+          return lasttok;
>
>         case '[':
>           if (backslash)
>diff --git a/tests/Makefile.am b/tests/Makefile.am
>--- a/tests/Makefile.am
>+++ b/tests/Makefile.am
>@@ -61,6 +61,7 @@ TESTS = \
>   turkish-I \
>   turkish-I-without-dot \
>   word-multi-file \
>+  word-multibyte \
>   yesno.sh
>
> EXTRA_DIST = \
>diff --git a/tests/Makefile.in b/tests/Makefile.in
>--- a/tests/Makefile.in
>+++ b/tests/Makefile.in
>@@ -827,6 +827,7 @@ TESTS = \
>   turkish-I \
>   turkish-I-without-dot \
>   word-multi-file \
>+  word-multibyte \
>   yesno.sh
>
> EXTRA_DIST = \
>@@ -1247,6 +1248,8 @@ turkish-I-without-dot.log: turkish-I-without-dot
> 	@p='turkish-I-without-dot'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
> word-multi-file.log: word-multi-file
> 	@p='word-multi-file'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
>+word-multibyte.log: word-multibyte
>+	@p='word-multibyte'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
> yesno.sh.log: yesno.sh
> 	@p='yesno.sh'; $(am__check_pre) $(LOG_COMPILE) "$$tst" $(am__check_post)
> .test.log:
>diff --git a/tests/word-multibyte b/tests/word-multibyte
>new file mode 100644
>--- /dev/null
>+++ b/tests/word-multibyte
>@@ -0,0 +1,23 @@
>+#!/bin/sh
>+# This would fail for grep-2.20
>+. "${srcdir=.}/init.sh"; path_prepend_ ../src
>+
>+require_en_utf8_locale_
>+
>+printf '\xc3\xa1\n' > in || framework_failure_
>+LC_ALL=en_US.UTF-8
>+export LC_ALL
>+
>+fail=0
>+
>+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
>+  out=out1-$LOC
>+  LC_ALL=$LOC grep '\w' in >$out || fail=1
>+  compare in $out || fail=1
>+
>+  out=out2-$LOC
>+  LC_ALL=$LOC grep '\W' in >$out && fail=1
>+  compare /dev/null $out || fail=1
>+done
>+
>+Exit $fail
>--
>1.9.3
>
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 799863
: 952258 |
985631