Programming
Bash
Arrays
Regex - grep
Strings
C
Arrays
Regex - POSIX
Strings
C++
Arrays
Bimaps
Functions
Maps
Regex
Strings
Go
Arrays
Maps
Strings
Java
Arrays
Regex
Strings
JavaScript
Arrays
Functions
Objects
Regex
Strings
Perl
Arrays
Regex
Strings
Python
Arrays
Functions
Regex
Basics
Anchor start
Anchor end
Repeats
Alternatives
Character classes
Pos lookahead
Neg lookahead
Pos lookbehind
Neg lookbehind
Capture groups
Find all
Non-greedy
Strings
Python Regex
alternatives colour
Basics
Any char (alpha) p = "."
s = "a"
t = re.match(p,s)
true
Any char (digit) p = "."
s = "2"
t = re.match(p,s)
true
Any char (punct) p = "."
s = ";"
t = re.match(p,s)
true
Any char (\n) p = "."
s = "\n"
# . matches any char except \n (unless re.DOTALL is set)
t = re.match(p,s)
false
Specific char (alpha) p = "a"
s = "a"
t = re.match(p,s)
true
Specific char (alpha) p = "a"
s = "b"
t = re.match(p,s)
false
Specific char (oct) p = "\141"
s = "a"
t = re.match(p,s)
true
Specific char (oct) p = "\141"
s = "b"
t = re.match(p,s)
false
Specific char (hex) p = "\x61"
s = "a"
t = re.match(p,s)
true
Specific char (hex) p = "\x61"
s = "b"
t = re.match(p,s)
false
Anchor start
Start of string # re.match is anchored at the start
# of the string (but not the end), so
# no ^ anchor or leading .* is needed
p = "hello"
s = "hello world"
t = re.match(p,s)
true
Start of string [alternative] # re.search scans through the
# string, so the ^ anchor is needed
p = "^hello"
s = "hello world"
t = re.search(p,s)
true
Start of string p = "world"
s = "hello world"
t = re.match(p,s)
false
Start of string [alternative] p = "^world"
s = "hello world"
t = re.search(p,s)
false
Start of line p = "^world"
s = "hello\nworld"
t = re.search(p,s,re.MULTILINE)
true
Start of line p = "^world"
s = "hello world"
t = re.search(p,s,re.MULTILINE)
false
Start of word p = "\\bworld"
s = "hello world"
t = re.search(p,s)
true
Start of word p = "\\bworld"
s = "helloworld"
t = re.search(p,s)
false
Anchor end
End of string p = ".*world$"
s = "hello world"
# re.match tied to start of str
t = re.match(p,s)
true
End of string [alternative] p = "world$"
s = "hello world"
# re.search searches thru str
t = re.search(p,s)
true
End of string p = ".*hello$"
s = "hello world"
t = re.match(p,s)
false
End of string [alternative] p = "hello$"
s = "hello world"
t = re.search(p,s)
false
End of line p = "hello$"
s = "hello\nworld"
t = re.search(p,s,re.MULTILINE)
true
End of line p = "hello$"
s = "hello world"
t = re.search(p,s,re.MULTILINE)
false
End of word p = "hello\\b"
s = "hello world"
t = re.match(p,s)
true
End of word p = "hello\\b"
s = "helloworld"
t = re.match(p,s)
false
Repeats
Zero or more (0) p = "a*b"
s = "b"
t = re.match(p,s)
true
Zero or more (1) p = "a*b"
s = "ab"
t = re.match(p,s)
true
Zero or more (>1) p = "a*b"
s = "aaaaaab"
t = re.match(p,s)
true
One or more (0) p = "a+b"
s = "b"
t = re.match(p,s)
false
One or more (1) p = "a+b"
s = "ab"
t = re.match(p,s)
true
One or more (>1) p = "a+b"
s = "aaaaaab"
t = re.match(p,s)
true
Zero or one (0) p = "ca?b"
s = "cb"
t = re.match(p,s)
true
Zero or one (1) p = "ca?b"
s = "cab"
t = re.match(p,s)
true
Zero or one (>1) p = "ca?b"
s = "caab"
t = re.match(p,s)
false
N (N) p = "a{2}$"
s = "aa"
t = re.match(p,s)
true
N (>N) p = "a{2}$"
s = "aaa"
t = re.match(p,s)
false
N (<N) p = "a{2}$"
s = "a"
t = re.match(p,s)
false
N or more (N) p = "^a{2,}$"
s = "aa"
t = re.match(p,s)
true
N or more (>N) p = "^a{2,}$"
s = "aaaaaaa"
t = re.match(p,s)
true
N or more (<N) p = "^a{2,}$"
s = "a"
t = re.match(p,s)
false
N or less (N) p = "^a{,2}$"
s = "aa"
t = re.match(p,s)
true
N or less (<N) p = "^a{,2}$"
s = "a"
t = re.match(p,s)
true
N or less (>N) p = "^a{,2}$"
s = "aaa"
t = re.match(p,s)
false
Between N and M (N) p = "^a{2,3}$"
s = "aa"
t = re.match(p,s)
true
Between N and M (M) p = "^a{2,3}$"
s = "aaa"
t = re.match(p,s)
true
Between N and M (<N) p = "^a{2,3}$"
s = "a"
t = re.match(p,s)
false
Between N and M (>M) p = "^a{2,3}$"
s = "aaaa"
t = re.match(p,s)
false
Alternatives
String A or B (A) p = "abc|def"
s = "abc"
t = re.match(p,s)
true
String A or B (B) p = "abc|def"
s = "def"
t = re.match(p,s)
true
String A or B (neither) p = "abc|def"
s = "hello"
t = re.match(p,s)
false
Grouped A or B (A) p = "abc(d|ef)"
s = "abcd"
t = re.match(p,s)
true
Grouped A or B (B) p = "abc(d|ef)"
s = "abcef"
t = re.match(p,s)
true
Grouped A or B (neither) p = "abc(d|ef)"
s = "abcf"
t = re.match(p,s)
false
Character classes
Literal (included) p = "[abc]"
s = "a"
t = re.match(p,s)
true
Literal (included) p = "[abc]"
s = "b"
t = re.match(p,s)
true
Literal (excluded) p = "[abc]"
s = "d"
t = re.match(p,s)
false
Range (inside) p = "[a-f]"
s = "a"
t = re.match(p,s)
true
Range (inside) p = "[a-f]"
s = "c"
t = re.match(p,s)
true
Range (outside) p = "[a-f]"
s = "g"
t = re.match(p,s)
false
Negated literal (excluded) p = "[^abc]"
s = "d"
t = re.match(p,s)
true
Negated literal (included) p = "[^abc]"
s = "b"
t = re.match(p,s)
false
Negated range (outside) p = "[^c-g]"
s = "b"
t = re.match(p,s)
true
Negated range (outside) p = "[^c-g]"
s = "h"
t = re.match(p,s)
true
Negated range (inside) p = "[^c-g]"
s = "c"
t = re.match(p,s)
false
Negated range (inside) p = "[^c-g]"
s = "g"
t = re.match(p,s)
false
Positive lookahead
A directly followed by B # A=foo, B=bar
p = "foo(?=bar)"
s = "foobar"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A directly followed by B # A=foo, B=bar
p = "foo(?=bar)"
s = "barfoo"
t = re.search(p,s)
false
A directly followed by B # A=foo, B=bar
p = "foo(?=bar)"
s = "foo123bar"
t = re.search(p,s)
false
A followed by B # A=foo, B=bar
p = "foo(?=.*bar)"
s = "foobar"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A followed by B # A=foo, B=bar
p = "foo(?=.*bar)"
s = "foo123bar"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A followed by B # A=foo, B=bar
p = "foo(?=.*bar)"
s = "barfoo"
t = re.search(p,s)
false
Negative lookahead
A not directly followed by B # A=foo, B=bar
p = "foo(?!bar)"
s = "foo123"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A not directly followed by B # A=foo, B=bar
p = "foo(?!bar)"
s = "foo123bar"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A not directly followed by B # A=foo, B=bar
p = "foo(?!bar)"
s = "foobar"
t = re.search(p,s)
false
A not followed by B # A=foo, B=bar
p = "foo(?!.*bar)"
s = "foo123"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A not followed by B # A=foo, B=bar
p = "foo(?!.*bar)"
s = "foobar"
t = re.search(p,s)
false
A not followed by B # A=foo, B=bar
p = "foo(?!.*bar)"
s = "foo123bar"
t = re.search(p,s)
false
Positive lookbehind
A directly preceded by B # A=foo, B=bar
p = "(?<=bar)foo"
s = "barfoo"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A directly preceded by B # A=foo, B=bar
p = "(?<=bar)foo"
s = "foobar"
t = re.search(p,s)
false
A directly preceded by B # A=foo, B=bar
p = "(?<=bar)foo"
s = "bar123foo"
t = re.search(p,s)
false
Negative lookbehind
A not directly preceded by B # A=foo, B=bar
p = "(?<!bar)foo"
s = "123foo"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A not directly preceded by B # A=foo, B=bar
p = "(?<!bar)foo"
s = "bar123foo"
t = re.search(p,s)
r="false"
if t:
  r=t.group(0)
foo
A not directly preceded by B # A=foo, B=bar
p = "(?<!bar)foo"
s = "barfoo"
t = re.search(p,s)
false
Capture groups
Group 1 p = "([0-9]+)([a-z]+)"
s = "123abc"
m = re.search(p,s)
g = m.group(1);
123
Group 2 p = "([0-9]+)([a-z]+)"
s = "123abc"
m = re.search(p,s)
g = m.group(2);
abc
Group 1 following non-capture group p = "(?:[0-9]+)([a-z]+)"
s = "123abc"
m = re.search(p,s)
g = m.group(1);
abc
Optional group 1 - matched p = "([0-9]+)*([a-z]+)"
s = "123abc"
m = re.search(p,s)
g = m.group(1);
123
Optional group 1 - not matched p = "([0-9]+)*([a-z]+)"
s = "abc"
m = re.search(p,s)
g = m.group(1);
None
Group 2 following optional group not matched p = "([0-9]+)*([a-z]+)"
s = "abc"
m = re.search(p,s)
g = m.group(2);
abc
Complete match p = "([0-9]+)*-([a-z]+)"
s = "123-abc"
m = re.search(p,s)
g = m.group(0);
123-abc
Nested group 1 p = "(([0-9]+)([a-z]+))([A-Z]+)"
s = "123abcDEF"
m = re.search(p,s)
g = m.group(1);
123abc
Nested group 2 p = "(([0-9]+)([a-z]+))([A-Z]+)"
s = "123abcDEF"
m = re.search(p,s)
g = m.group(2);
123
Nested group 3 p = "(([0-9]+)([a-z]+))([A-Z]+)"
s = "123abcDEF"
m = re.search(p,s)
g = m.group(3);
abc
Nested group 4 p = "(([0-9]+)([a-z]+))([A-Z]+)"
s = "123abcDEF"
m = re.search(p,s)
g = m.group(4);
DEF
Find all
Find p = "[A-Za-z]+"
s = "one123 two 45 6"
f = re.findall(p,s)
'one' 'two'
Find delimited - including delimiter p = "[A-Za-z0-9]+(?:,|$)"
s = "one, 2, three"
f = re.findall(p,s)
'one,' '2,' 'three'
Find delimited - excluding delimiter p = "[A-Za-z0-9]+(?=,|$)"
s = "one, 2, three"
f = re.findall(p,s)
'one' '2' 'three'
Non-greedy
Greedy zero or more p = "<.*>"
s = "<html><head><title> Title </title></html>"
# regex parsing of xml/html
# is not recommended btw!
m = re.match(p, s)
<html><head><title> Title </title></html>
Non-greedy zero or more p = "<.*?>"
s = "<html><head><title> Title </title></html>"
# regex parsing of xml/html
# is not recommended btw!
m = re.match(p, s)
<html>
Greedy one or more p = "a[a-z]+a"
s = "aaaa";
m = re.match(p, s)
aaaa
Non-greedy one or more p = "a[a-z]+?a"
s = "aaaa";
m = re.match(p, s)
aaa
Greedy range # picks top of range
p = "a[a-z]{0,2}a"
s = "aaaaa";
m = re.match(p, s)
aaaa
Non-greedy range # picks bottom of range
p = "a[a-z]{0,2}?a"
s = "aaaaa";
m = re.match(p, s)
aa