In-Class Exercise Solutions, Session 4

Advanced Python
In-Class Exercise Solutions, Session 4


	MATCHING
Ex. 4.1	Match a simple character pattern.
	Search for 'Velas', then try 'Benter' and 'Acme'.
	import re lines = [ 'Acme Corporation is headed by CEO Joseph Benter, and ', 'President Maria Velas. Mr. Benter focuses on R&D ', 'while Ms. Velas provides vision and major deals for ', 'Acme. '] # list of strings for line in lines: # str, 'Acme Corporation is ... ' if re.search(r'Velas', line): # bool, False (first string) print(line)

Ex. 4.2	'not' to negate a search. Execute previous pattern with 'not' in front of re.search()
	import re lines = [ 'Acme Corporation is headed by CEO Joseph Benter, and ', 'President Maria Velas. Mr. Benter focuses on R&D ', 'while Ms. Velas provides vision and major deals for ', 'Acme. '] # list of strings for line in lines: # str, 'Acme Corporation ...' if not re.search(r'Benter', line): # bool, False (first line) print(line)


	ANCHORS
Ex. 4.3	Anchors - start of string.
	Print only those lines that have 'TEL' at the start:
	import re for text_line in ['AURORA HOTEL', 'OPEN12:00 AM - 11:59 PM', '14200 E ALAMEDA AVE AURORA, CO 80012', 'TEL (303) 344-9901']: # list of strings if re.search(r'^TEL', text_line): # bool, False (first string) print(text_line)

Ex. 4.4	Anchors - end of string.
	Print only those files that end in .jpg
	import re filenames = ['image.jpg', 'image.png', 'filejpg.txt', 'file2.doc', 'file3.pdf', 'image2.gif', 'image3.jpg', 'image4.jpg'] for name in filenames: # str, 'image.jpg' if re.search(r'\.jpg$', name): # bool, True (first string) print(name)


	BUILT-IN CHARACTER CLASSES
Ex. 4.5	"Digit" character class.
	Match on each string that has a digit.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'Hello world 00' if re.search(r'\d', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.6	"Word" character class.
	Match each string that has a letter, number or underscore.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\w', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.7	"Space" character class.
	Match on each line that has a space.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\s', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')


	INVERSE CHARACTER CLASSES
Ex. 4.8	"Not a digit" character class.
	Match on each string that has a character that is not a digit.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\D', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.9	"Not a space" character class.
	Match on each string that has any non-spaces.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', ' ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\S', string): # bool True print(string) count += 1 # int, 1 print(f'count: {count}')


	CUSTOM CHARACTER CLASSES
Ex. 4.10	Custom character class.
	Match on each string that has a capital letter in it.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'[A-Z]', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.11	Using custom character class with built-in character class.
	Match on each string that has a letter followed by a number.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'[A-Za-z]\d', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')


	INVERSE CUSTOM CHARACTER CLASSES
Ex. 4.12	Inverse Custom Character Class. Match on each string that has any character that is not a letter.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'[^a-zA-Z]', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.13	Match on each string that ends with a character that is not a digit.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\D$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')


	THE WILDCARD
Ex. 4.14	Demo: match on any character.
	Use the wildcard (., a period) to see which strings match it.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'.', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')


	LAB 1
Ex. 4.15	Match on each string that starts with a digit.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\d', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.16	Match on each string that starts with a space.
	import re lines = [ 'this is the first line,', ' and this is the second line and', ' this is the third line. ' ] # list of strings for line in lines: # str, 'this is the first line,' if re.search(r'^\s', line): # bool False print(line)

Ex. 4.17	Loop through and print only lines with some text (not including spaces).
	import re text = """line 1 line 2, line 3... line4!""" # str lines = text.splitlines() # list, ['line1', 'line2,', '', 'line3...' ...] for line in lines: # str, line1 if re.search(r'\S', line): # bool True print(line)

Ex. 4.18	Match on each string that ends with a digit.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\d$', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.19	Match on each line that ends with a space.
	import re lines = [ 'this is the first line, ', 'this is the second line and', 'this is the third line. ' ] # list of strings for line in lines: # str, 'this is the first line, ' if re.search(r'\s$', line): # bool, True (first string) print(line)

Ex. 4.20	Match on each string that consists only of a 2-digit number.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\d\d$', string): # bool False print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.21	Match on a capital letter followed by a lowercase letter.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'[A-Z][a-z]', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.22	Match on files with date format YYYY-MM-DD followed by '.txt'.
	import re dirlist = ('.', '..', '2010-12-15.txt', '2010-12-16.txt', 'testfile.txt', '20101-11-03.txt') # tuple, ('.', '..', ...) for item in dirlist: # str, '.' if re.search(r'^\d\d\d\d\-\d\d\-\d\d\.txt$', item): # bool, False (first string) print(item)

Ex. 4.23	Match on date format MM/DD/YY (and not 4-digit year).
	import re dates = ['Jan. 3, 2018', '23-Mar-17', '12/02/98', '12/03/1998', '23.17.2018'] # list of strs for date in dates: # str, 'Jan. 3, 2018 ' if re.search(r'^\d\d\/\d\d\/\d\d$', date): # bool, False (first string) print(date)

Ex. 4.24	Determine whether the selected word begins with a vowel. If so, prepend an 'an' rather than an 'a'.
	import re words = ['apple', 'pear', 'orange', 'kiwi', 'elderberry', 'carrot', 'ugli fruit']# for word in words: # str, 'apple' if re.search(r'^[aeiou]', word): # bool, True (first string) prepend = 'an' # str, 'an' else: prepend = 'a' # str, 'a' print(f"{prepend} {word}")


	BUILT-IN QUANTIFIERS
Ex. 4.25	"One or more" quantifier. Match on each string that has one or more letters in it.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'[a-zA-Z]+', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.26	"Zero or one" quantifier.
	Without using a character class (or grouped alternates), use a single regex that matches on each string that has 'a' or 'an' followed by a space.
	import re lines = [ 'This is a wonderful thing. ', "I haven't seen anything like it. ", "Isn't it an exceptional experience? "] # list of strs for line in lines: # str, 'This is a wonderful thing. ' if re.search(r'an?\s', line): # bool, True (first string) print(line)

Ex. 4.27	"Zero or more" quantifier, quantifiers with anchor.
	Match on all strings that consist only of a 1 followed by zero or more digits.
	import re numbers = [ '100', '135', '31', '1', '1 think', ] # list of strs for val in numbers: # str, '100' if re.search(r'^1\d*$', val): # bool, True (first string) print(val)

Ex. 4.28	Quantifiers with Anchor. Match on each string that consists only of one or more digit characters.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\d+$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.29	Quantifiers with Anchor (3). Match on each string that consists only of letters.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^[a-zA-Z]+$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.30	Quantifiers with custom character class.
	Match each string that has a capital letter followed by one or more lowercase letters.
	import re match_strings = [ 'hello World 00', 'goodbye C world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello World 00' if re.search(r'[A-Z][a-z]+', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.31	Quantifiers with anchors. Match on each string that consists only of letters, numbers or the underscore.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\w+$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.32	Quantifiers with anchors (2). Match on each string that consists only of non-digits.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\D+$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.33	Quantifiers with anchors (3). Match on each string that consists only of non-spaces.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'^\S+$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')


	CUSTOM QUANTIFIERS
Ex. 4.34	Custom quantifier.
	Match on each string that has two or more spaces at the end.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\s{2,}$', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.35	Custom quantifier.
	Match on strings that have a capital letter followed by two or more lowercase letters.
	import re match_strings = [ 'hello World 00', 'goodbye As world ', 'To 23 bonjour', 'wilkommen23 ', 'Aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello World 00' if re.search(r'[A-Z][a-z]{2,}', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.36	Custom quantifier.
	Print those numbers that are in the millions (i.e., 7 or more digits).
	import re nums = [ '1', '10', '100', '1000', '10000', '100000', '1000000', '10000000' ] # list of strs for num in nums: # str, '1' if re.search(r'\d{7,}', num): # bool, False (first string) print(num)

Ex. 4.37	Custom quantifier. Having split the text into words, show those words that contain a run of 7 or more word characters.
	import re text = """This is the 1000th story, regarding a duck named Quack. It was unlikely that Quack could have been given a name like that by his mother, so we can only conclude that he was named by the author, who has a cuteness problem.""" # str words = text.split() # list, ['This', 'is', ... ] stripped = [ word.rstrip('.,') for word in words ] # list, ['This', 'is', ... ] for word in stripped: # str, 'This' if re.search(r'\w{7,}', word): # bool, False (first string) print(word)

Ex. 4.38	Custom Quantifier.
	A password must be 3-8 characters in length (letters, numbers and underscores are permitted). Validate the below password attempts.
	import re attempts = [ '1234', 'hello_there', 'password', 'ok', 'what?', 'supercalifrag'] # list of strings for password in attempts: # str, '1234' if re.search(r'^\w{3,8}$', password): # bool, True (first string) print(f'{password}: validated')


	ESCAPING SPECIAL CHARACTERS
Ex. 4.39	Escape wildcard (aka period). Match on each string that has a letter, number or underscore followed by a period.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\w\.', string): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.40	Escape end anchor (aka dollar sign).
	Match on strings that have a dollar amount, including two decimal places ($23.53).
	import re lines = [ 'The coat cost $239.50.', 'The candy cost $1.93', "I didn't buy anything today.", '$1 sale', 'I dream of $$$' ] # list of strings for line in lines: # str, 'The coat cost $239.50' if re.search(r'\$\d+\.\d+', line): # bool, True (first string) print(line)

Ex. 4.41	Escape quantifier character +.
	Match on all lines with positive numbers.
	import re numbers = [ 'Amount: -23.9', 'Amount: +43.8', 'Amount: -9.03', 'Amount: +99.9', 'Amount: +22.0' ] # list of strings for num in numbers: # str, 'Amount: -23.9' if re.search(r'\+\d+\.\d+', num): # bool, False (first string) print(num)

Ex. 4.42	Escape quantifier character *.
	Match on all lines with asterisked footnotes.
	import re numbers = [ 'As Ibid* said,', 'there should be no greater good ', 'than compassion, love, ', 'mutual benefit', 'and the profit-making motive.', ] # list of strings for num in numbers: # str, 'As Ibid* said,' if re.search(r'\*', num): # bool, True (first string) print(num)


	LAB 2
Ex. 4.43	Match on each string that has one or more "word" characters, followed by one or more spaces, followed by one or more "word" characters.
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\w+\s+\w+', string): # bool, True (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.44	Ignore comment lines: print only those lines that don't start with a comment (the first non-space character is a hash mark).
	import re text = """ # this is a program to do stuff a = 5 b = 10 # an int if True: # multiply them c = a * b """ for line in text.splitlines(): if not re.search(r'^\s*#', line): print(line)

Ex. 4.45	Match those lines that contain a 7-digit hex number (a-fA-F0-9).
	import re lines = [ 'The color code is #ABF2307.', 'Mr. Mxyzptlk is 999 years old today.', 'The memory address is fc9d223.' ] # list of strings for line in lines: # str, 'The color code is #ABF2307.' if re.search(r'[a-fA-F0-9]{7}', line): # bool, True (first string) print(line)

Ex. 4.46	Show those lines that contain two capitalized words (as in a name).
	import re lines = [ 'The owner is Gwen Harstridge.', "There aren't a lot of stores like this one.", 'Paris is not a lot like Rome.', 'I hail from Los Angeles, California.' ] # list of strings for line in lines: # str, 'The owner is Gwen Harstridge.' if re.search(r'[A-Z][a-z]+\s+[A-Z][a-z]+', line): # bool, True (first string) print(line)


	re.IGNORECASE
Ex. 4.47	Without using a character class, match on each string that ends in .jpg or .JPG (try this another way).
	(hint: use the flag argument (the optional 3rd argument) to re.search())
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\.jpg$', string, re.I): # bool, False (first string) print(string) count += 1 # int, 1 print(f'count: {count}')

Ex. 4.48	Print only those files that start with 'image#' ('image' plus a possible number) and end in any of these image extensions: '.jpg', '.png', '.gif'
	import re filenames = ['image2.jpg', 'image.png', 'file.txt', 'file2.doc',# 'file3.pdf', 'image2.gif', 'image3.jpg', 'image4.jpg',# 'advert.jpg', 'advert.png'] # list of strings for name in filenames: # str, 'image2.jpg' if re.search(r'image\d?\.(jpg|png|gif)$', name): # bool, True (first string) print(name)

Ex. 4.49	Match on each string that ends in .jpg or .JPG
	import re match_strings = [ 'hello world 00', 'goodbye world ', ' 23 bonjour', 'wilkommen23 ', 'aloha', '99', '00', '88557799', 'Que 3 Tal!', 'myfile.jpg', 'yourfile.JPG' ] # list of strings count = 0 # int, 0 for string in match_strings: # str, 'hello world 00' if re.search(r'\.(jpg|JPG)', string): # bool, False (first string) # if re.search(r'\.JPG', string, re.I): # alternative print(string) count += 1 # int, 1 print(f'count: {count}')


	GROUPING FOR QUANTIFYING and ALTERNATES
Ex. 4.50	Quantifying a group. Match on a number with two decimal places and possible thousandths separator (3.95, 3,200.95, etc.)
	First create a pattern that is 1 or more digits followed by a comma separator (i.e. matching on '0,', '00,', '000,') and group it with parentheses; quantify the group to say that there are zero or more of these, followed by one or more digits, a period and 2 digits. (Do not use a custom character class for this purpose.)
	import re values = ['23.9', '18.2', '23.95', '2,238,000.00', '15,382.92', 'joe', '6.05'] # list of str for value in values: # str, '23.9' matchobj = re.search(r'^(\d+,)*\d+\.\d\d$', value) # re.Match object if matchobj: # bool, True print(value)

Ex. 4.51	Quantifying a Group (2). Write a single regex that matches on q, Q, quit, Quit, QUIT. Do this without a character class and without the alternate vertical bar.
	import re x = input('Do you want to quit? ') # str, 'QuIt' (sample input) if re.search(r'q(uit)?', x, re.I): # bool, True print("you're quitting!") else: print("you failed to quit.")


	GROUPING FOR EXTRACTION
Ex. 4.52	Group for extraction.
	Use a parenthetical grouping to extract the number from this text.
	import re line = '34: this is a line of text' # str, '34: this is a line of text' matchobj = re.search(r'(\d+)', line) # re.Match object print(matchobj.group(1)) # str, '34'

Ex. 4.53	Group for extraction. Extract the Catalog ID and Publication Date from the text line.
	import re rs_row = 'Catalog ID: 2839-587 Pub. Date: 2019-09-03' # str, 'Catalog ID...' matchobj = re.search(r'Catalog\s+ID:\s+(\d+\-\d+)\s+Pub.\s+Date:\s+(\d\d\d\d\-\d\d\-\d\d)', rs_row) # re.Match object if matchobj: # bool, True print(matchobj.group(1)) # str, '2839-587' print(matchobj.group(2)) # str, '2019-09-03'

Ex. 4.54	Group for extraction. In one regex match, extract the IP address from this log line.
	import re line = '172.26.93.208 - - [28/Jun/2012:21:00:17 -0400] "GET /~cmk380/pythondata/image2b.txt HTTP/1.1" 200 30' # str, '172.26.93.208 - - ...' matchobj = re.search(r'^(\d{2,3}\.\d{2,3}\.\d{2,3}\.\d{2,3})', line) # re.Match object if matchobj: # bool, True print(matchobj.group(1)) # str, '172.26.93.208'


	'MINIMAL MATCH' QUANTIFIER
Ex. 4.55	Demonstration: "minimal" match.
	The below regex grabs the word Python from the text. Run the code once to observe this. Now add a question mark ? as the character directly after the "one or more" plus sign and run again - you should see that the "one or more word characters" pattern is now matching on as few characters as possible.
	import re text = 'My language is Python' # str matchobj = re.search(r'My language is (\w+)', text) # re.Match object print(matchobj.group(1)) # str, 'Python'

Ex. 4.56	Work with wildcard and minimal match.
	Use the wildcard to match everything between the first two brackets. Note carefully what was printed.
	import re text = 'Discussion of terms <TO COME> after something <PLEASE REVIEW>.' # str matchobj = re.search(r'(<.+?>)', text) # re.Match object print(matchobj.group(1)) # str, '<TO COME>'

Ex. 4.57	Match on non-search character.
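	Perform the same extraction on the below text by searching for a bracket followed by one or more non-brackets. The text extracted should be the same, except that here the group sits inside the brackets, so the brackets themselves are not captured.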
	import re text = 'Discussion of terms <TO COME> after something <PLEASE REVIEW>.' # str matchobj = re.search(r'<([^>]+)>', text) # re.Match object print(matchobj.group(1)) # str, 'TO COME'


	GROUPING with .groups()
Ex. 4.58	Retrieve a grouping with .groups().
	In one regex match, extract the status code and bytes downloaded (last 2 integers on the line) from this log line. Call .groups() on the match object to reveal the extracted values.
	import re line = '172.26.93.208 - - [28/Jun/2012:21:00:17 -0400] "GET /~cmk380/pythondata/image2b.txt HTTP/1.1" 200 30' # str, '172.26.93.208 - - ...' matchobj = re.search(r'(\d+)\s+(\d+)$', line) # re.Match object if matchobj: # bool, True print(matchobj.groups()) # tuple, ('200', '30')

Ex. 4.59	Retrieve a grouping with .groups().
	Extract the city, state and zip code from the line.
	import re line = 'Los Angeles, CA 91604' # str, 'Los Angeles ...' matchobj = re.search(r'([^,]+),\s+([A-Z]{2})\s+(\d+)', line) # re.Match object print(matchobj.groups()) # tuple, ('Los Angeles', 'CA', '91604')

Ex. 4.60	Quantify for an optional group.
	Pull out all the info about each person (Favorite Color may not be there).
	import re results = [ 'Name: Joe; Favorite Color: Blue; Employee ID: 2395', 'Name: Marie; Employee ID: 2321', 'Name: Teneski; Favorite Color: Green; Employee ID: 1913' ] # list of strings for row in results: # str, 'Name: Joe; Favorite ...' matchobj = re.search(r'^Name:\s+([A-Za-z]+);\s+(Favorite Color:\s+([A-Za-z]+);\s+)?Employee ID:\s+(\d+)', row) # re.Match object print(matchobj.groups()) # tuple, ('Joe', 'Favorite Color: Blue; ', 'Blue', '2395')


	findall() FOR MULTIPLE MATCHES
Ex. 4.61	Group and extract with findall().
	Extract email addresses only for nyu.edu.
	import re text = """There are many ways to contact us. Use the general email contact@nyu.edu, or email our public liaison at help@nyu.edu. If you need tech support you can reach us at askits@nyu.edu. Author: Joe Wilson joe@wilson.com""" # str emails = re.findall(r'[a-z]+@nyu.edu', text) # list, ['contact@nyu.edu', 'help@nyu.edu' ... ] print(emails)


	re.sub() FOR SUBSTITUTIONS
Ex. 4.62	Regex substitution. Replace space-separated with comma separated
	import re args = 'this that other and some other' # str, 'this that other ... ' args2 = re.sub(r'\s+', ",", args) # str, 'this,that,other...' print(args2)


	re.split() FOR PATTERN-BASED DELIMITERS
Ex. 4.63	Regex split. Split the user-input comma-separated values string into separate digit values.
	import re ui = '23, 14, 7,3,9' # str, '23, 14, 7...' numbers = re.split(r',\s*', ui) # list, ['23', '14', '7', '3', '9'] print(numbers)

Ex. 4.64	DOTALL wildcard match.
	Extract everything between =code start= and =code end=. Use the re.DOTALL switch to allow the wildcard (.) to match on a newline.
	import re text = """Title of This Text This is some description... =code start= a = 5 b = 5.0 if a == b: print('yes') =code end= This is some discussion... """ # str matchobj = re.search(r'=code start=(.+)=code end=', text, re.DOTALL) # re.Match object print(matchobj.group(1)) # str, 'a = 5\nb = 5.0\nif a ...'

Ex. 4.65	Multiline anchors.
	Use findall() to extract numbers from only the start of each line of the text. Use re.MULTILINE to allow the caret (^) to match at the start of any line.
	import re text = """Title of This Text 23 we want to grab some 99 numbers 12 but not others, 17 and then some 5 so we just get 1 the ones on the left side 93 and me and 23 too """ # str matches = re.findall(r'^\d+', text, re.MULTILINE) # ['23', '12', '5', '93'] print(matches)