Common Data Validation Functions in Python Using Regular Expressions
This article provides a collection of Python functions that use regular expressions to validate common data types such as phone numbers, ID cards, names, emails, URLs, IP addresses, credit cards, dates, passwords, filenames, numeric strings, postal codes, HTML content, domains, repeated words, IPv6 addresses, and also demonstrates scheduled tasks with the schedule library.
1. Phone number validation
import re
def validate_phone_number(phone):
    """Return True if *phone* is an 11-digit number of the form 1[3-9]XXXXXXXXX.

    The whole string must match: a leading ``1``, a second digit in the
    range 3-9, then nine more ASCII digits. Anything else (including a
    trailing newline or non-ASCII digit characters) is rejected.
    """
    # re.fullmatch instead of re.match + "$": "$" also matches just before
    # a trailing newline, which would let such input through. [0-9] instead
    # of \d: on str patterns \d matches every Unicode decimal digit.
    return re.fullmatch(r'1[3-9][0-9]{9}', phone) is not None
# Example
print(validate_phone_number('13800138000')) # True

2. ID card validation
def validate_id_card(id_card):
    """Return True if *id_card* has the shape of a 15- or 18-character ID number.

    Accepted shapes are exactly 15 ASCII digits, or 17 ASCII digits followed
    by one more digit or an uppercase ``X``. Only the format is checked; no
    checksum or date-of-birth verification is performed.
    """
    # fullmatch rejects a trailing newline that "^...$" with re.match would
    # accept; [0-9] rejects non-ASCII digits that \d would accept.
    return re.fullmatch(r'[0-9]{15}|[0-9]{17}[0-9X]', id_card) is not None
# Example
print(validate_id_card('11010519491231002X')) # True

3. Chinese/English name validation
def validate_name(name):
    """Return True if *name* is non-empty and made up only of characters in
    the range U+4E00-U+9FA5, ASCII letters, and whitespace.
    """
    allowed_chars = re.compile(r'^[\u4e00-\u9fa5a-zA-Z\s]+$')
    return allowed_chars.match(name) is not None
# Example
print(validate_name('张三')) # True
print(validate_name('Zhang San')) # True

4. Email address validation
def validate_email(email):
    """Return True if *email* looks like ``local@domain.tld``.

    The local part may contain ASCII letters, digits and ``_ . + -``; the
    domain is one label of letters/digits/hyphens, a dot, and then one or
    more letters, digits, hyphens or dots. This is a shape check only and
    does not implement the full e-mail address grammar.
    """
    # fullmatch so that input ending in a newline is rejected; with
    # re.match and a "$" anchor such input was accepted.
    pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9.-]+'
    return re.fullmatch(pattern, email) is not None
# Example
print(validate_email('user@example.com')) # True

5. URL validation
def validate_url(url):
    """Return True if *url* looks like an (optionally http/https) web address.

    Shape accepted: an optional ``http://`` or ``https://`` prefix, a host
    made of lowercase letters, digits, dots and hyphens, a dot, a 2-6
    character suffix of lowercase letters/dots, and an optional tail of
    ``/``, word characters, spaces, dots and hyphens.
    """
    # The original tail was "([\/\w \.-]*)*\/?" -- a quantified group that
    # is itself quantified. It matches the same strings as a single
    # "[/\w .-]*" (and "/" is already in the class, so the trailing "\/?"
    # adds nothing), but on a non-matching input the nested form backtracks
    # exponentially. The flattened form keeps the accepted language and
    # removes the blow-up.
    # fullmatch also rejects a trailing newline that "$" would let through.
    pattern = r'(?:https?://)?[\da-z.-]+\.[a-z.]{2,6}[/\w .-]*'
    return re.fullmatch(pattern, url) is not None
# Example
print(validate_url('http://www.example.com')) # True

6. IP address validation
def validate_ip(ip):
    """Return True if *ip* is a dotted-quad IPv4 address.

    Each of the four dot-separated fields must be 1-3 ASCII digits with a
    value in 0-255. Leading zeros (e.g. ``01``) are accepted, as in the
    original pattern.
    """
    # One field: 250-255, 200-249, or an optional 0/1 followed by 1-2 digits.
    octet = r'(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'
    pattern = r'(?:' + octet + r'\.){3}' + octet
    # fullmatch + [0-9]: reject a trailing newline and non-ASCII digits,
    # both of which "^...\d...$" with re.match accepted.
    return re.fullmatch(pattern, ip) is not None
# Example
print(validate_ip('192.168.0.1')) # True

7. Scheduled task using the schedule library
import schedule
import time
def job():
    """Write a fixed status line to standard output."""
    message = "I'm working..."
    print(message)
schedule.every(10).minutes.do(job)
while True:
schedule.run_pending()
time.sleep(1)

8. Credit card number validation
def validate_credit_card(card_number):
    """Return True if *card_number* matches one of the known prefix/length shapes.

    Only the digit pattern is checked (prefix and total length, ASCII
    digits only, no separators); no checksum is computed, so a True result
    does not mean the number is a real card.
    """
    pattern = (
        r'(?:4[0-9]{12}(?:[0-9]{3})?'
        r'|[25][1-7][0-9]{14}'
        r'|6(?:011|5[0-9][0-9])[0-9]{12}'
        r'|3[47][0-9]{13}'
        r'|3(?:0[0-5]|[68][0-9])[0-9]{11}'
        r'|(?:2131|1800|35)[0-9]{3}[0-9]{11})'
    )
    # fullmatch rejects a trailing newline that "$" tolerated; the last
    # alternative now uses [0-9] like the others, so non-ASCII digits
    # (which \d matches) are no longer accepted.
    return re.fullmatch(pattern, card_number) is not None
# Example
print(validate_credit_card('4111111111111111')) # True

9. Date format (YYYY‑MM‑DD) validation
def validate_date(date_text):
    """Return True if *date_text* is a real calendar date written YYYY-MM-DD.

    The string must be exactly four, two and two ASCII digits separated by
    hyphens, and must also parse as a valid date (so ``2025-02-30`` is
    rejected).
    """
    # Imported here because the surrounding file only imports ``re``;
    # without this the strptime call below raises NameError.
    import datetime

    # Shape check first: strptime alone would accept unpadded fields such
    # as "2025-5-5". [0-9] keeps non-ASCII digits away from strptime.
    if re.fullmatch(r'[0-9]{4}-[0-9]{2}-[0-9]{2}', date_text) is None:
        return False
    try:
        datetime.datetime.strptime(date_text, '%Y-%m-%d')
    except ValueError:
        # Right shape, but not an actual date (bad month/day, year 0000, ...).
        return False
    return True
# Example
print(validate_date('2025-05-05')) # True

10. Password strength validation (at least one uppercase, one lowercase, one digit, one special character)
def validate_password(password):
    """Return True if *password* meets the strength rules.

    Rules: at least 8 characters, drawn only from ASCII letters, digits and
    ``@$!%*?&``, and containing at least one lowercase letter, one uppercase
    letter, one digit and one of those special characters.
    """
    pattern = (
        r'(?=.*[a-z])'        # at least one lowercase letter
        r'(?=.*[A-Z])'        # at least one uppercase letter
        r'(?=.*\d)'           # at least one digit
        r'(?=.*[@$!%*?&])'    # at least one special character
        r'[A-Za-z\d@$!%*?&]{8,}'
    )
    # fullmatch: the original "$" anchor also matched before a trailing
    # newline, so "Password@123" followed by a newline was accepted.
    return re.fullmatch(pattern, password) is not None
# Example
print(validate_password('Password@123')) # True

11. Filename validation (no special characters)
def validate_filename(filename):
    """Return True if *filename* is ``<name>.<ext>`` with no special characters.

    ``<name>`` is one or more word characters, commas, whitespace or
    hyphens; ``<ext>`` is two or more ASCII letters.
    """
    # fullmatch: with re.match and "$", a name ending in a newline after
    # the extension was still accepted.
    return re.fullmatch(r'[\w,\s-]+\.[A-Za-z]{2,}', filename) is not None
# Example
print(validate_filename('example.txt')) # True

12. Integer validation
def validate_integer(integer_str):
    """Return True if *integer_str* is an optional ``-`` followed by digits.

    A leading ``+``, surrounding whitespace, or an empty string are all
    rejected.
    """
    # fullmatch instead of "^...$" with re.match, which accepted a string
    # ending in a newline.
    return re.fullmatch(r'-?\d+', integer_str) is not None
# Example
print(validate_integer('123')) # True
print(validate_integer('-123')) # True

13. Float validation
def validate_float(float_str):
    """Return True if *float_str* is a plain decimal number.

    Accepted shape: optional ``-``, one or more digits, and optionally a
    ``.`` followed by one or more digits. Forms such as ``.5``, ``1.`` or
    exponent notation are rejected.
    """
    # fullmatch instead of "^...$" with re.match, which accepted a string
    # ending in a newline.
    return re.fullmatch(r'-?\d+(?:\.\d+)?', float_str) is not None
# Example
print(validate_float('123.45')) # True
print(validate_float('-123.45')) # True

14. Chinese postal code validation
def validate_postal_code(postal_code):
    """Return True if *postal_code* is exactly six ASCII digits."""
    # fullmatch + [0-9]: reject a trailing newline and non-ASCII digit
    # characters, both of which "^\d{6}$" with re.match accepted.
    return re.fullmatch(r'[0-9]{6}', postal_code) is not None
# Example
print(validate_postal_code('100000')) # True

15. HTML tag removal
def remove_html_tags(text):
    """Return *text* with every ``<...>`` span removed.

    This is a plain textual strip of anything between ``<`` and the next
    ``>``; it does not parse HTML and should not be relied on to sanitize
    untrusted markup.
    """
    # "[^>]*" rather than ".*?": "." does not match a newline, so a tag
    # whose attributes wrap onto another line was previously left in place.
    tag = re.compile(r'<[^>]*>')
    return tag.sub('', text)
# Example
html_content = "Hello, world!"
print(remove_html_tags(html_content)) # Hello, world!

16. Extract domain from URL
def extract_domain(url):
    """Return the host portion of an http/https *url*, or None.

    The host is the run of letters, digits, ``_``, ``.`` and ``-`` that
    immediately follows ``http://`` or ``https://`` at the start of the
    string. None is returned when the string does not start that way.
    """
    found = re.match(r'https?://([A-Za-z_0-9.-]+).*', url)
    return found.group(1) if found else None
# Example
print(extract_domain("http://www.example.com/path")) # www.example.com

17. Check for repeated words in a string
def has_repeated_words(text):
    """Return True if *text* contains a word immediately followed by itself.

    The two occurrences must be separated only by whitespace; the
    comparison ignores letter case.
    """
    doubled_word = re.compile(r'\b(\w+)\s+\1\b', re.IGNORECASE)
    return doubled_word.search(text) is not None
# Example
print(has_repeated_words("This is is a test.")) # True

18. IPv6 address validation
def validate_ipv6(ipv6):
    """Return True if *ipv6* is a valid IPv6 address string.

    Both the full eight-group form and the compressed ``::`` forms are
    accepted. Validation is delegated to ``ipaddress.IPv6Address``, so the
    exact set of accepted notations follows the standard library.

    Raises:
        TypeError: if *ipv6* is not a ``str``.
    """
    # Local import: the surrounding file only imports ``re``.
    import ipaddress

    # IPv6Address also accepts ints and packed bytes; keep this function
    # string-only, as the regex-based version was.
    if not isinstance(ipv6, str):
        raise TypeError('expected a string, got %s' % type(ipv6).__name__)
    try:
        ipaddress.IPv6Address(ipv6)
    except ValueError:
        # AddressValueError is a ValueError subclass.
        return False
    return True
# Example
print(validate_ipv6('2001:0db8:85a3:0000:0000:8a2e:0370:7334')) # True

19. Clean phone number by removing non‑digit characters
def clean_phone_number(phone):
    """Return *phone* with every non-digit character stripped out."""
    non_digit = re.compile(r'\D')  # \D matches any non-digit character
    return non_digit.sub('', phone)
# Example
print(clean_phone_number('+1-800-123-4567')) # 18001234567

20. Extract all email addresses from text
def extract_emails(text):
    """Return a list of all e-mail-like substrings found in *text*.

    Matches are returned in order of appearance; an empty list is returned
    when there are none.
    """
    # The domain is written as dot-separated labels rather than one class
    # that includes ".": with the dot inside the final class, an address at
    # the end of a sentence ("... bob@example.com.") was captured together
    # with the trailing full stop.
    pattern = r'[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+'
    return re.findall(pattern, text)
# Example
text = "Contact us at support@example.com or sales@example.org"
print(extract_emails(text)) # ['support@example.com', 'sales@example.org']

Test Development Learning Exchange
Test Development Learning Exchange
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.