Fundamentals 10 min read

Python Cheat Sheet for File Management, Data Processing, Web Requests, Automation, and Text Manipulation

This article provides a concise Python cheat sheet covering common file and directory operations, CSV/JSON data handling, web request and crawling techniques, automation tasks such as scheduling and email sending, and various string manipulation utilities, each illustrated with ready‑to‑run code snippets.

Test Development Learning Exchange

Sep 28, 2024

Python Cheat Sheet for File Management, Data Processing, Web Requests, Automation, and Text Manipulation

File and Directory Management

Examples for batch renaming, locating large files, creating nested directories, removing empty directories, copying, moving, reading, writing, appending files, and checking file existence.

import os
# Batch rename: for every entry in the current directory, swap the substring
# 'old' for 'new' in its name. Entries without 'old' are renamed to themselves.
for entry in os.listdir('.'):
    renamed = entry.replace('old', 'new')
    os.rename(entry, renamed)

# Locate large files: print the path of every file below the current
# directory whose size exceeds 1 MiB (1024 * 1024 bytes).
size_limit = 1024 * 1024
for folder, _subdirs, filenames in os.walk('.'):
    for filename in filenames:
        full_path = os.path.join(folder, filename)
        if os.path.getsize(full_path) > size_limit:
            print(full_path)

# Create the whole nested directory chain in one call. exist_ok=True keeps the
# call from raising when the target directory already exists.
os.makedirs('dir/subdir/subsubdir', exist_ok=True)

# Remove empty directories. The walk is bottom-up (topdown=False), so a
# directory's children are handled before the directory itself is checked.
for parent, subdirs, _files in os.walk('.', topdown=False):
    for subdir in subdirs:
        candidate = os.path.join(parent, subdir)
        if os.listdir(candidate):
            # Still has entries: leave it alone.
            continue
        os.rmdir(candidate)

import shutil
# Copy a file, then move (rename) a file. Both examples use the same pair of
# paths, so the paths are named once.
src, dst = 'source.txt', 'destination.txt'
shutil.copy(src, dst)
shutil.move(src, dst)

# Read: load the entire contents of file.txt into one string
# (text read mode is open()'s default).
with open('file.txt') as reader:
    content = reader.read()

# Write: mode 'w' truncates the file (or creates it) before writing.
with open('file.txt', 'w') as writer:
    writer.write('Hello, World!')

# Append: mode 'a' adds to the end of file.txt (creating it if needed) without
# touching the existing contents. The leading '\n' starts the appended text on
# a new line; it must be written as an escape sequence, because a raw line
# break inside a single-quoted string literal is a syntax error.
with open('file.txt', 'a') as file:
    file.write('\nAppend this line.')

# Existence check: report whether file.txt is present.
status = "File exists." if os.path.exists('file.txt') else "File does not exist."
print(status)

Data Processing

Reading and writing CSV files, handling JSON data, and common list operations such as deduplication, sorting, reversing, merging, and finding min/max values.

import csv
# Read a CSV file row by row; each row is printed as a list of strings.
# newline='' hands line-ending handling to the csv module (as the csv docs
# require), so quoted fields containing embedded newlines are parsed correctly.
# This also matches how the file is opened by the CSV-writing example.
with open('data.csv', 'r', newline='') as file:
    reader = csv.reader(file)
    for row in reader:
        print(row)

import csv
# Write a header row plus two records to data.csv, one row at a time.
# newline='' lets the csv writer control the line terminator itself.
data = [['Name', 'Age'], ['Alice', 30], ['Bob', 25]]
with open('data.csv', 'w', newline='') as out:
    writer = csv.writer(out)
    for record in data:
        writer.writerow(record)

import json
# Load JSON: read data.json as text and parse it into Python objects.
with open('data.json', 'r') as source:
    data = json.loads(source.read())

import json
# Save JSON: serialise the dictionary to text and write it to data.json.
data = {'name': 'Alice', 'age': 30}
with open('data.json', 'w') as target:
    target.write(json.dumps(data))

# Deduplicate while keeping first-seen order. dict keys are unique and
# preserve insertion order, whereas a set gives no ordering guarantee, so
# list(set(...)) may shuffle the elements.
my_list = [1, 2, 2, 3, 4, 4, 5]
unique_list = list(dict.fromkeys(my_list))

# Sorting: take a copy and sort it in place, leaving my_list untouched.
my_list = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
sorted_list = list(my_list)
sorted_list.sort()

# Reversing: a slice with step -1 yields a new, reversed list.
my_list = [1, 2, 3, 4, 5]
reversed_list = my_list[::-1]

# Merging: unpack both lists into a new one.
list1 = [1, 2, 3]
list2 = [4, 5, 6]
combined_list = [*list1, *list2]

# Extremes: smallest and largest element.
my_list = [1, 2, 3, 4, 5]
min_value, max_value = min(my_list), max(my_list)

Network Requests and Crawling

Fetching web pages, parsing HTML with BeautifulSoup, downloading images, sending GET/POST requests, handling JSON responses, setting timeouts, managing exceptions, using sessions, and customizing request headers.

import requests
# Fetch a web page with a GET request and print the response body as text.
page_url = 'https://www.example.com'
response = requests.get(page_url)
print(response.text)

from bs4 import BeautifulSoup
# Parse the HTML of a previously fetched `response` with the built-in
# 'html.parser' backend, then print the text of each <h1> element.
html = response.text
soup = BeautifulSoup(html, 'html.parser')
titles = soup.find_all('h1')
for heading in titles:
    print(heading.text)

# Download an image and save it to disk.
# raise_for_status() aborts on a 4xx/5xx reply, so an HTML error page is never
# written out as image.jpg; the timeout stops the call from hanging forever on
# an unresponsive server.
image_response = requests.get('http://example.com/image.jpg', timeout=10)
image_response.raise_for_status()
img_data = image_response.content
# 'wb': the payload is raw bytes, so write in binary mode.
with open('image.jpg', 'wb') as handler:
    handler.write(img_data)

# Send a POST request; a dict passed as `data` is sent as the request's
# form fields. Print the response body.
post_url = 'https://httpbin.org/post'
payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.post(post_url, data=payload)
print(response.text)

# Call an API endpoint and decode its JSON body into Python objects.
api_url = 'https://api.example.com/data'
response = requests.get(api_url)
data = response.json()
print(data)

# Give up on the request after this many seconds instead of waiting forever.
timeout_seconds = 5
try:
    response = requests.get('https://www.example.com', timeout=timeout_seconds)
except requests.Timeout:
    print("The request timed out")

# Handle request failures explicitly.
# - raise_for_status() turns a 4xx/5xx reply into requests.HTTPError.
# - requests.RequestException is the base class of the library's own errors
#   (connection failures, timeouts, ...). Catching it instead of a blanket
#   Exception keeps unrelated programming errors from being swallowed.
# - The timeout prevents the call from blocking indefinitely.
try:
    response = requests.get('https://www.example.com', timeout=5)
    response.raise_for_status()
except requests.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
except requests.RequestException as err:
    print(f"Other error occurred: {err}")

# A Session reuses connections and keeps cookies across requests. Using it as
# a context manager closes the session when the block ends, even if a request
# raises.
with requests.Session() as session:
    response = session.get('https://www.example.com')
    print(response.text)

# Show the headers of the most recent `response` obtained above.
print(response.headers)

# Send custom request headers (here a User-Agent) along with a GET request.
target_url = 'https://www.example.com'
headers = {'User-Agent': 'MyApp/1.0'}
response = requests.get(target_url, headers=headers)
print(response.text)

Automation Tasks

Scheduling recurring jobs, sending emails via SMTP, executing system commands, compressing and extracting zip files, monitoring file changes, generating random numbers, strings, and passwords, and reading environment variables.

import schedule, time
def job():
    """Print a fixed status message and return None."""
    message = "I'm working..."
    print(message)

# Register job() to run every 10 seconds.
schedule.every(10).seconds.do(job)
# Scheduler loop: never exits. Once per second it asks the scheduler to run
# whatever is pending; the sleep keeps the loop from spinning at full speed.
while True:
    schedule.run_pending()
    time.sleep(1)

import smtplib
from email.mime.text import MIMEText
# Build a plain-text message and send it through the SMTP server on localhost.
msg = MIMEText('Hello, this is a test email.')
msg['Subject'] = 'Test Email'
# Placeholder addresses; replace them with a real sender and recipient.
msg['From'] = 'sender@example.com'
msg['To'] = 'recipient@example.com'
# The context manager issues QUIT and closes the connection on exit, even if
# send_message() raises, so the connection cannot leak.
with smtplib.SMTP('localhost') as s:
    s.send_message(msg)

import subprocess
# Run `ls -l` without a shell, capture its standard output as bytes, then
# decode the bytes as UTF-8 and print the listing.
command = ['ls', '-l']
result = subprocess.run(command, stdout=subprocess.PIPE)
listing = str(result.stdout, 'utf-8')
print(listing)

import zipfile
# Compress: create archive.zip (DEFLATE compression) containing file.txt.
with zipfile.ZipFile('archive.zip', mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    archive.write('file.txt')
# Extract: unpack every member of archive.zip into extracted_files/
# (read mode is ZipFile's default).
with zipfile.ZipFile('archive.zip') as archive:
    archive.extractall(path='extracted_files')

import time, os, hashlib
def get_file_hash(filename):
    """Return the hexadecimal MD5 digest of a file's contents.

    The file is read in fixed-size chunks, so memory use stays constant no
    matter how large the file is. MD5 serves here only as a change-detection
    fingerprint, not as a security measure.

    Args:
        filename: Path of the file to hash.

    Returns:
        The digest as a 32-character lowercase hex string.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    hasher = hashlib.md5()
    with open(filename, 'rb') as f:
        # iter(callable, sentinel) keeps calling f.read until it returns b''.
        for chunk in iter(lambda: f.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()
# Poll file.txt once per second and report when its content hash changes.
# NOTE: last_hash starts as None, so the very first iteration always prints
# "File has changed!" even though nothing has been modified yet.
last_hash = None
while True:  # runs until the process is interrupted
    current_hash = get_file_hash('file.txt')
    if current_hash != last_hash:
        print("File has changed!")
        # Remember the new fingerprint so the same change is reported once.
        last_hash = current_hash
    time.sleep(1)

import random
import secrets
import string

# Random integer between 1 and 100, both ends included.
random_number = random.randint(1, 100)
print(random_number)

# Random 12-character string of ASCII letters and digits. The `string` module
# must be imported for ascii_letters / digits to be available.
alphabet = string.ascii_letters + string.digits
random_string = ''.join(random.choices(alphabet, k=12))
print(random_string)

# Passwords are security-sensitive, so draw the characters from `secrets`
# (a cryptographically strong source); `random` is predictable and meant for
# simulations, not secrets.
password = ''.join(secrets.choice(alphabet) for _ in range(12))
print(password)

import os
# Read the API_KEY environment variable; the result is None when it is unset.
api_key = os.environ.get('API_KEY')
print(api_key)

Text Processing

Counting words, replacing substrings, splitting and joining strings, checking for substrings, changing case, trimming whitespace, removing all spaces, and formatting strings with f‑strings.

# Word count: split on runs of whitespace and count the pieces.
text = "This is a test. This is only a test."
words = text.split()
word_count = len(words)
print(f"Word count: {word_count}")

# Replace: substitute every occurrence of one substring with another.
text = "Hello, World!"
old, new = "World", "Python"
new_text = text.replace(old, new)
print(new_text)

# Split: break a comma-separated string into a list.
text = "apple,banana,orange"
separator = ','
fruits = text.split(separator)
print(fruits)

# Join: glue list items together with ', ' between them.
fruits = ['apple', 'banana', 'orange']
glue = ', '
text = glue.join(fruits)
print(text)

# Substring check: `in` tests whether one string occurs inside another.
# The example defines its own input so it does not depend on whatever `text`
# an earlier snippet happened to leave behind.
text = "Hello, World!"
if "World" in text:
    print("Found 'World' in the text.")

# Upper case.
text = "hello, world!"
shouted = text.upper()
print(shouted)

# Lower case.
text = "HELLO, WORLD!"
lowered = text.lower()
print(lowered)

# Trim leading and trailing whitespace.
text = "   Hello, World!   "
trimmed = text.lstrip().rstrip()
print(trimmed)

# Remove every space character: split on single spaces and re-join the pieces
# with nothing in between.
text = "Hello,   World!"
print("".join(text.split(" ")))

# f-string formatting: embed variables directly in the string.
name, age = "Alice", 30
print(f"Name: {name}, Age: {age}")

Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contact us and we will review it promptly.

Python Scripting Web Scraping File Management

Written by

Test Development Learning Exchange

0 followers

Reader feedback

How this landed with the community

Rate this article

Was this worth your time?

Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.