Fundamentals 6 min read

Python‑docx Guide: Extract, Create, and Manipulate Word Documents

This tutorial demonstrates how to use the python-docx library to extract text and images, create new documents, replace text, add pictures, build and fill tables, delete paragraphs, set styles, and add headers and footers in Word files, with a ready-to-run code example for each operation.

Test Development Learning Exchange
Test Development Learning Exchange
Test Development Learning Exchange
Python‑docx Guide: Extract, Create, and Manipulate Word Documents

This guide shows how to work with Microsoft Word files using the python-docx library, covering common tasks such as extracting text, creating documents, modifying content, and customizing layout.

# Extract all text from a Word document
from docx import Document


def extract_text_from_docx(docx_path):
    """Return the text of each paragraph in the document, one per line.

    Only the entries of ``Document.paragraphs`` are read.
    NOTE(review): text inside tables is presumably not part of that list;
    confirm against the python-docx documentation if table text matters.
    """
    document = Document(docx_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)


docx_path = 'path/to/your/document.docx'
text = extract_text_from_docx(docx_path)
print(text)

# Create a new Word document and add text
from docx import Document


def create_new_docx_with_text(file_path, text):
    """Create an empty document, add *text* as one paragraph, and save it.

    The document is written to *file_path*.
    """
    new_document = Document()
    new_document.add_paragraph(text)
    new_document.save(file_path)


file_path = 'path/to/new_document.docx'
text = "Hello, this is a new document."
create_new_docx_with_text(file_path, text)

# Replace text in a Word document
from docx import Document


def replace_text_in_docx(docx_path, old_text, new_text):
    """Replace *old_text* with *new_text* in the document and save it in place.

    The replacement is applied run by run, so a match is only found when
    *old_text* lies entirely inside a single run. A paragraph whose text
    contains *old_text* split across several runs is left unchanged.

    *old_text* should be non-empty: an empty string is contained in every
    run, and ``str.replace`` would then insert *new_text* between all
    characters.
    """
    doc = Document(docx_path)
    for paragraph in doc.paragraphs:
        # Cheap paragraph-level check before looking at individual runs.
        if old_text not in paragraph.text:
            continue
        for run in paragraph.runs:
            if old_text in run.text:
                run.text = run.text.replace(old_text, new_text)
    doc.save(docx_path)


docx_path = 'path/to/your/document.docx'
replace_text_in_docx(docx_path, "old text", "new text")

# Add an image to a Word document
from docx import Document


def add_image_to_docx(file_path, image_path):
    """Create a new document containing the picture at *image_path*.

    No width or height is given, so the library's defaults for both apply.
    The document is saved to *file_path*.
    """
    picture_doc = Document()
    picture_doc.add_picture(image_path)
    picture_doc.save(file_path)


file_path = 'path/to/new_document_with_image.docx'
image_path = 'path/to/your/image.png'
add_image_to_docx(file_path, image_path)

# Extract all images from a Word document
import os

from docx import Document


def extract_images_from_docx(docx_path, output_dir):
    """Write every image embedded in the document into *output_dir*.

    Files are named ``image_<index><ext>``, where ``<ext>`` is taken from
    the image part's name inside the package (falling back to ``.png``
    when the part name has no extension). *output_dir* is created if it
    does not exist.

    Returns:
        The list of file paths that were written, in extraction order.
    """
    doc = Document(docx_path)

    image_parts = []
    for rel in doc.part.rels.values():
        if "image" not in rel.reltype:
            continue
        # Linked (external) images have no part stored in the package,
        # so there is no blob to extract for them.
        if rel.is_external:
            continue
        image_parts.append(rel.target_part)

    os.makedirs(output_dir, exist_ok=True)

    written_paths = []
    for index, image_part in enumerate(image_parts):
        # Keep the real format (.jpeg, .gif, ...) instead of assuming PNG.
        extension = os.path.splitext(str(image_part.partname))[1] or ".png"
        out_path = os.path.join(output_dir, f"image_{index}{extension}")
        with open(out_path, "wb") as f:
            f.write(image_part.blob)
        written_paths.append(out_path)
    return written_paths


docx_path = 'path/to/your/document.docx'
output_dir = 'path/to/output/images'
extract_images_from_docx(docx_path, output_dir)

# Create a table in a Word document
from docx import Document


def create_table_in_docx(file_path):
    """Create a new document with a one-row, three-column header table.

    The header cells are filled with 'Qty', 'Id' and 'Desc', and the
    document is saved to *file_path*.
    """
    doc = Document()
    table = doc.add_table(rows=1, cols=3)
    header_row = table.rows[0].cells
    for column, title in enumerate(('Qty', 'Id', 'Desc')):
        header_row[column].text = title
    doc.save(file_path)


file_path = 'path/to/new_document_with_table.docx'
create_table_in_docx(file_path)

# Fill a table with data
from docx import Document


def fill_table_in_docx(file_path, data):
    """Append one row per item in *data* to the first table of the document.

    Each item is a mapping with the keys 'Qty', 'Id' and 'Desc'; the values
    are written to the first three cells of the new row. Every value is
    converted with ``str()`` so numeric ids or descriptions are handled the
    same way as the quantity. The document is saved back to *file_path*.

    Raises:
        IndexError: if the document contains no table, or the table has
            fewer than three columns.
        KeyError: if an item lacks one of the expected keys.
    """
    doc = Document(file_path)
    table = doc.tables[0]
    for item in data:
        row_cells = table.add_row().cells
        for column, key in enumerate(('Qty', 'Id', 'Desc')):
            row_cells[column].text = str(item[key])
    doc.save(file_path)


file_path = 'path/to/document_with_table.docx'
data = [
    {'Qty': 1, 'Id': '123', 'Desc': 'First item'},
    {'Qty': 2, 'Id': '456', 'Desc': 'Second item'},
]
fill_table_in_docx(file_path, data)

# Delete specific paragraphs
from docx import Document


def delete_paragraph(paragraph):
    """Remove *paragraph* from its document.

    This works on the paragraph's private ``_element``/``_p`` attributes:
    the underlying XML element is detached from its parent, and the proxy
    object's references to it are cleared so it cannot be used by accident
    afterwards.
    """
    element = paragraph._element
    element.getparent().remove(element)
    # Clear the references on the Paragraph proxy, not on the XML element.
    paragraph._p = paragraph._element = None


def remove_paragraphs_in_docx(docx_path, to_delete):
    """Delete every paragraph whose full text is in *to_delete*, then save.

    The comparison is against the paragraph's complete text, and the
    document is saved back to *docx_path*.
    """
    doc = Document(docx_path)
    for para in doc.paragraphs:
        if para.text in to_delete:
            delete_paragraph(para)
    doc.save(docx_path)


docx_path = 'path/to/your/document.docx'
to_delete = ["This is the text to be deleted"]
remove_paragraphs_in_docx(docx_path, to_delete)

# Set document style
from docx import Document
from docx.shared import Pt


def set_style_in_docx(docx_path):
    """Set the 'Normal' style's font to Times New Roman, 12 pt, and save.

    The document is saved back to *docx_path*.
    """
    doc = Document(docx_path)
    normal_font = doc.styles['Normal'].font
    normal_font.name = 'Times New Roman'
    normal_font.size = Pt(12)
    doc.save(docx_path)


docx_path = 'path/to/your/document.docx'
set_style_in_docx(docx_path)

# Add header and footer
from docx import Document


def add_header_footer(docx_path):
    """Set the header and footer text of the document's first section.

    The text is written to the first paragraph of the header and of the
    footer, and the document is saved back to *docx_path*.
    """
    doc = Document(docx_path)
    first_section = doc.sections[0]
    first_section.header.paragraphs[0].text = "This is the header"
    first_section.footer.paragraphs[0].text = "This is the footer"
    doc.save(docx_path)


docx_path = 'path/to/your/document.docx'
add_header_footer(docx_path)

Tags: Python, automation, file-io, docx, word-processing
Test Development Learning Exchange
Written by

Test Development Learning Exchange

Test Development Learning Exchange

0 followers
Reader feedback

How this landed with the community

login Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.