Python Project: Download Bilibili Video, Extract Frames, Perform Human Segmentation, Generate Word Cloud, and Compose Final Video
This tutorial walks through a complete Python workflow that downloads a Bilibili video, extracts frames with OpenCV, uses Baidu AI for human segmentation, crawls danmu comments, creates a masked word-cloud animation, and finally merges the clips with audio into a polished video.
The article presents a step‑by‑step Python project aimed at beginners who want to practice video processing, computer‑vision, and AI techniques on a Bilibili video.
Import Required Modules
First, the script automatically installs the necessary libraries using os.system() and then imports them:
import os
import time

# Third-party packages required by the rest of the tutorial pipeline.
libs = {"lxml", "requests", "pandas", "numpy", "you-get", "opencv-python",
        "fake_useragent", "matplotlib", "moviepy"}

for lib in libs:
    # BUGFIX: os.system never raises on a failed command, so the original
    # try/except could not catch anything and the success message printed
    # unconditionally. Check the shell exit status instead (0 == success).
    status = os.system(f"pip3 install -i https://pypi.doubanio.com/simple/ {lib}")
    if status == 0:
        print(lib + "下载成功")
    else:
        print("下载失败")
import os
import re
import cv2
import jieba
import requests
import moviepy
import pandas as pd
import numpy as np
from PIL import Image
from lxml import etree
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from fake_useragent import UserAgent

Video Download
Using you-get the target Bilibili video is downloaded:
pip install you-get
you-get -i https://www.bilibili.com/video/BV11C4y1h7nX

Video Splitting (Frame Extraction)
OpenCV reads the video and saves each frame as an image file:
# -*- coding:utf-8 -*-
import os

import cv2

# BUGFIX: cv2.imwrite fails silently (returns False) when the target
# directory does not exist — create it up front.
os.makedirs("./pictures", exist_ok=True)

cap = cv2.VideoCapture(r"无价之姐~让我乘风破浪~~~.flv")
num = 1
while True:
    # ret is False once the video stream is exhausted (or unreadable).
    ret, frame = cap.read()
    if not ret:
        break
    cv2.imwrite(f"./pictures/img_{num}.jpg", frame)
    num += 1
cap.release()

Human Segmentation with Baidu AI
After creating a Baidu AI body‑analysis application, the script sends each extracted frame to the API and saves the segmented foreground:
# -*- coding:utf-8 -*-
import base64
import os
import random
import time

import cv2
import numpy as np
from aip import AipBodyAnalysis

# Baidu AI body-analysis credentials (create an application on ai.baidu.com).
APP_ID = '******'
API_KEY = '*******************'
SECRET_KEY = '********************'
client = AipBodyAnalysis(APP_ID, API_KEY, SECRET_KEY)

path = './mask_img/'
os.makedirs(path, exist_ok=True)  # cv2.imwrite needs the directory to exist

img_files = os.listdir('./pictures')
for num in range(1, len(img_files) + 1):
    img_path = f'./pictures/img_{num}.jpg'
    with open(img_path, 'rb') as fp:
        img_info = fp.read()
    # BUGFIX: width/height were undefined in the original snippet (NameError).
    # Read them from the frame itself so the label map can be resized back
    # to the source resolution.
    height, width = cv2.imread(img_path).shape[:2]
    # bodySeg returns a base64-encoded label map; per the API response,
    # pixel value 1 marks the person, 0 the background.
    seg_res = client.bodySeg(img_info)
    labelmap = base64.b64decode(seg_res['labelmap'])
    nparr = np.frombuffer(labelmap, np.uint8)
    labelimg = cv2.imdecode(nparr, 1)
    # INTER_NEAREST keeps the mask binary — no interpolated grey values.
    labelimg = cv2.resize(labelimg, (width, height), interpolation=cv2.INTER_NEAREST)
    new_img = np.where(labelimg == 1, 255, labelimg)  # person pixels -> white
    mask_name = path + f'mask_{num}.png'
    cv2.imwrite(mask_name, new_img)
print(f'======== 第{num}张图像分割完成 ========')

Danmu (Comment) Crawling
The script builds a date range, sends requests to Bilibili’s danmu API, and stores the results in an Excel file:
import csv
import datetime
import re
from concurrent.futures import ThreadPoolExecutor

import pandas as pd
import requests
from fake_useragent import UserAgent

ua = UserAgent()
url = "https://api.bilibili.com/x/v2/dm/history"
start, end = '20200808', '20200908'
date_list = [x for x in pd.date_range(start, end).strftime('%Y-%m-%d')]

# BUGFIX: the original snippet never defined df or start_time, although
# both are used below (start_time also feeds the timing print that follows).
# list.append is safe to call from multiple threads in CPython.
df = []
start_time = datetime.datetime.now()

def Grab_barrage(date):
    """Fetch one day's danmu history for the video and append every comment to df."""
    headers = {
        "origin": "https://www.bilibili.com",
        "referer": "https://www.bilibili.com/video/...",
        "cookie": "",
        # BUGFIX: UserAgent.random is a property, not a method — calling it
        # raised TypeError in the original.
        "user-agent": ua.random,
    }
    params = {'type': 1, 'oid': '222413092', 'date': date}
    r = requests.get(url, params=params, headers=headers)
    r.encoding = 'utf-8'
    # The history endpoint returns XML; each comment is a <d p="...">text</d> node.
    comments = re.findall('<d p=".*?">(.*?)</d>', r.text)
    for comment in comments:
        df.append(comment)

# Crawl the whole date range with 4 parallel workers.
with ThreadPoolExecutor(max_workers=4) as executor:
    executor.map(Grab_barrage, date_list)

pd.DataFrame(df).to_excel('danmu.xlsx')
print(f'用时:{(datetime.datetime.now()-start_time).total_seconds()}s')

Word Cloud Generation
After cleaning the comments (removing repeated characters) and loading custom stop‑words, the script creates a masked word‑cloud for each frame and saves the images:
import collections
import os
import re

import jieba
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from wordcloud import WordCloud

# ---- Load and preprocess the danmu text ----
with open('barrages.txt', 'r', encoding='utf-8') as f:
    data = f.read()
# Keep only runs of Chinese characters, joined with '/' as a soft delimiter.
new_data = '/'.join(re.findall('[\u4e00-\u9fa5]+', data))
seg_list = jieba.cut(new_data, cut_all=True)

# ---- Remove stop words and count word frequencies ----
with open('stoplist.txt', 'r', encoding='utf-8') as f:
    stop_words = set(f.read().split('\n'))
words = [w for w in seg_list if w not in stop_words and len(w) > 1]
word_counts = collections.Counter(words)

# ---- One masked word cloud per segmentation mask ----
# BUGFIX: the original snippet used os.listdir without importing os.
for num in range(1, len(os.listdir('./mask_img')) + 1):
    # Invert the mask: WordCloud places words on the non-white region.
    mask = 255 - np.array(Image.open(f'./mask_img/mask_{num}.png'))
    wc = WordCloud(background_color='black', mask=mask, mode='RGBA',
                   max_words=500,
                   font_path='simhei.ttf').generate_from_frequencies(word_counts)
    plt.figure(figsize=(8, 5), dpi=200)
    plt.imshow(wc)
    plt.axis('off')
    wc.to_file(f'./wordcloud/wordcloud_{num}.png')
    # BUGFIX: close the figure — creating hundreds of figures without
    # closing them leaks memory (matplotlib warns about this).
    plt.close()
print(f'======== 第{num}张词云图生成 ========')

Video Composition
Using OpenCV, the generated word‑cloud images are stitched into a video file:
import os

import cv2

video_dir = 'result.mp4'
fps = 30
img_size = (1920, 1080)
fourcc = cv2.VideoWriter_fourcc('M', 'P', '4', 'V')
videoWriter = cv2.VideoWriter(video_dir, fourcc, fps, img_size)

# Frames 88..887 correspond to the word-cloud images generated above.
for i in range(88, 888):
    img_path = f'./wordcloud/wordcloud_{i}.png'
    frame = cv2.imread(img_path)
    if frame is None:
        # BUGFIX: cv2.imread returns None for a missing or unreadable file,
        # which made the original crash in cv2.resize — skip such frames.
        continue
    frame = cv2.resize(frame, img_size)
    videoWriter.write(frame)
    print(f'======== 第{i}张图片合进视频 ========')
videoWriter.release()

Audio Adding
MoviePy adds a background music track to the composed video and writes the final output:
import moviepy.editor as mpy
# Load the silent word-cloud video produced by the OpenCV step.
my_clip = mpy.VideoFileClip('result.mp4')
# Take the first 25 seconds of the song as the background track.
# NOTE(review): the 25 s cut presumably matches the video length — confirm.
audio_background = mpy.AudioFileClip('song.mp3').subclip(0,25)
# Attach the audio track; final_clip is written to disk on the next line.
final_clip = my_clip.set_audio(audio_background)
final_clip.write_videofile('final_video.mp4')

The resulting video shows a dancing word-cloud animation synchronized with music, demonstrating a complete end-to-end pipeline from data acquisition to visual storytelling.
Python Programming Learning Circle
A global community of Chinese Python developers offering technical articles, columns, original video tutorials, and problem sets. Topics include web full‑stack development, web scraping, data analysis, natural language processing, image processing, machine learning, automated testing, DevOps automation, and big data.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.