Counting all files under a specified folder in Python, using breadth-first search (BFS)

  python

Counting all the files under a specified folder with Python

I have needed this for a long time. I used to write it recursively, but that felt wasteful of resources, and Python's default recursion limit is only 1,000, so I rewrote it as a breadth-first traversal.
I measured it on a two-level tree of 24 folders holding about 500,000 files in total; the run took roughly 3 seconds. The code follows:

import os
import queue

def get_file_quantity(folder: str, *, follow_symlinks: bool = True) -> int:
    """Count the files beneath *folder* with a breadth-first traversal.

    Every directory entry that is not itself a directory counts as one file
    (regular files, broken symlinks, sockets, ...).  Directories are never
    counted; they are queued and scanned in turn.

    Args:
        folder: Path of the directory to start from.
        follow_symlinks: When true (the default), directories reached through
            symbolic links are scanned as well, but each real directory is
            scanned only once, so a link that points back into the tree
            cannot inflate the total.  When false, symlinked directories are
            neither scanned nor counted.

    Returns:
        The number of non-directory entries found.

    Raises:
        AssertionError: If *folder* is not an existing directory.
        OSError: If a directory cannot be listed (e.g. ``PermissionError``).
    """
    from collections import deque  # local import keeps the block self-contained

    # Raised explicitly instead of via ``assert`` so the check still runs
    # under ``python -O``; exception type and message are unchanged.
    if not os.path.isdir(folder):
        raise AssertionError('请输入有效的文件夹参数')

    file_quantity = 0
    pending = deque([folder])  # directories waiting to be scanned (FIFO => BFS)
    # Real paths of directories already queued.  Only maintained when links
    # are followed, since only then can one directory be reached twice.
    visited = {os.path.realpath(folder)} if follow_symlinks else set()

    while pending:
        current = pending.popleft()
        # scandir usually knows the entry type without an extra stat call.
        with os.scandir(current) as entries:
            for entry in entries:
                if not entry.is_dir():
                    file_quantity += 1
                    continue
                if not follow_symlinks:
                    if entry.is_symlink():
                        continue
                else:
                    real_path = os.path.realpath(entry.path)
                    if real_path in visited:
                        continue  # already queued via another path or link
                    visited.add(real_path)
                pending.append(entry.path)
    return file_quantity
    
if __name__ == '__main__':
    # Example run: count every file below /home and print the total.
    file_quantity = get_file_quantity(r'/home')
    # The message reads "Total number of files: <count>".
    print(f'文件总数是: {file_quantity}')

Train of thought

The key ingredient here is a queue, which is the standard way to implement BFS.

With a small change to the line `folder_list = list(filter(lambda bar: os.path.isdir(bar), file_folder_list))`, the `lambda` function can apply all kinds of tests to the file names; what the function ends up doing is limited only by your imagination!

Attached is an adapted version that counts the files with a specific suffix.

Adapted version: count the files with a specific suffix

import os
import queue

def filter_extension(filename: str, extension: str) -> bool:
    """Tell whether the text after the last '.' in *filename* equals *extension*.

    This is a plain string comparison: the filesystem is not consulted, case
    is significant, and *extension* is written without a leading dot.  A name
    that contains no '.' never matches.
    """
    # rpartition yields an empty separator when there is no '.' at all.
    _, dot, suffix = filename.rpartition('.')
    return dot == '.' and suffix == extension

def get_file_quantity(folder: str, extension: str, *, follow_symlinks: bool = True) -> int:
    """Count the files beneath *folder* whose suffix is *extension* (BFS).

    Only entries for which ``is_file()`` is true are considered; each one is
    passed (as a full path) to ``filter_extension`` together with
    *extension*.  Directories are queued and scanned in turn.

    Args:
        folder: Path of the directory to start from.
        extension: Suffix to match, passed unchanged to ``filter_extension``.
        follow_symlinks: When true (the default), directories reached through
            symbolic links are scanned as well, but each real directory is
            scanned only once, so a link that points back into the tree
            cannot inflate the total.  When false, symlinked directories are
            not scanned.

    Returns:
        The number of matching files found.

    Raises:
        AssertionError: If *folder* is not an existing directory or
            *extension* is not a ``str``.
        OSError: If a directory cannot be listed (e.g. ``PermissionError``).
    """
    from collections import deque  # local import keeps the block self-contained

    # Raised explicitly instead of via ``assert`` so the checks still run
    # under ``python -O``; exception type and messages are unchanged.
    if not os.path.isdir(folder):
        raise AssertionError('请输入有效的文件夹参数')
    if not isinstance(extension, str):
        raise AssertionError('请输入有效的文件后缀名')

    file_quantity = 0
    pending = deque([folder])  # directories waiting to be scanned (FIFO => BFS)
    # Real paths of directories already queued.  Only maintained when links
    # are followed, since only then can one directory be reached twice.
    visited = {os.path.realpath(folder)} if follow_symlinks else set()

    while pending:
        current = pending.popleft()
        # scandir usually knows the entry type without an extra stat call.
        with os.scandir(current) as entries:
            for entry in entries:
                if entry.is_dir():
                    if not follow_symlinks:
                        if entry.is_symlink():
                            continue
                    else:
                        real_path = os.path.realpath(entry.path)
                        if real_path in visited:
                            continue  # already queued via another path or link
                        visited.add(real_path)
                    pending.append(entry.path)
                elif entry.is_file() and filter_extension(entry.path, extension):
                    file_quantity += 1
    return file_quantity
if __name__ == '__main__':
    # Example run: count the files below /home whose suffix is 'py'.
    extension = 'py'
    file_quantity = get_file_quantity(r'/home', extension)
    # The message reads "Number of files with suffix <extension>: <count>".
    print(f'包含后缀 {extension } 的文件的数量: {file_quantity}')