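# Walk a directory tree and, for every file, read its leading magic bytes to determine the file type and add the matching extension.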
import os
import shutil
# Signatures located at offset 0 of the file, mapped to the extension to apply.
# RIFF containers (AVI/WAV/WebP) and signatures that live at a non-zero offset
# (MP4 "ftyp", TAR "ustar") are handled by the dedicated tables below, because
# a plain prefix match cannot identify them correctly.
MAGIC_NUMBERS = {
    b'\xff\xd8\xff': 'jpg',  # JPEG
    b'\x89PNG\r\n\x1a\n': 'png',  # PNG
    b'GIF87a': 'gif',  # GIF 87a
    b'GIF89a': 'gif',  # GIF 89a
    b'\x42\x4d': 'bmp',  # BMP
    b'\x25\x50\x44\x46': 'pdf',  # PDF
    b'\x50\x4b\x03\x04': 'zip',  # ZIP
    b'\x52\x61\x72\x21': 'rar',  # RAR
    b'\x37\x7a\xbc\xaf\x27\x1c': '7z',  # 7-Zip
    b'\x49\x44\x33': 'mp3',  # MP3 (ID3 tag)
    b'\x4d\x5a': 'exe',  # EXE
    b'\x7f\x45\x4c\x46': 'elf',  # ELF
    b'\x46\x4c\x56': 'flv',  # FLV
    b'\x49\x49\x2a\x00': 'tif',  # TIFF (little endian)
    b'\x4d\x4d\x00\x2a': 'tif',  # TIFF (big endian)
    b'\x56\x31\x4d\x4d': 'vmdk',  # Virtual machine disk data (signature kept from the original table)
    # Extend as needed.
}

# Signatures that do not sit at the start of the file: (offset, magic, extension).
_OFFSET_MAGIC_NUMBERS = (
    (4, b'\x66\x74\x79\x70', 'mp4'),  # "ftyp" box type follows a 4-byte box size
    (257, b'\x75\x73\x74\x61\x72', 'tar'),  # "ustar" lives inside the first tar header
)

# RIFF is a generic container; the four bytes at offset 8 name the actual format.
_RIFF_MAGIC = b'\x52\x49\x46\x46'
_RIFF_FORM_TYPES = {
    b'AVI ': 'avi',
    b'WAVE': 'wav',
    b'WEBP': 'webp',
}

# Enough bytes to cover the deepest signature checked ("ustar" at 257..261).
_HEADER_SIZE = 262


def get_file_type(file_path):
    """Return the extension implied by the file's magic number, or None.

    Args:
        file_path: Path of the file to inspect; it is opened in binary mode.

    Returns:
        The extension (without a leading dot) for the first matching
        signature, or None when no known signature matches. A RIFF file
        whose form type is not AVI, WAVE or WEBP also yields None rather
        than a guessed extension.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(file_path, 'rb') as f:
        header = f.read(_HEADER_SIZE)

    # Offset-based signatures are checked first: a tar archive begins with an
    # arbitrary member name that could otherwise collide with a short prefix.
    for offset, magic, filetype in _OFFSET_MAGIC_NUMBERS:
        if header.startswith(magic, offset):
            return filetype

    if header.startswith(_RIFF_MAGIC):
        return _RIFF_FORM_TYPES.get(header[8:12])

    for magic, filetype in MAGIC_NUMBERS.items():
        if header.startswith(magic):
            return filetype
    return None
def rename_file(file_path, new_extension):
    """Rename a file so that it carries ``new_extension`` and return the new path.

    Any existing extension is replaced. If another file already occupies the
    target name, a numeric suffix (``name_1.ext``, ``name_2.ext``, ...) is
    appended instead of overwriting it, so no data is lost when two files map
    to the same name.

    Args:
        file_path: Path of the file to rename.
        new_extension: Extension to apply, without the leading dot.

    Returns:
        The path the file has after the call. This equals ``file_path`` when
        the file already had the requested name.

    Raises:
        OSError: If the file cannot be moved.
    """
    base = os.path.splitext(file_path)[0]
    new_file_path = f"{base}.{new_extension}"

    # Skip over names taken by *other* files. samefile() lets a case-only
    # rename on a case-insensitive filesystem go through to the plain target.
    counter = 0
    while os.path.exists(new_file_path) and not os.path.samefile(file_path, new_file_path):
        counter += 1
        new_file_path = f"{base}_{counter}.{new_extension}"

    if new_file_path != file_path:
        shutil.move(file_path, new_file_path)
    return new_file_path
def process_directory(directory):
    """Walk ``directory`` recursively and rename every file by detected type.

    Each file's type is looked up with ``get_file_type``; recognised files
    are renamed via ``rename_file`` and the outcome is printed. A file that
    cannot be read or moved is reported and skipped so that one bad file does
    not abort the rest of the walk.

    Args:
        directory: Root directory to process.
    """
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            file_path = os.path.join(root, filename)
            try:
                # Determine the file type from its magic number.
                file_type = get_file_type(file_path)
                if not file_type:
                    print(f"Could not determine file type for: {file_path}")
                    continue
                new_file_path = rename_file(file_path, file_type)
            except OSError as exc:
                # Permission errors, locked files, vanished files, etc.
                print(f"Failed to process {file_path}: {exc}")
                continue
            print(f"Renamed: {file_path} -> {new_file_path}")
if __name__ == "__main__":
    import sys

    # The first command-line argument, when given, overrides the default
    # directory so the script can be reused without editing the source.
    default_directory = r"C:\Users\Administrator\Desktop\data"
    directory_to_process = sys.argv[1] if len(sys.argv) > 1 else default_directory
    process_directory(directory_to_process)
1万+

被折叠的 条评论
为什么被折叠?



