Получение статистики по файлам в директории
Материал из Linux Wiki
Версия от 00:05, 8 марта 2010; Jolly Roger (обсуждение | вклад) (Создана новая страница размером Для получения статистики по типам файлов в директории можно с помощью след...)
Получить статистику по типам файлов в директории можно с помощью следующего скрипта. Помимо основной цели, скрипт может «сортировать» файлы по директориям, исходя из их типа. Для получения типа файла используется эвристический анализатор file:
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Script generates statistics table for specified directories sorted by type(by
# default), overall files size or files count. This script can also sort files
# into the directories by type using hardlinks(default) or softlinks. Type
# determination is done by `file` utility.
#
# To get help just run script with wrong parameters.
import os
import sys
import subprocess
import string
import getopt
import operator
def usage():
    """Print the command-line synopsis and the option summary to stdout."""
    help_lines = (
        "Usage: fmetric.py [-C|-S] [-h] [-m] [-b [-s]] [-q] DIR [DIR] ...",
        "\t-C\tSort by files count",
        "\t-S\tSort by overall size",
        "\t-h\tShow sizes in human-readable format",
        "\t-m\tUse MIME types instead of file descriptions",
        "\t-b\tCreate backup directory",
        "\t-s\tUse softlinks (only useful with -b option)",
        "\t-q\tUse \"quiet\" mode (do not write any error messages, skip on errors)",
    )
    # A single parenthesised argument prints the same text whether `print`
    # is parsed as a statement or called as a function.
    print("\n".join(help_lines))
def file_info(filename, flags):
    """Return the leading field of the `file` utility's output for a file.

    Runs ``/usr/bin/file <flags> <filename>`` and returns the part of its
    standard output that precedes the first comma, with newline characters
    stripped from both ends.

    filename -- path handed to `file` as its last argument
    flags    -- option string handed to `file` as one argument
                (callers in this script pass "-b" or "-bi")

    Standard error of the utility is captured and discarded.
    """
    command = ["/usr/bin/file", flags, filename]
    process = subprocess.Popen(command,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    output, _errors = process.communicate()
    # Only the text before the first comma is used as the type label.
    first_field = output.split(",")[0]
    return first_field.strip('\n')
def sizeof_fmt(num, readable):
    """Format a byte count for display.

    num      -- size in bytes
    readable -- when false, `num` is returned unchanged; when true, a string
                such as "1.5 KB" is returned, using 1024-based units

    Sizes of 1024 TB and above are reported in PB, so that the function
    always returns a value in readable mode instead of falling off the end
    of the unit loop and returning None.
    """
    if not readable:
        return num
    for unit in ['bytes', 'KB', 'MB', 'GB', 'TB']:
        if num < 1024.0:
            return "%3.1f %s" % (num, unit)
        num /= 1024.0
    # Largest supported unit: everything that did not fit above lands here.
    return "%3.1f %s" % (num, 'PB')
def main(argv):
    """Collect per-type file statistics for directories and print a table.

    argv -- command-line arguments without the program name: options from
            the set -C -S -h -m -b -s -q followed by directories to scan

    Every file under each directory is classified with file_info(); total
    size and file count are accumulated per type. With -b, each file is
    also linked into "<dir>/backup/<type>/" (hardlink by default, symlink
    with -s).

    Exits with status 2 on invalid options or when both -S and -C style
    sort options are given, and with status 1 when a backup directory
    already exists. Errors on individual files are written to stderr
    (unless -q is given) and the file is skipped.
    """
    # Defaults, overridden by the command-line options below
    backup = False
    readable = False
    use_symlinks = False
    quiet_mode = False
    info_flags = "-b"
    # Index into the (type, size, count) row used as the sort key
    sort_key = 0
    info_list = []
    size_list = []
    count_list = []
    # Maps a type string to its position in the three lists above, so that
    # each file costs one dict lookup instead of a list scan. The lists keep
    # first-seen order, which the stable sort below relies on for ties.
    info_index = {}

    # Parsing command-line arguments
    try:
        opts, args = getopt.getopt(argv, "CShmbsq")
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, _value in opts:
        if opt == "-h":
            readable = True
        elif opt in ("-S", "-C"):
            # Only one sort option may be given
            if sort_key > 0:
                usage()
                sys.exit(2)
            sort_key = 1 if opt == "-S" else 2
        elif opt == "-b":
            backup = True
        elif opt == "-m":
            info_flags = "-bi"
        elif opt == "-s":
            use_symlinks = True
        elif opt == "-q":
            quiet_mode = True

    # Walk through the files and gather stats
    for top_dir in args:
        backup_dir = os.path.join(top_dir, "backup")
        if backup and os.path.exists(backup_dir):
            print("Backup directory \"%s\" exists, exiting" % backup_dir)
            sys.exit(1)
        for root, _dirs, files in os.walk(top_dir):
            for name in files:
                filename = os.path.join(root, name)
                # This might cause some errors for moved files, broken links,
                # etc. Output all errors to stderr, do not stop the script
                try:
                    size = os.path.getsize(filename)
                except OSError as err:
                    if not quiet_mode:
                        sys.stderr.write(
                            "Error reading file size for %s: %s\n" %
                            (filename, err.strerror))
                    continue
                info = file_info(filename, info_flags)

                # Add stats to the comparison table
                index = info_index.get(info)
                if index is None:
                    info_index[info] = len(info_list)
                    info_list.append(info)
                    size_list.append(size)
                    count_list.append(1)
                else:
                    size_list[index] += size
                    count_list[index] += 1

                # Create backup hardlinks/symlinks
                if backup:
                    backup_path = os.path.join(backup_dir, info)
                    link_path = os.path.join(backup_path, name)
                    try:
                        # Directory creation is inside the try so that a
                        # failure here is reported like any other per-file
                        # error instead of aborting the whole run.
                        if not os.path.exists(backup_path):
                            os.makedirs(backup_path)
                        if use_symlinks:
                            # A relative target would be resolved from the
                            # link's own directory and dangle; use an
                            # absolute one.
                            os.symlink(os.path.abspath(filename), link_path)
                        else:
                            os.link(filename, link_path)
                    except OSError as err:
                        if not quiet_mode:
                            sys.stderr.write(
                                "Error creating link %s: %s\n" %
                                (link_path, err.strerror))

    # Width of the type column; never narrower than its header, which also
    # covers the case where no files were found at all.
    info_field_size = max([len("Type")] + [len(info) for info in info_list])

    # Print header
    print("%s | %s | %s" % ("Type".ljust(info_field_size),
                            "Size".ljust(12),
                            "Count"))
    print("=" * (info_field_size + 3 + 12 + 3 + 12))

    # Print the result: ascending by type, descending by size or count
    rows = sorted(zip(info_list, size_list, count_list),
                  key=operator.itemgetter(sort_key),
                  reverse=(sort_key != 0))
    for info, size, count in rows:
        print("%s | %s | %d" % (info.ljust(info_field_size),
                                str(sizeof_fmt(size, readable)).ljust(12),
                                count))
# Script entry point: at least one command-line argument is required,
# otherwise the usage text is shown and the script exits with status 2.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if not cli_args:
        usage()
        sys.exit(2)
    main(cli_args)
Скрипт также будет полезен в связке с другим маленьким скриптом, позволяющим отсортировать найденные файлы в директории lost+found по общим директориям.
#!/bin/bash
#
# This script intends to sort files in the lost+found directory to the
# directories depending on the files' parent inodes. This can save much time
# during hand-recovering
#
# Every entry directly inside the given directory whose name contains "_" is
# moved into a subdirectory named after the part of its name that precedes
# the first "_".
#
# The one and only argument is `lost+found`-like directory. Apart from
# requiring that argument, no checks are done. You are warned.

# Without an argument `find` would scan the current directory and the target
# directories would be created under "/", so refuse to run.
: "${1:?Usage: $0 LOST_FOUND_DIR}"

# -mindepth 1 keeps the start directory itself out of the results when its
# own name contains "_". IFS= and -r keep leading blanks and backslashes in
# file names intact (names containing newlines are still not supported).
find "$1" -mindepth 1 -maxdepth 1 -name '*_*' | while IFS= read -r FILE
do
    FILE_NAME=${FILE##*/}
    DIR_NAME="$1/${FILE_NAME%%_*}"
    mkdir -p -- "$DIR_NAME"
    mv -- "$FILE" "$DIR_NAME"
done
Самые свежие версии файла будут находиться на GitHub.com