#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Report file names that are not valid UTF-8 or use unexpected characters.

Walks a directory tree, prints one ``strange: <path>`` line for every file
whose name is rejected by :func:`is_expected_name`, and finishes with an
``OK: <count>`` line giving the number of accepted file names.

Only single-argument ``print(...)`` calls are used so that the script parses
and behaves the same on Python 2 and Python 3.
"""
import os
import re
import sys

# Whitelist applied to the lower-cased file name.  The pattern is anchored
# with \Z rather than $ because $ also matches just before a trailing
# newline, which would let a name such as "abc\n" pass as ordinary.
# Compiled once here instead of once per file inside the walk.
ALLOWED_NAME_RE = re.compile(
    u'''^[@{}~%=üöäßéèêàâáóña-z0-9_ ():.,!?´'"`\\[\\]&#-]+\\Z''')


def usage():
    """Print a short usage message to standard output."""
    print('''Usage: %s dir

Find strange encoding in file names. All filenames should be in utf8
and in a characters given in the script.
''' % (os.path.basename(sys.argv[0])))


def is_expected_name(name):
    """Return True if *name* is UTF-8 and made only of whitelisted characters.

    *name* may be a byte string (what ``os.walk`` yields on Python 2 for a
    byte-string start directory) or a text string.  Byte strings are decoded
    as UTF-8; a name that cannot be decoded is reported as unexpected instead
    of raising ``UnicodeDecodeError``.  The name is lower-cased before it is
    matched against ``ALLOWED_NAME_RE``.
    """
    if isinstance(name, bytes):
        try:
            name = name.decode('utf-8')
        except UnicodeDecodeError:
            # Not UTF-8 at all: exactly the kind of name this tool reports.
            return False
    return ALLOWED_NAME_RE.match(name.lower()) is not None


def _printable(path):
    """Return *path* in a form that ``print`` can emit without raising.

    Byte strings are returned unchanged.  Text strings are round-tripped
    through UTF-8 with ``backslashreplace`` so that code points which cannot
    be encoded (for example the lone surrogates Python 3 uses to represent
    undecodable file-name bytes) are shown as backslash escapes.
    """
    if isinstance(path, bytes):
        return path
    return path.encode('utf-8', 'backslashreplace').decode('utf-8')


def main():
    """Check every file name below the directory given as ``sys.argv[1]``.

    Prints the usage text and exits with status 3 unless exactly one
    argument was supplied.  Directory names themselves are not checked,
    only the names of the files they contain.
    """
    if len(sys.argv) != 2:
        usage()
        sys.exit(3)
    start_dir = sys.argv[1]
    ok = 0
    for root, dirs, files in os.walk(start_dir):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for fn in sorted(files):
            if is_expected_name(fn):
                ok += 1
                continue
            print('strange: %s' % _printable(os.path.join(root, fn)))
    print('OK: %s' % ok)


if __name__ == '__main__':
    main()