#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Try to find all files in a Debian system which are not
# used (i.e. which do not belong to any installed package).
# This script is not finished.
# This script is in the public domain.

# Python Imports
import os
import re
import subprocess
import sys
import time

try:
    import cPickle as pickle  # Python 2: C implementation
except ImportError:
    import pickle  # Python 3: no separate cPickle module

# Patterns of paths that are never reported.
# used with re.match(...), i.e. anchored at the start of the path.
ignore_regex = [
    '/root/.*',
    '/proc/.*',
    '/dev/.*',
    # The "+" must be escaped, otherwise it is a quantifier and the
    # pattern can never match a literal "lost+found" directory.
    r'.*?/lost\+found/.*',
    r'/boot/boot\..*',
    '/tmp/.*',
    '/var/tmp/.*',
    '/home/.*']

# One line of "dpkg --get-selections": "<package><whitespace><state>".
SELECTION_RE = re.compile(r'^(.*?)\s+(.*?)\s*$')

# Selection states whose packages have files on disk worth listing.
INSTALLED_STATES = ("hold", "install")


def usage():
    """Print a short description of the script and its algorithm."""
    print("""Usage: %s

Find unused files on a Debian-System.

Algorithm:
 - Get all installed Packages (dpkg --get-selections)
 - Get all installed files of each package (dpkg -L thispackage)
 - Search all files in the filesystem (find /)
 - check for files which are not part of any package
 """ % (
        os.path.basename(sys.argv[0])))


def normalize_path(line):
    """Strip surrounding whitespace and trailing slashes from *line*.

    The root directory "/" therefore becomes the empty string, which
    the callers skip.
    """
    return line.strip().rstrip("/")


def is_ignored(path):
    """Return True if *path* matches one of the ``ignore_regex`` patterns."""
    return any(re.match(pattern, path) for pattern in ignore_regex)


def parse_selection(line):
    """Split one ``dpkg --get-selections`` line into (package, state).

    Raises ValueError if the line does not consist of two
    whitespace-separated parts.
    """
    match = SELECTION_RE.match(line)
    if not match:
        raise ValueError("Error in line: %s" % line)
    return match.group(1), match.group(2)


def run_command(argv):
    """Run *argv* (a list, no shell involved) and return its stdout lines.

    Lines are returned without the trailing newline, as native ``str``
    objects.  stderr is not captured, so diagnostics of the child
    process (e.g. "Permission denied" from find) reach the terminal
    instead of being mistaken for output lines.  The exit status is
    ignored.  Raises OSError if the program cannot be started.
    """
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    output = proc.communicate()[0]
    if not isinstance(output, str):
        # Python 3 hands back bytes; decode like the OS layer does so
        # that undecodable file names survive instead of raising.
        output = os.fsdecode(output)
    lines = output.split("\n")
    if lines and not lines[-1]:
        lines.pop()  # artefact of the final newline
    return lines


def load_cache(path, question):
    """Offer to reuse the pickle cache *path*; return its content or None.

    *question* is a format string with one ``%s`` for the file name.
    None is returned if the file does not exist, the user does not
    answer "y" or "j", or the cache cannot be read.
    """
    if not os.path.isfile(path):
        return None
    print(question % (path))
    answer = sys.stdin.readline()
    if answer.strip().lower() not in ("y", "j"):
        return None
    # NOTE(security): unpickling executes code embedded in the file.
    # Only reuse a cache from a directory you trust.
    try:
        with open(path, "rb") as fd:
            return pickle.load(fd)
    except (EOFError, ValueError, pickle.UnpicklingError) as err:
        print("Ignoring unreadable cache %s: %s" % (path, err))
        return None


def save_cache(path, data):
    """Pickle *data* into *path* and report it on stdout."""
    with open(path, "wb") as fd:
        pickle.dump(data, fd)
    print("Wrote %s" % path)


def collect_packages():
    """Return {package: [normalized file paths]} for installed packages.

    Exits with status 1 if dpkg cannot be run or lists fewer than 20
    packages, which is treated as a failure.
    """
    try:
        lines = run_command(["dpkg", "--get-selections"])
    except OSError as err:
        print("Error: %s" % err)
        sys.exit(1)
    if len(lines) < 20:
        print("Error: %s" % "\n".join(lines))
        sys.exit(1)

    packages = {}
    for line in lines:
        pak, state = parse_selection(line)
        if state not in INSTALLED_STATES:
            # Any other selection state is skipped: only "hold" and
            # "install" packages are listed.
            continue
        print("Package %s" % pak)
        packages[pak] = [
            normalize_path(entry) for entry in run_command(["dpkg", "-L", pak])]
    return packages


def collect_files():
    """Return the normalized paths of everything ``find /`` lists."""
    argv = ["find", "/"]
    print("Running: %s" % " ".join(argv))
    return [normalize_path(entry) for entry in run_command(argv)]


def build_file_index(packages):
    """Invert {package: [paths]} into {path: [owning packages]}.

    Empty paths are skipped; paths are normalized again so that an old
    cache cannot introduce trailing slashes.
    """
    index = {}
    for pak, pfiles in packages.items():
        for path in pfiles:
            path = normalize_path(path)
            if not path:
                continue
            index.setdefault(path, []).append(pak)
    return index


def open_report(path):
    """Open the text report *path* for writing."""
    if str is bytes:
        return open(path, "w")  # Python 2: paths are plain byte strings
    # Python 3: write back the bytes that os.fsdecode() escaped.
    return open(path, "w", errors="surrogateescape")


def write_reports(files, filedict, unknownfile, knownfile):
    """Sort *files* into the unknown and the known report.

    Ignored paths and dangling links are left out.  Every remaining
    path is resolved with os.path.realpath() and looked up in
    *filedict*: a hit goes to *knownfile* together with the owning
    packages, a miss goes to *unknownfile*.
    """
    created = "Created by %s at %s" % (
        os.path.basename(sys.argv[0]),
        time.strftime("%Y-%m-%d %H:%M"))
    with open_report(unknownfile) as fd:
        with open_report(knownfile) as fdk:
            fd.write("#Unknown files. %s\n" % created)
            fdk.write("#Known files. %s\n" % created)
            for f_orig in files:
                if not f_orig or is_ignored(f_orig):
                    continue

                # Broken Link?
                try:
                    f = os.path.realpath(f_orig)
                except (OSError, ValueError):
                    print("Error in Link: %s" % f_orig)
                    continue
                if not os.path.exists(f):
                    if f != f_orig:
                        print("Broken Link: %s --> %s" % (f_orig, f))
                    continue

                if f in filedict:
                    fdk.write("%s: %s\n" % (f, ', '.join(filedict[f])))
                else:
                    fd.write("%s\n" % f)


def main():
    """Build (or reuse) the package and file lists, then write the reports.

    Writes "packages.pickle" and "files.pickle" as caches and
    "unkown.txt" / "known.txt" as results into the current directory.
    """
    if len(sys.argv) != 1:
        usage()
        sys.exit()

    picklefile = "packages.pickle"
    packages = load_cache(picklefile, "Use old cache (%s)?")
    if not packages:
        packages = collect_packages()
        # The cache is written only after the scan has finished, so an
        # aborted run cannot leave an empty, unloadable file behind.
        save_cache(picklefile, packages)

    filesfile = "files.pickle"
    files = load_cache(filesfile, "Use old file cache (%s)?")
    if not files:
        files = collect_files()
        save_cache(filesfile, files)

    filedict = build_file_index(packages)

    # "unkown" (sic): the historical file name is kept so that the
    # location of the output does not change.
    unknownfile = "unkown.txt"
    knownfile = "known.txt"
    write_reports(files, filedict, unknownfile, knownfile)
    print("Created %s" % unknownfile)
    print("Created %s" % knownfile)


if __name__ == "__main__":
    main()