#!/bin/csh -f
# clean_up_album_tree
# -------------------------------------------------------------------------
# Shell script to clean up the album tree in the current folder.
# Walks the whole subtree doing:
# - For each same-named parent/child folder that was presumably created
#   by moving/copying all parent files to a new same-named child folder
#   to allow for other child folders of the same parent, and forgetting
#   to delete some or all of them from the parent:
#   - Warn of each same-named but different file
#   - Offer to delete each same-named and identical file from the parent
#     - Also offer to delete it from the web server
#   - If any files were offered for deletion, also offer to delete the
#     various files and subfolders that were created to support the
#     album/slideshow of this parent folder, both locally and from the
#     web server:
#     - album
#     - index.htm
#     - pub
#     - slideshow/
#     - thumbs/
# - If index.htm still exists, warn of any subfolders that it might
#   hide at the web server, preventing the web server from showing
#   a directory listing of them.
# -------------------------------------------------------------------------
# Usage:
#   clean_up_album_tree [-h|--help] [-v|--verbose] [-d dir|--directory dir]
# Exit status:
#   1 after showing help or on any command line error.
# External commands used (not defined in this file; since this script
# runs with -f, aliases from the startup files are not available, so
# these are presumably scripts on the PATH -- TODO confirm):
#   beep, pause, dir, tridentcd, check_for_folders_hidden_by_index_htm
# Other tools: find, sort, ls, cut, readlink, diff, rm, and the BSD/macOS
# form of stat (stat -f "%z").
# -------------------------------------------------------------------------
# Revision History:
# $Log$
# -------------------------------------------------------------------------

# Collect command line options
set directory = "."
set option_verbose = "false"
while ($#argv > 0)
    if ("$1" == "-h" || "$1" == "--help") then
        echo "Usage: $0:t [options]"
        echo "Options:"
        echo " -h = Show this help text"
        echo " --help = Show this help text"
        echo " -v = Show progress messages"
        echo " --verbose = Show progress messages"
        echo " -d dir = Operate in specified directory"
        echo " --directory dir = Operate in specified directory"
        exit 1
    else if ("$1" == "-v" || "$1" == "--verbose") then
        shift
        set option_verbose = "true"
    else if ("$1" == "-d" || "$1" == "--directory") then
        # Require a value after the option.  Without this check the
        # second shift below aborts the script with the cryptic csh
        # error "shift: No more words".
        if ($#argv < 2) then
            beep "Error: Missing directory after option: $1:q"
            $0 --help
            exit 1
        endif
        shift
        set directory = $1:q
        shift
        # Note: Test with -d (is a directory), not -x (is executable or
        #       searchable).  A regular executable file passes -x, which
        #       would contradict the error message and fail later at cd.
        if (! -d $directory:q) then
            beep "Error: Not a directory: $directory:q"
            $0 --help
            exit 1
        endif
    else if ("-" == "`echo $1:q | cut -c 1`") then
        beep "Error: Invalid option: $1:q"
        $0 --help
        exit 1
    else
        # No leading hyphen, so not an option.  It's an arg.
        # Exit loop since we're done with options
        break
    endif
end

# No arguments defined, so allow none
if ($#argv > 0) then
    beep "Error: No args supported by $0:t"
    $0 --help
    exit 1
endif

if ($directory:q != ".") then
    # Note: Can't do this because the cd happens inside the new process
    #       spawned by ()
    # (set echo; cd $directory:q)
    if ("$option_verbose" == "true") then
        echo cd $directory:q
    endif
    cd $directory:q
endif

# Loop through all subfolders in alphabetic order.
# Note: Double quotes outside the backticks cause the list of
#       filenames, one per line, generated by find, to be quoted
#       so that embedded whitespace and special chars are tolerated.
#       The foreach command operates on each line as a filename,
#       not each word of each line.
# Note: The option -type d selects only directories, not symlinks
#       so there's no chance of getting a value for dir that refers
#       to the current folder, except for "." and any other hard links
#       to the current folder like the ".." links in subdirectories.
#       User-created hard links are never allowed to refer to folders.
#       And this foreach command will never return the ".." links
#       of the subfolders.  So the "." link in this folder is the
#       only special case to be careful of.  We'll need to avoid
#       accidentally comparing . to ./. and concluding that all files
#       are duplicates and eligible for deletion.
foreach dir ("`find . -type d -print | sort`")
    if ("$option_verbose" == "true") then
        echo "Considering: $dir ..."
    endif

    set base_name = "$dir:t"
    if ("$base_name" == ".") then
        # Note: Update base_name to the real tail of the current path, not
        #       just ".".  Otherwise we erroneously find that all files in
        #       the top level folder "foo" match themselves because, for
        #       example we compare "bar" with "./bar".  Better to compare
        #       "bar" with "foo/bar" when in folder "foo".
        # Note: Use $cwd or $PWD or pwd -L here, not pwd or pwd -P, for
        #       reasons explained in the normalize_filename script.
        #       If there are multiple symlinked ways to get to this folder,
        #       don't want to use a base_name that doesn't match what the
        #       user specified on his cd command.
        # Note: Keep the double quotes.  Unquoted, a folder name with
        #       embedded whitespace is split into words and base_name
        #       silently gets only the first word.
        set base_name = "$cwd:t"
    endif

    # Note: Can't exclude "." here because we DO want to loop through
    #       the contents of the current top level folder, not just the
    #       contents of subfolders.  Otherwise this command:
    #           % cd foo/bar; clean_up_album
    #       would miss out on problems in foo/bar that are handled
    #       correctly by this command:
    #           % cd foo; clean_up_album
    #       because the first wouldn't look for problems in bar itself,
    #       only in its subfolders.
    if ("$base_name" == "slideshow" || "$base_name" == "thumbs") then
        if ("$option_verbose" == "true") then
            echo "Skipping: $dir ..."
        endif
        continue
    endif

    echo "Processing: $dir ..."
    # Work inside the folder; the matching "cd -" is at the bottom of
    # this loop body.  Every "continue" that applies to this outer loop
    # occurs above this point, so the two always stay paired.
    cd "$dir"

    if (-d "$base_name") then
        if ("$option_verbose" == "true") then
            echo "Same dir name: $dir/$base_name ..."
        endif
        set files_considered_for_removal = "false"

        # Loop through all files in alphabetic order, except those starting
        # with ".".
        # Note: Could also do those, but would have to skip "." and "..".
        #       Would also probably want to skip any other files starting
        #       with "." since by convention they are "hidden" files in
        #       Unix/Linux.  On Mac would specifically want to skip
        #       ".DS_Store", which is a "hidden" file created by macOS to
        #       preserve the user-specified layout of icons in Finder's
        #       icon view.
        #       It's easier to just let ls default to skipping all dot files.
        # Note: Double quotes outside the backticks cause the list of
        #       filenames, one per line, generated by ls, to be quoted
        #       so that embedded whitespace and special chars are tolerated.
        #       The foreach command operates on each line as a filename,
        #       not each word of each line.
        # Note: Have to use `ls` here, not just *.  Otherwise the wildcard
        #       inside double quotes is not expanded.
        foreach file ("`ls`")
            # Note: Ignore the js symlinks.  Want them at each parent
            #       all the way up the tree for use by slideshow software.
            if (-l "$file" && "$file" == "js") then
                if ("$option_verbose" == "true") then
                    echo "Skipping symlink: $file ..."
                endif
                continue
            endif

            # The pub script should probably exist at each level, just in case
            # there's something added later to the folder that needs to be
            # pushed to the server, without necessarily re-creating
            # the whole album locally.  Also if some server files are deleted
            # or a new server is created.  Always useful to have pub in all
            # folders that map to server folders.  Makes it possible to
            # recreate the entire server via a script that finds and runs
            # all pub scripts in the entire local directory tree.
            if (-x "$file" && "$file" == "pub") then
                if ("$option_verbose" == "true") then
                    echo "Skipping file: $file ..."
                endif
                continue
            endif

            if ("$option_verbose" == "true") then
                echo "Checking for duplicate file: $base_name/$file ..."
            endif
            if (! -e "$base_name/$file") then
                continue
            endif

            echo "Same file name: $file ..."
            # Note: Use -d to avoid following symlinks, just see them
            ls -FlAd "$file" "$base_name/$file"

            # Cheap first check: compare sizes before comparing contents.
            # Note: stat -f "%z" is the BSD/macOS way to print the size.
            @ parent_size = `stat -f "%z" "$file"`
            @ child_size = `stat -f "%z" "$base_name/$file"`
            if ($parent_size:q != $child_size:q) then
                echo "Same file name but different file size"
                echo "Parent size = $parent_size"
                echo "Child size = $child_size"
                beep
                pause "You might need to fix this. Hit Enter to continue:"
                continue
            endif

            echo "Same file name and size ..."
            if (-l "$file") then
                # Symlinks are compared by target string only, and are
                # never offered for deletion here.
                set parent_target_string = "`readlink $file:q`"
                set child_target_string = "`readlink $base_name:q/$file:q`"
                if ($parent_target_string:q == $child_target_string:q) then
                    echo "Same symlink name and target string ..."
                    #?? Any reason to get the full expanded non-symbolic paths
                    #?? of the targets via readlink -f and compare them?
                    #?? If they are folders
                    #??     If they are different folders
                    #??         That's to be expected
                    #??         Many symlinks are relative paths like "." or ".."
                    #??         referring to different folders when residing in
                    #??         different folders.  Or contain the names of other
                    #??         relative links like album, slideshow, or parent,
                    #??         in their string values that may refer to different
                    #??         folders.
                    #??     Else if they are the same folder
                    #??         That's to be expected.
                    #??         The path may be absolute, not relative.
                    #??         So, it may be an erroneous copy that should be
                    #??         deleted.  Or maybe both folders really needed the
                    #??         same symlink to the same target folder.  Perhaps
                    #??         some folder shared by multiple folders.
                    #??     So comparing target folders doesn't tell us much.
                    #?? Else if they are files
                    #??     If they are different files
                    #??         That's to be expected
                    #??         Many symlinks are relative paths like "." or ".."
                    #??         referring to different files when residing in
                    #??         different folders.  Or contain the names of other
                    #??         relative links like album, slideshow, or parent,
                    #??         in their string values that may refer to different
                    #??         files.
                    #??     Else if they are the same file
                    #??         That's to be expected.
                    #??         The path may be absolute, not relative.
                    #??         So, it may be an erroneous copy that should be
                    #??         deleted.  Or maybe both folders really needed the
                    #??         same symlink to the same target file.  Perhaps
                    #??         some file shared by multiple folders.
                    #??     So comparing target files doesn't tell us much.
                    #?? For now, just ignore all symlinks with the same name
                    #?? and target string, assuming them to be correct.
                    #?? We'll deal with specific symlink names that we know
                    #?? about later in this script.
                    echo "Skipping symlink: $file ..."
                    continue
                endif
                echo "Same symlink name and size, but different target string."
                beep
                pause "You might need to fix this. Hit Enter to continue:"
                continue
            endif

            echo "Same file name and size, but not a symlink ..."
            echo "----------------"
            echo "diff -s $file $base_name/$file"
            echo "----------------"
            diff -s "$file" "$base_name/$file"
            # Save the diff status right away, before the next echo
            # overwrites $status.
            set rc = $status
            echo "----------------"
            if ($rc) then
                echo "Same file name and size, but different contents."
                beep
                pause "You might need to fix this. Hit Enter to continue:"
                continue
            endif

            # Identical file: offer (rm -i asks first) to delete the
            # parent copy locally, then repeat the rm via tridentcd.
            # NOTE(review): tridentcd is presumably the helper that runs
            # the command in the matching web server folder -- confirm.
            set files_considered_for_removal = "true"
            beep
            pause
            rm -iv "$file"
            tridentcd rm -iv "$file"
        end

        if ("$files_considered_for_removal" == "true") then
            echo "Checking for album/slideshow artifact files that should"
            echo "perhaps also be deleted, since some identical files were"
            echo "offered for deletion ..."
            pause
            echo "----------------"
            echo "Current folder contents:"
            echo "----------------"
            dir
            echo "----------------"

            # Note: Do NOT delete the js symlinks.  Want them at each parent
            #       all the way up the tree for use by slideshow software.
            #if (-l "js") then
            #    rm -iv js
            #    tridentcd rm -iv js
            #endif

            # Note: Do NOT delete the parent symlinks.  They may have a
            #       purpose outside of the use by slideshow software.
            #if (-l "parent") then
            #    rm -iv parent
            #    tridentcd rm -iv parent
            #endif

            if (-l "album") then
                rm -iv album
                tridentcd rm -iv album
            endif
            if (-f "index.htm") then
                rm -iv index.htm
                tridentcd rm -iv index.htm
            endif
            if (-f "pub") then
                rm -iv pub
                tridentcd rm -iv pub
            endif
            if (-d "slideshow") then
                rm -ivr slideshow
                tridentcd rm -ivr slideshow
            endif
            if (-d "thumbs") then
                rm -ivr thumbs
                tridentcd rm -ivr thumbs
            endif

            pause
            echo "----------------"
            echo "Remaining folder contents:"
            echo "----------------"
            dir
            echo "----------------"
            pause
        endif
    endif

    # Runs for every processed folder, whether or not it has a same-named
    # child.  A nonzero status makes the script beep and wait for Enter.
    check_for_folders_hidden_by_index_htm
    if ($status) then
        beep
        pause "Hit Enter to continue:"
    endif

    set display_name = "$dir"
    if ("$display_name" == ".") then
        set display_name = "top level of $base_name"
    endif
    if ("$option_verbose" == "true") then
        pause "Done with $display_name. Hit Enter to continue:"
        echo ""
    endif

    # Return to the folder we were in before the cd "$dir" above, so the
    # next relative path from find resolves correctly.
    cd -
end