#!/usr/bin/perl
# Author: Vlado Keselj 2009-19 last update: 2019-12-07
# Change file names to safe names (e.g., space to _)
# This is a version with detailed explanations and code comments.
# Usage: fix-file-names.pl f1 f2 ...

use strict;
use warnings;

# Loop through all arguments given in the command line.
for my $fnold (@ARGV) {

    # Split the argument into an optional directory prefix and the last
    # path component, so that only the file name itself is rewritten.
    # Without this, "dir/my file" would turn into "dir=2Fmy_file" and the
    # file would be moved out of its directory.  Trailing slashes (as in
    # "my dir/") are not part of the name.
    my ($dir, $name) = $fnold =~ m{\A (.*/)? ([^/]+) /* \z}xs;
    if (!defined $name) {
        # Empty argument, or nothing but slashes: there is no name to fix.
        warn "$fnold: no file name to fix, skipped\n";
        next;
    }
    $dir = '' if !defined $dir;

    # $name is the old file name, and $fixed is the new file name.
    my $fixed = fix_filename($name);

    # Let us check if there is any name change; if not, we do nothing but
    # report that we are keeping the same file name.
    if ($fixed eq $name) {
        print "$fnold \t\tthe same file name kept!\n";
        next;
    }

    # Otherwise, we need to rename the file, keeping it in its directory.
    my $fnnew = $dir . $fixed;

    # If there is another file with name $fnnew we do not want to
    # overwrite it, so let us abort the mission!  The -l test is needed in
    # addition to -e because -e is false for a dangling symbolic link,
    # which rename would silently replace.
    if (-e $fnnew or -l $fnnew) { die "$fnnew already exists!" }

    # Let the user know what we are about to do.
    print "$fnold \t-> $fnnew\n";

    # Rename the file, or die with the reason reported by the system.
    rename($fnold, $fnnew)
        or die "Cannot rename '$fnold' to '$fnnew': $!\n";
}

# This is the function that makes a new, better name, if needed, out of
# the old name.  It takes one argument, which is the old name (a single
# path component, not a path: a "/" would be hex-encoded like any other
# unwanted character), and returns the new name.  The argument is not
# modified.
sub fix_filename {
    my ($name) = @_;

    # Alias $_ to our private copy, so that the substitutions below stay
    # short without touching the global $_.
    for ($name) {

        # Single right quote: this is the UTF-8 byte sequence of U+2019,
        # a Unicode character equivalent to a single quote.  Turn it into
        # a plain ' first, so that it goes through the same quote rules
        # and the same dash-collapsing as the ASCII quote.  (Doing this
        # after the collapsing step could leave "---" in the result.)
        # There are many more Unicode characters that could be treated
        # better; that is left for future work.
        s/\xE2\x80\x99/'/g;

        # People like to name files like "cv - 2.pdf"; replace a single
        # dash surrounded by spaces with a bare dash: "cv-2.pdf".  A run
        # of dashes such as " -- " is not matched by this rule.
        s/ +- +/-/g;

        # A sequence of two or more single quotes becomes --
        s/''+/--/g;

        # However, if a single single quote is used, it becomes -
        s/'/-/g;

        # Do not allow parentheses of any kind, i.e., () [] {} <>, so
        # "file(2).doc" becomes "file_-2-_.doc".
        s/[[(<{]/_-/g;
        s/[])>}]/-_/g;

        # Punctuation , : ; optionally followed by whitespace becomes --
        s/[,:;]\s*/--/g;

        # If & is used in the file name, replace it with the word "and".
        s/&/and/g;

        # At this point, replace any space with underscore.
        s/ /_/g;

        # A sequence of more than one _ becomes only one (they are hard
        # to distinguish anyway).
        s/__+/_/g;

        # Similarly, reduce more than two minuses into two.
        s/---+/--/g;

        # Now take any other character that we do not want to see in a
        # file name and convert it to the form =HH, where HH is its
        # upper-case hexadecimal code.  Wanted characters are ASCII
        # letters, digits, underscore, period and minus.  The character =
        # itself is converted too (to =3D), to distinguish it from the
        # newly introduced = signs.  The class is spelled out instead of
        # using \w so that the result does not depend on Unicode or
        # locale matching rules.
        s/([^A-Za-z0-9_.-])/sprintf("=%02X", ord $1)/ge;
    }

    # Finally return the new name.
    return $name;
}