diff --git a/contrib/kpa-filter b/contrib/kpa-filter index 1636cd53..43e6fa31 100755 --- a/contrib/kpa-filter +++ b/contrib/kpa-filter @@ -1,462 +1,568 @@ #!/usr/bin/perl # Copyright 2019 Robert Krawitz # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public # License as published by the Free Software Foundation; either # version 2 of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; see the file COPYING. If not, write to # the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, # Boston, MA 02110-1301, USA. # Maintain KPhotoAlbum index.xml files use strict; use warnings; use XML::LibXML; +use DBI; +use DBD::SQLite::Constants qw/:file_open/; use Getopt::Long; use Carp("cluck"); my (%categories); # Map between category name and ID for compressed files my (%category_map); my (@category_names); # This is stored as {category}{member}{groupname}, as members can only be # members of one group. Weird, huh? But this way, when we overlay the new my (%member_groups); # $group_closure{$category}{value}{values...} my (%group_closure); # Global image (for filtering) my (%current_image); -my ($opt_config_file); +my (%exif_data); +my ($opt_config_file); my ($opt_list_type); +my ($opt_use_exif) = 1; +my ($opt_print_count); +my ($opt_dryrun); # Needs to be kept up to date with XMLDB/Database.cpp my (@standard_vars) = ( "file", "label", "description", "startDate", "endDate", "yearFrom", "monthFrom", "dayFrom", "hourFrom", "minuteFrom", "secondFrom", "yearTo", "monthTo", "dayTo", "angle", "md5sum", "width", "height", "rating", "stackId", "stackOrder", - "gpsPrec", - "gpsLon", - "gpsLat", - "gpsAlt", "videoLength"); +my ($rootdir); + +my (@exif_vars) = ( + "Exif_Photo_FocalLength", + "Exif_Photo_ExposureTime", + "Exif_Photo_ApertureValue", + "Exif_Photo_FNumber", + "Exif_Photo_Flash", + "Exif_Photo_Contrast", + "Exif_Photo_Sharpness", + "Exif_Photo_Saturation", + "Exif_Image_Orientation", + "Exif_Photo_MeteringMode", + "Exif_Photo_ISOSpeedRatings", + "Exif_Photo_ExposureProgram", + "Exif_Image_Make", + "Exif_Image_Model", + "Exif_GPSInfo_GPSVersionID", + "Exif_GPSInfo_GPSAltitude", + "Exif_GPSInfo_GPSAltitudeRef", + "Exif_GPSInfo_GPSMeasureMode", + "Exif_GPSInfo_GPSDOP", + "Exif_GPSInfo_GPSImgDirection", + "Exif_GPSInfo_GPSLatitude", + "Exif_GPSInfo_GPSLatitudeRef", + "Exif_GPSInfo_GPSLongitude", + "Exif_GPSInfo_GPSLongitudeRef", + "Exif_GPSInfo_GPSTimeStamp", + "Exif_Photo_LensModel" + ); + +my ($querystr) = 'SELECT ' . join(', ', "filename", @exif_vars) . " FROM exif"; +my ($exif_query); + my (@all_vars) = ( @standard_vars, "mediaType"); sub usage() { - my ($known_vars) = join("\n ", sort map {my $a = $_; $a =~ s/ //g; " $a"} @all_vars); + my ($known_vars) = join("\n ", sort map {my $a = $_; $a =~ s/ //g; "\$$a"} @all_vars); + my ($exif_vars) = join("\n ", sort map {"\$$_"} @exif_vars); my ($usage) = << "FINIS"; -Usage: $0 [--db filename] [--list-values category] [filter] +Usage: $0 [--db filename] [--list-values category] [-exif|-no-exif] [filter] Filter a KPhotoAlbum database file for either images files or category values. If --list-values is not specified, kpa-filter extracts files from the database that match the specified criteria. Currently only attributes listed in the index.xml file are supported; EXIF data is not utilized. If --list-values is specified, along with the name of a category (e. g. Keywords), all values of the specified category that are assigned to any member of the selected set (including those assigned hierarchically) are reported. + If --no-exif is specified, EXIF data is not searched. This + improves performance if that is not needed. + Filters are Perl expressions. Variable names are generated from the attributes. The supported variables are: $known_vars + The following EXIF attributes may be specified in the same way: + + $exif_vars + In addition, the following functions are provided to match on categoried information: hasKeyword(\$value) hasPerson(\$value) hasPlace(\$value) hasToken(\$value) hasAttribute(\$category, \$value) matchesKeyword(\$pattern) matchesPerson(\$pattern) matchesPlace(\$pattern) matchesToken(\$pattern) matchesAttribute(\$category, \$pattern) The "matches" functions accept Perl regular expressions (see perlre(1)). FINIS print STDERR $usage; exit(1); } ################################################################ ################################################################ # Load files ################################################### ################################################################ ################################################################ ################################################################ # Utilities ################################################################ sub isNode($$) { my ($node, $name) = @_; return ($node->nodeType() == 1 && lc $node->nodeName() eq $name); } sub computeClosure($\@) { my ($category, $members) = @_; my (%answer); foreach my $value (@$members) { map { $answer{$_} = 1;} keys %{$group_closure{$category}{$value}}; } return sort keys %answer; } ################################################################ # Categories ################################################################ sub loadCategories($$) { my ($node, $compressed) = @_; foreach my $child($node->childNodes()) { next if !isNode($child, "category"); my ($category) = $child->getAttribute("name"); $categories{$category} = {}; $category_map{$category} = []; $member_groups{$category} = {}; $group_closure{$category} = {}; my (@members); foreach my $grandchild ($child->childNodes()) { next if !isNode($grandchild, "value"); my ($value) = $grandchild->getAttribute("value"); # This works for both compact and original file format $categories{$category}{$value} = 1; $category_map{$category}[$grandchild->getAttribute("id")] = $value; $group_closure{$category}{$value} = {}; $group_closure{$category}{$value}{$value} = 1; } } @category_names = sort keys %category_map; } ################################################################ # Images ################################################################ # Image options and values for uncompressed files. sub loadUncompressedOptions(\%) { my ($image) = @_; my (%options); my ($node) = $$image{"__node"}; foreach my $child ($node->childNodes()) { next if !isNode($child, "options"); my (@members); foreach my $grandchild ($child->childNodes()) { next if !isNode($grandchild, "option"); my ($category) = $grandchild->getAttribute("name"); foreach my $greatgrandchild ($grandchild->childNodes()) { next if !isNode($greatgrandchild, "value"); my ($val) = $greatgrandchild->getAttribute("value"); push @members, $val; } map { $options{$category}{$_} = 1; } computeClosure($category, @members); } } return \%options; } # Compressed XML files are simpler to parse; there's simply an attribute # for each category sub loadCompressedOptions(\%) { my ($image) = @_; my (%options); foreach my $category (@category_names) { my ($members) = $$image{$category}; my (@members); if (defined $members && $members ne '') { my ($map) = $category_map{$category}; @members = map {$$map[$_]} split(/,/, $members); } $options{$category} = {}; map { $options{$category}{$_} = 1; } computeClosure($category, @members); } return \%options; } sub loadOptions() { if (! defined $current_image{"options"}) { if ($current_image{"__compressed"}) { $current_image{"options"} = loadCompressedOptions(%current_image); } else { $current_image{"options"} = return loadUnCompressedOptions(%current_image); } } } sub hasAttribute($$) { my ($category, $value) = @_; loadOptions(); return defined $current_image{"options"}{$category}{$value}; } sub hasKeyword($) { my ($value) = @_; return hasAttribute("Keywords", $value); } sub hasPerson($) { my ($value) = @_; return hasAttribute("People", $value); } sub hasPlace($) { my ($value) = @_; return hasAttribute("Places", $value); } sub hasToken($) { my ($value) = @_; return hasAttribute("Token", $value); } sub matchesAttribute($$) { my ($category, $value) = @_; loadOptions(); return grep(/$value/, keys %{$current_image{"options"}{$category}}) > 0; } sub matchesKeyword($) { my ($value) = @_; return matchesAttribute("Keywords", $value); } sub matchesPerson($) { my ($value) = @_; return matchesAttribute("People", $value); } sub matchesPlace($) { my ($value) = @_; return matchesAttribute("Places", $value); } sub matchesToken($) { my ($value) = @_; return matchesAttribute("Token", $value); } sub makeVarcode($) { my ($identifier) = @_; my ($varname) = $identifier; $varname =~ s/ //g; - return " my (\$$varname) = \$current_image{\"$identifier\"};"; + return " my (\$$varname) = \$current_image{\"$identifier\"};"; } sub loadImages($$$) { my ($node, $compressed, $filter) = @_; - if (! $filter || $filter eq '') { - $filter = '1'; - } my ($varcode) = join("\n", map {makeVarcode($_)} @standard_vars); + if (! defined $filter) { + $filter = 'true'; + } my ($code) = << 'EOF'; - { - no warnings "uninitialized"; - no warnings "numeric"; - my %items_found; - foreach my $child ($node->childNodes()) { - next if !isNode($child, "image"); - %current_image=(); - $current_image{"__node"} = $child; - $current_image{"__compressed"} = $compressed; - map { $current_image{$_->nodeName} = $_->value } $child->attributes(); +{ + no warnings "uninitialized"; + no warnings "numeric"; + my %items_found; + foreach my $child ($node->childNodes()) { + next if !isNode($child, "image"); + %current_image=(); + $current_image{"__node"} = $child; + $current_image{"__compressed"} = $compressed; + map { $current_image{$_->nodeName} = $_->value } $child->attributes(); EOF - $code .= $varcode; + $code .= "$varcode\n"; $code .= << 'EOF'; - - my ($mediaType) = defined $videoLength ? "Video" : "Image"; - - # Restore any attributes defaulted in version 8 - $current_image{"angle"} = 0 if (! defined $current_image{"angle"}); - $current_image{"endDate"} = $current_image{"startDate"} if (! defined $current_image{"endDate"}); - if (! defined $current_image{"label"}) { - my ($label) = $file; - $label =~ s,^.*/(.*)\.[^.]*$,$1,; - $current_image{"label"} = $label; - } + # Restore any attributes defaulted in version 8 + $current_image{"angle"} = 0 if (! defined $current_image{"angle"}); + $current_image{"endDate"} = $current_image{"startDate"} if (! defined $current_image{"endDate"}); + if (! defined $current_image{"label"}) { + my ($label) = $file; + $label =~ s,^.*/(.*)\.[^.]*$,$1,; + $current_image{"label"} = $label; + } EOF - $code .= " if ($filter) {\n"; $code .= << 'EOF'; - if ($opt_list_type) { - loadOptions(); - map { $items_found{$_} = 1; } keys %{$current_image{"options"}{$opt_list_type}}; - } else { - print "$file\n"; - } - } + my ($mediaType) = defined $videoLength ? "Video" : "Image"; +EOF + if ($opt_use_exif) { + my ($exifdecl) = join("\n ", map {"my (\$$_);"} @exif_vars); + my ($exifcode) = join("\n ", map {"\$$exif_vars[$_] = \$\$row[$_];"} (0..$#exif_vars)); + $code .= << "EOF"; + $exifdecl; + if (my \$row = \$exif_data{"\$rootdir\$file"}) { + $exifcode + } +EOF + } + $code .= " if ($filter) {\n"; + if ($opt_list_type) { + if ($opt_list_type =~ /cat(egory)?:(.*)/) { + $code .= << "EOF" + loadOptions(); + map { \$items_found{\$_}++; } keys \%{\$current_image{"options"}{"$2"}}; +EOF + } else { + $code .= " \$items_found{\$$opt_list_type}++;\n"; } - if ($opt_list_type) { - print join("\n", sort keys %items_found), "\n"; + $code .= << 'EOF'; + } + } +EOF + if ($opt_print_count) { + $code .= ' print join("\n", map {sprintf("%7d %s", $items_found{$_}, $_);} sort keys %items_found), "\n"'."\n"; + } else { + $code .= << 'EOF' + print join("\n", sort keys %items_found), "\n"; +EOF + } + } else { + $code .= << 'EOF'; + print "$file\n"; } } EOF + } + $code .= "}\n"; + if ($opt_dryrun) { + print STDERR $code; + exit; + } eval $code; if ($@) { my $known_vars = join("\n", sort map {my $a = $_; $a =~ s/ //g; " $a"} @all_vars); die "Filter $filter failed:\n\n$@\n"; } } ################################################################ # Member groups ################################################################ sub loadMemberGroups($$) { my ($node, $compressed) = @_; foreach my $child ($node->childNodes()) { next if !isNode($child, "member"); my ($category) = $child->getAttribute("category"); my ($groupname) = $child->getAttribute("group-name"); if ($compressed) { my ($members) = $child->getAttribute("members"); if ($members) { my ($map) = $category_map{$category}; my (@members) = grep { ! $_ == 0 } split(/,/, $members); map { $member_groups{$category}{$$map[$_]} = $groupname; } @members; } } else { my ($member) = $child->getAttribute("member"); $member_groups{$category}{$member} = $groupname; } } foreach my $category (sort keys %member_groups) { foreach my $member (keys %{$member_groups{$category}}) { my ($parent) = $member_groups{$category}{$member}; do { $group_closure{$category}{$member}{$parent} = 1; } while (defined ($parent = $member_groups{$category}{$parent})); } } } ################################################################ # Top level file loader ################################################################ sub load_file($$) { my ($file, $filter) = @_; + my ($images); + + if ($opt_dryrun) { + loadImages($images, 1, $filter); + exit; + } + my $doc = XML::LibXML->load_xml(location => $file); if (! $doc) { - usage(); + die "Can't open $file as a KPhotoAlbum database.\n"; } my $kpa = ${$doc->findnodes('KPhotoAlbum')}[0]; if (! $kpa) { - die "$file is not a KPhotoAlbum database\n"; + die "$file is not a KPhotoAlbum database.\n"; } elsif ($kpa->getAttribute("version") != 7 && $kpa->getAttribute("version") != 8) { - die "kpa-list-images only works with version 7 and 8 files\n"; + die "kpa-list-images only works with version 7 and 8 files.\n"; } - my ($compressed) = int $kpa->getAttribute("compressed"); + $rootdir = $file; + $rootdir =~ s,[^/]*$,,; + if ($opt_use_exif) { + my $exif_db .= "${rootdir}exif-info.db"; - my ($images); + if (! -f $exif_db) { + die "Expected EXIF database at $exif_db, but can't find it.\n" + } + my $EXIF_DB = DBI->connect("dbi:SQLite:$exif_db", undef, undef, { + sqlite_open_flags => SQLITE_OPEN_READONLY, + }); + if (! defined $EXIF_DB) { + die "Can't open EXIF database $exif_db.\n" + } + $exif_query = $EXIF_DB->prepare($querystr); + $exif_query->execute(); + # This is measured to be considerably (about 15% total, + # considerably more for EXIF database alone) than individual + # lookups on a prepared query. + while (my @row = $exif_query->fetchrow_array) { + my ($filename) = shift @row; + $exif_data{$filename} = \@row; + } + } + my ($compressed) = int $kpa->getAttribute("compressed"); foreach my $topcn ($kpa->childNodes()) { if (isNode($topcn, "categories")) { loadCategories($topcn, $compressed); - if ($opt_list_type && (!defined $filter || $filter eq '')) { + if ($opt_list_type && !defined $filter)) { print join("\n", sort grep(defined $_, @{$category_map{$opt_list_type}})), "\n"; exit; } } elsif (isNode($topcn, "images")) { $images = $topcn; } elsif (isNode($topcn, "member-groups")) { loadMemberGroups($topcn, $compressed); } elsif (isNode($topcn, "blocklist")) { } elsif ($topcn->nodeType() == 1) { warn "Found unknown node " . $topcn->nodeName() . "\n"; } } # Load images last so that we can stream them through. loadImages($images, $compressed, $filter); } sub get_standard_kpa_config_file() { my ($kpa_config) = $ENV{"HOME"} . "/.config/kphotoalbumrc"; open KPACONFIG, "<", "$kpa_config" or return ""; my ($imageDBFile) = ""; while () { if (/^imageDBFile=(.*)$/) { $imageDBFile = $1; last; } } close KPACONFIG; return $imageDBFile; -} +} sub main($) { my ($filter) = @_; my ($config_file); if ($opt_config_file) { $config_file = $opt_config_file; } else { $config_file = get_standard_kpa_config_file(); } load_file($config_file, $filter); } my ($do_help); my (%options) = ("f" => \$opt_config_file, "db" => \$opt_config_file, "l=s" => \$opt_list_type, "list=s" => \$opt_list_type, "list-values=s" => \$opt_list_type, + "exif!" => \$opt_use_exif, + "c" => \$opt_print_count, + "count!" => \$opt_print_count, + "dryrun!" => \$opt_dryrun, + "dry-run!" => \$opt_dryrun, "h" => \$do_help, "help" => \$do_help, ); Getopt::Long::Configure("bundling", "require_order"); if (!Getopt::Long::GetOptions(%options) || $do_help) { usage(); } main($ARGV[0]);