#!/usr/local/bin/ruby

require 'dbi'
require 'find'

## configuration

# Absolute location of this script: $0 as given when it is already absolute,
# otherwise $0 resolved against the current working directory.
script_path = ($0 =~ /^\//) ? $0 : Dir.pwd + '/' + $0

# Root of the tree to index: two levels above the script's directory.  The
# '..' components are left in the string literally rather than collapsed.
base_dir = File.dirname(script_path) + '/../..'

# Groups of interchangeable words: when a file is tagged with any member of
# a group, every other member of that group is added as a keyword as well.
# Each group is listed once; a repeated group would only add words that are
# already present.
equiv = [
    %w( cat kitty kitties cats ),
    %w( dawn dawns ),
    %w( dave daves ),
    %w( apt apartment apts apartments ),
    %w( balcony deck ),
    %w( michaud michauds ),
    %w( bushong bushongs ),
    %w( christmas xmas ),
    %w( grandpa grandfather grandfathers grandpas ),
    %w( grandma grandmother grandmothers grandmas ),
    %w( mom mother ),
    %w( dad father ),
    %w( bbq grill ),
    %w( bike bikes ),
    %w( computer computers ),
    %w( hwy highway ),
    %w( bday birthday ),
    %w( horse horses ),
]

# One-way implications: a file tagged with the key also gets every word in
# the value list.  Words without an entry yield an empty list.
#
# The table is filled with Hash#update rather than Hash#replace: on Ruby 1.9
# and later, replace also copies the (nil) default of the literal, which
# would make lookups of unknown words return nil instead of [].  The default
# list is frozen because a single object is shared by every missing key.
syn = Hash.new([].freeze).update({
    'cammy'    => %w( cat ),
    'kirby'    => %w( cat ),
    'pablo'    => %w( dog ),
    'bed'      => %w( bedroom ),
    'dave'     => %w( david bushong dbushong ),
    'mconst'   => %w( michael constant ),
    'jon'      => %w( kuroda ),
    'geordan'  => %w( rosario ),
    'nevman'   => %w( nevin cheung ),
    'debbie'   => %w( hoo ),
    'scotsman' => %w( ben scott ),
    'mikeh'    => %w( mike howard ),
    'peterm'   => %w( peter mardahl ),
    'lila'     => %w( patton ),
    'tmonroe'  => %w( tony anthony monroe ),
})

## functions

# Prints the given arguments to standard error via puts, but only while the
# global $verbose flag is set; otherwise does nothing.
def debug(*args)
    return unless $verbose

    $stderr.puts(*args)
end

# Reads the file at +path+ and splits it into sections.
#
# A section starts with a line of the form "** <name>" and runs up to the
# next such line or the end of the file.  Anything before the first section
# header is ignored.
#
# Returns a Hash mapping each section name to the Array of
# whitespace-separated words found in that section's body.
def parseInfo(path)
    sections = {}

    File.read(path).scan(/^\*\*\s+([^\n]+)\n(.*?)(?=^\*\*\s|\Z)/m) do |name, body|
        sections[name] = body.split
    end

    sections
end

# Returns the id of the row holding +what+ in the table for +col+, inserting
# the row first when it does not exist yet.
#
# col  - column name ('keyword' or 'path' in this script); the table is
#        named col + 's' and its id column col + '_id'
# what - the value to look up or insert
#
# Ids are memoised in $id[col], so each distinct value costs at most one
# lookup per run.  The queries go through select_one / do instead of
# prepare + execute, so no statement handle is left unfinished.
def checkAndCreate(col, what)
    cache = $id[col]
    return cache[what] if cache[what]

    where = col + 's'

    row = $dbh.select_one("select #{col}_id from #{where} where #{col} = ?",
                          what)
    unless row
        debug("adding #{col}: #{what}")
        $dbh.do("insert into #{where} (#{col}) values (?)", what)
        # The id just handed out is read back from the table's id sequence.
        row = $dbh.select_one("select last_value from #{where}_#{col}_id_seq")
    end

    cache[what] = row[0]
end

## startup

# Debug output is enabled when the first command-line argument is -v.
$verbose = (ARGV.first == '-v')

# Per-run caches of ids already looked up or created by checkAndCreate,
# keyed first by column name and then by value.
$id = { 'keyword' => {}, 'path' => {} }

# Every run rebuilds the index: all existing rows in entries are removed
# before the directory walk starts.
$dbh = DBI.connect('DBI:Pg:__DBNAME__', '__USERNAME__')
$dbh.do('delete from entries')

## main loop

# Builds the final keyword list for one image from its raw words.
#
# words - raw words gathered from the directory path, the .keywords file
#         and the file name
# syn   - Hash mapping a word to the extra words it implies
# equiv - Array of word groups; any member of a group implies the others
#
# Returns the distinct keywords to store, in first-seen order.
def expandWords(words, syn, equiv)
    # Strip digits and fold case, so e.g. "IMG0042" becomes "img".
    expanded = words.map { |word| word.gsub(/\d+/, '').downcase }

    # One round of synonyms over a snapshot of the list.  fetch with an
    # explicit fallback keeps this working even if syn has no default value.
    expanded.dup.each { |word| expanded.concat(syn.fetch(word, [])) }

    # One round of equivalence groups, again over a snapshot so the words
    # appended here are not expanded a second time.
    expanded.dup.each do |word|
        equiv.each do |group|
            expanded.concat(group - [word]) if group.include?(word)
        end
    end

    # Drop image-type/thumbnail noise words and anything shorter than three
    # characters.
    expanded.uniq.reject do |word|
        word =~ /^((img|gif|jpe?g|png|pic|th(umb)?)s?|.{0,2})$/i
    end
end

# Prepared once and reused for every (path, keyword) pair.
insert_entry = $dbh.prepare(
    'insert into entries (path_id, keyword_id) values(?, ?)')

# base_dir is escaped so characters such as '.' or '+' in it are matched
# literally when it is stripped from the front of each path.
base_prefix = /^#{Regexp.escape(base_dir)}\/?/

begin
    Find.find(base_dir) do |path|
        base_path = path.sub(base_prefix, '')

        # Hidden files and directories are recognised on the path relative to
        # base_dir: base_dir itself ends in '/../..', so testing the full
        # path would treat everything as hidden.  Nothing below a hidden
        # directory is wanted, so the walk does not descend into it.
        Find.prune if "/#{base_path}".include?('/.')

        next if File.ftype(path) != 'directory' ||
            File.exist?(path + '/.htaccess')
        next if base_path =~ %r<^photos/album/>

        words_all = base_path.split(/[-\/_]/)

        kwpath = path + '/.keywords'
        word_hash = File.exist?(kwpath) ? parseInfo(kwpath) : {}
        # The '.' section applies to every image in this directory.
        words_all += word_hash['.'] if word_hash['.']

        Dir.entries(path).grep(/(jpe?g|png|gif)$/i).each do |file|
            words = words_all + (word_hash[file] || []) +
                file.sub(/\.[^.]+$/, '').split(/\s+|[-_]/)

            short_path = "#{base_path}/#{file}".sub(/^\//, '')
            path_id = checkAndCreate('path', short_path)

            expandWords(words, syn, equiv).each do |word|
                insert_entry.execute(path_id, checkAndCreate('keyword', word))
            end
        end
    end
ensure
    insert_entry.finish
end
