#!/usr/bin/perl
# Converts PostgreSql copy dump to format suitable for Oracle loader
# (c) 2008 Alejandro Imass, (c) 2008 Corcaribe Tecnología C.A.
# Free to use under the terms of Perl itself

use warnings;
use strict;
use Getopt::Long;
use Time::HiRes;

my $VERSION = 0.1;

my $if = undef;
my $of = undef;
my $qc = '"'; #oracle loader default is "
my $sc = ';';
my $perr_str = 'Wrong params, see doc. Tip: Specify at least -if (pg file) and -of (ora loader file)';

GetOptions(
  'if=s'  => \$if,
  'of=s'  => \$of,
  'qc=s'  => \$qc,
  'sc=s'  => \$sc,
) || die $perr_str;
die $perr_str unless defined $if and defined $of;

open(IF, "<", $if) or die "Can't open file $if for reading: $!";
open(OF, ">", $of) or die "Can't open file $of for writing: $!";;


my $start = Time::HiRes::time;
my $count = 1;

$SIG{INT} = \&done;

# skip pg header stuff
while(<IF>){
  last if $_ =~ m/^COPY.*$/;
}


# now do the real thing

while(<IF>){
  chomp $_;
  my @cols = split /\t/,$_;
  my @qcols = ( );
  # quote strings
  foreach my $c (@cols){
    push @qcols, $c =~ m/^(\d+|\d+\.\d+)$/ ? $c : $qc.$c.$qc;
  }
  print OF join($sc,@qcols)."\n" or die "Could not write to OF: $!";
  last if $_ =~ m/^\\\.$/;
  $count++;
}

&done();

exit;


sub done {
  close IF;
  close OF;

  my $end = Time::HiRes::time;
  my $time = $end - $start;
  my $avg = $time/$count;

  print "Done. Processed $count lines\n";
  printf "Start:%s End:%s Time:%s Avg:%s\n",$start,$end,$time,$avg;
}


__END__


=head1 NAME

pgcopy2oraloader.pl - PostgreSql COPY format to CSV for Oracle Loader

=head1 SYNOPSIS

./pgcopy2oraloader.pl -if Infile.sql -of Outfile.csv

=head1 DESCRIPTION

This program will read a standard PostgreSql sql dump in copy format
and tranlate it into a simple delimited file. It work by opening the
Pg file for reading and opening the Outfile for writing. It will skip
all the Pg blurb until the COPY line. Then it will read each line,
quote it appropiately and write the line to the Outfile. The Pg file
COPY command MUST BE tab-separated values (default). But the Outfile
field separation character, as well as the quoting character is
configurable. The default field separation character is the semi-colon
B<;> and the default quoting character is the double quote B<">.

=head1 PARAMETERS

This program will take these parameters from the command line. Only
the infile and outfile parameters are mandatory:

=head2 -if - Required

Full path to the Pg input file.

=head2 -of - required

Full path to the output file.


=head2 -qc - Optional

Quoting character. Defaults to double quote B<">.


=head2 -qc - Optional

Quoting character. Defaults to semi-colon B<;>.


=head1 AUTHOR

Alejandro Imass <ait@p2ee.org>
Alejandro Imass <aimass@corcaribe.com>

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2008 by Alejandro Imass / Corcaribe Tecnología C.A. for the P2EE Project

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself, either Perl version 5.8.8 or,
at your option, any later version of Perl 5 you may have available.


=pod SCRIPT CATEGORIES

Database/Conversion
Format/Conversion
Conversion/Database

=pod PREREQUISITES

C<Getopt::Long>
C<Time::HiRes>


=cut