#!/usr/bin/perl -w
#
# Author:      Norbert Klasen
# E-mail:      klasen@zdv.uni-tuebingen.de
# 
# $Id: id-parse2ldif.pl,v 1.4 2001/04/06 18:07:38 zrdkn01 Exp $
#
# Description: parses 1id-abstracts.txt and generates ldif          
#              
# Redistribution and use in source and binary forms are permitted
# provided that the above copyright notice and this paragraph are
# duplicated in all such forms and that any documentation,
# advertising materials, and other materials related to such
# distribution and use acknowledge that the software was developed
# by Pierangelo Masarati.
#
# THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
# WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#


use English;
use strict;
# Fixed settings for the generated entries: the DN suffix appended to every
# entry's "cn=<tag>" and the URL prefix under which the draft text is found.
my $basedn  = 'cn=IETF,cn=bibtex,dc=directory,dc=dfn,dc=de';
my $baseurl = 'ftp://ftp.isi.edu/internet-drafts';

# Lower-case three-letter month names, indexed by (month number - 1).
my @mon = (
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
);

# Scratch variables shared by the rest of the script.
my ($line, $list, $i);

# Read and discard the first seven lines of the input
# (presumably the introductory text of the listing).
$i = 0;
while ( $i < 7 ) {
    <>;
    $i++;
}

# Parser state.  outputldif() reads $header directly, so these stay
# file-scoped.
#   $header   - header text of the entry being collected: every line that is
#               indented by exactly two spaces, joined with single spaces
#   $abstract - text of the lines indented by exactly four spaces, one per line
#   $state    - 1 once a four-space line has been seen for the current entry
#   $workinggroup, $wg - the "Name (acronym)" parts of the most recent
#               unindented heading line (undef if the line had no such shape)
my ($workinggroup, $wg, $abstract);
my $header = "";
my $state = 0;

# Emit the entry collected so far (if there is one) and reset the collector.
# Nothing is emitted when no header text has been gathered, so consecutive
# headings or stray lines never produce an empty entry.
my $flush_entry = sub {
    outputldif() if $header ne "";
    $state    = 0;
    $header   = "";
    $abstract = "";
};

while ( <> ) {
    next if /^-/;       # lines starting with "-" (presumably heading underlines)
    next if /^\s*$/;    # blank lines

    if ( /^\S/ ) {
        # Unindented line: a new heading closes whatever entry is pending.
        $flush_entry->();
        ($workinggroup, $wg) = /^(.*) \((.*)\)$/;
    }

    if ( /^  (\S.*\S)/ ) {
        my $header_line = $1;
        # A header line that opens with a double quote starts a new entry,
        # but only once the previous entry's abstract has been seen;
        # otherwise it is a continuation of the current header.
        if ( $state && $header_line =~ /^\"/ ) {
            $flush_entry->();
        }
        $header .= " " . $header_line;
    }

    if ( /^    (\S.*\S)/ ) {
        $abstract .= $1 . "\n";
        $state = 1;
    }
}

# The input ends without a following heading, so the last entry has to be
# written explicitly; without this it would be silently dropped.
$flush_entry->();

exit( 0 );

# outputldif - print the entry described by the file-level $header as one
# LDIF record on STDOUT, followed by a blank line.
#
# Takes no arguments.  Reads the file-level $header, $basedn, $baseurl and
# @mon.  The header must match
#     "Title", Author[, Author...], MM/DD/YYYY, <tag.something>
# where the tag is the text between "<" and the first ".".
#
# A header that does not match is reported on STDERR and skipped, so no
# record with empty attributes is written.  Dies if an author name cannot be
# parsed; all parsing happens before anything is printed, so a failure never
# leaves a half-written record behind.
#
# Declared without a prototype: the sub is called before its definition, and
# an empty prototype there only triggers a "called too early to check
# prototype" warning under -w.
sub outputldif {
#    print "HEAD: $header\n";
    my ($title, $authors, $month, $day, $year, $tag) = $header =~ m#\"\s*(.*)\"\,(.*)\, (\d{2})/(\d{2})/(\d{4})\, <([^.]*).*>$#;
    unless ( defined $tag ) {
        warn "skipping unparsable draft header: $header\n";
        return;
    }

    # TeX variant of the title: brace-protect words containing at least two
    # capital letters so that their case is kept.
    my $textitle;
    ($textitle = $title) =~ s/(([a-zA-Z0-9&-]*[A-Z]){2}[a-zA-Z0-9&-]*)/{$1}/g;
    #ITU recommendations
    $textitle =~ s/([A-Z](\.\d+)+(\(\d+\))?)/{$1}/g;
    $textitle =~ s/(Java|Kerberos|Microsoft)/{$1}/g;

    # Rewrite every author as "<initials and lower-case particles> <surname>".
    my @authors;
    foreach my $raw_author ( split /,/, $authors ) {
        # NOTE(review): how the accented range in the first character class
        # is interpreted depends on the encoding of this source file - confirm.
        my ($givennames, $surname, $aux) = $raw_author =~ /^\s*([a-zÄ-üA-Z.' -]+) ([a-zA-Z.'-]+)\W*(\d.*|[Jj]r\.?)?\s*$/
            or die "cannot parse author '$raw_author' of draft $tag";

        my $formatted = "";
        # split ' ' ignores repeated blanks, so doubled spaces in the input
        # do not add empty name parts.
        foreach my $givenname ( split ' ', $givennames ) {
            if ( $givenname =~ /^([a-z]+|De|Van)$/ ) {
                # Name particles are kept whole, in lower case.
                $formatted .= lc($givenname) . " ";
            }
            else {
                # Anything else is reduced to initials; hyphenated names keep
                # their hyphen ("Jean-Paul" becomes "J.-P.").
                my @initials = map { substr( $_, 0, 1 ) . "." } split /-/, $givenname;
                $formatted .= join( "-", @initials ) . " ";
            }
        }

        # A trailing suffix ("Jr." or text starting with a digit) is grouped
        # with the surname in braces.  Tested with defined() so that a suffix
        # of "0" is not dropped.
        if ( defined $aux ) {
            $formatted .= "{$surname $aux}";
        }
        else {
            $formatted .= "$surname";
        }
        push @authors, $formatted;
    }

    print "dn: cn=$tag,$basedn\n";
    print "cn: $tag\n";
    print "objectclass: top\n";
    print "objectclass: bibtexEntry\n";
    print "objectclass: bibtexUnpublished\n";
    print "bibtexNote: Internet Draft\n";
    print "bibtexTitle: $title\n";
    if ( $title ne $textitle ) {
        print "bibtexTitle;lang-x-tex: $textitle\n";
    }

    # Only months 01-12 map to a name; "00" would otherwise index from the
    # end of @mon and be reported as "dec".
    if ( $month >= 1 && $month <= 12 ) {
        print "bibtexMonth: $mon[$month-1]\n";
    }
    else {
        warn "draft $tag: month '$month' out of range, bibtexMonth omitted\n";
    }
    print "bibtexYear: $year\n";
    print "labeledUri: $baseurl/$tag.txt ASCII\n";

    foreach my $author (@authors) {
        print "bibtexAuthor: $author\n";
    }
    if ( @authors > 1 ) {
        print "bibtexAuthor;lang-x-tex: ".join(" and ", @authors)."\n";
    }
    print "\n";

#      print "wg: $wg\n";
#      print "title: $title\n";
#      print "authors: $authors\n";
#      print "month: $mon[$month-1]\n";
#      print "day: $day\n";
#      print "year: $year\n";
#      print "tag: $tag\n";
#      print "abstract: $abstract\n\n";
}





