#!/usr/bin/perl

#By Gabor Szabo
#Patch by Orna Agmon
use strict;
use warnings;

my $VERSION = '0.01';

use HTML::Parser;
use Data::Dumper qw(Dumper);
use XML::RSS;
use POSIX qw(mktime strftime);

# Command line: the directory holding the future-event HTML files, then the
# RSS file to write. A missing (or false) second argument aborts with usage.
my ($dir, $outfile) = @ARGV;
die "Usage: $0 future-directory-name  rss2-file\n" unless $outfile;


# State shared between the main loop and the parser callbacks below.
my $td;     # number of <td> start tags seen so far in the current file
my %data;   # fields collected for the event described by the current file
my @events; # one hash reference per processed file
my @tree;   # stack of the tag names that are currently open

# RSS 2.0 expects RFC 822 date-times. Forcing the C locale for LC_TIME keeps
# the day and month abbreviations from strftime in English regardless of the
# environment the script is run in.
POSIX::setlocale(POSIX::LC_TIME(), 'C');
my $build_date = strftime('%a, %d %b %Y %H:%M:%S GMT', gmtime());

# The feed object; items are added to it by the per-file loop.
my $rss2 = XML::RSS->new(version => '2.0');
$rss2->channel(
		title          => 'Haifux',
		'link'         => 'http://www.haifux.org/',
		language       => 'en-us',
		description    => 'Haifux Linux Club',
		rating         => '(PICS-1.1 "http://www.classify.org/safesurf/" 1 r (SS~~000 1))',
		copyright      => 'Copyright 2004, Haifux.org',
		pubDate        => $build_date,
		lastBuildDate  => $build_date,
		docs           => 'http://blogs.law.harvard.edu/tech/rss',
		managingEditor => 'webmaster at haifux.org',
		webMaster      => 'webmaster at haifux.org',
		ttl            => "360",
		generator      => "Perl and XML::RSS",
	);

# Prefix used to turn the hrefs found in the HTML files into item links.
my $url = "http://www.haifux.org";


# RSS 2.0 expects RFC 822 date-times. Forcing the C locale for LC_TIME keeps
# the day and month abbreviations from strftime in English regardless of the
# environment the script is run in.
POSIX::setlocale(POSIX::LC_TIME(), 'C');

# Turn every file in the input directory into one RSS item.
foreach my $input_file (glob "$dir/*") {

	# Sub-directories and other non-files cannot be parsed as HTML.
	next unless -f $input_file;

	# Reset the state shared with the start(), end() and text() callbacks.
	# The tag stack is cleared too, so a file with unbalanced tags cannot
	# leak its leftovers into the next file.
	$td   = 0;
	%data = (comment => '');
	@tree = ();

	my $p = HTML::Parser->new( api_version => 3,
			start_h => [\&start, "tagname, attr"],
			end_h   => [\&end,   "tagname"],
			text_h  => [\&text,  "dtext"],
			marked_sections => 1,
	);

	# parse_file returns undef (with $! set) when the file cannot be opened.
	if (not defined $p->parse_file($input_file)) {
		warn "Cannot parse $input_file: $!\n";
		next;
	}

	# Fields the callbacks did not find become empty strings, so they can be
	# interpolated below without "uninitialized value" warnings.
	foreach my $field (qw(lecture_text lecture_link person_text person_link)) {
		$data{$field} = '' unless defined $data{$field};
	}

	# The date is read as day/month/year with a four digit year. Pieces that
	# are absent or not plain digits fall back to the defaults below.
	my $date_text = defined $data{date} ? $data{date} : '';
	my ($day, $month, $year) =
		map { /^\s*(\d+)\s*$/ ? $1 : undef } split m{/}, $date_text;
	$day    ||= 1;
	$month  ||= 1;
	$year   ||= 2009;

	my $time_then = mktime(0, 30, 18, $day, $month-1, $year-1900); # assuming meetings start at 18:30

	print "$data{lecture_text}\n";

	my %item = (
		title       => $data{lecture_text},
		'link'      => "$url/$data{lecture_link}",
		permaLink   => "$url/$data{lecture_link}",
		description => $data{comment},
		author      => "&lt;a href=&quot;" . $data{person_link} . "&quot;&gt;" . $data{person_text} . "&lt;/a&gt;",
		category    => "Meetings",
	);
	# mktime returns undef for a date it cannot represent; in that case the
	# item is published without a pubDate rather than with a bogus one.
	if (defined $time_then) {
		$item{pubDate} = strftime('%a, %d %b %Y %H:%M:%S GMT', gmtime($time_then));
	}
	$rss2->add_item(%item);

	# Store a copy, because %data is reused for the next file.
	push @events, { %data };
}

# Debugging aid: uncomment to dump everything that was collected.
#print Dumper \@events;

# Write the finished feed, then stop explicitly so that execution never runs
# on into the callback definitions below.
$rss2->save($outfile);
exit 0;


# Start-tag callback for HTML::Parser.
#   $tagname - name of the tag that was opened
#   $attr    - hash reference holding the attributes of the tag
# Counts table cells in $td, pushes the tag on @tree, and records the href of
# links found in cells 2 (lecture), 3 (person) and 5 (comment) in %data.
sub start {
	my ($tagname, $attr) = @_;

	# $td tells the other callbacks which table cell we are in.
	if ($tagname eq "td") {
		$td += 1;
	}
	push @tree, $tagname;

	# Only links carry information that is collected here.
	return unless $tagname eq "a";

	if ($td == 2) {
		$data{lecture_link} = $attr->{href};
	}
	elsif ($td == 3) {
		$data{person_link}  = $attr->{href};
	}
	elsif ($td == 5) {
		# Links inside the comment cell are kept as entity-escaped markup.
		$data{comment}     .= qq(&lt;a href=&quot;$attr->{href}&quot;&gt;);
		print "1comment $data{'comment'}";
	}
}

# End-tag callback for HTML::Parser.
#   $tagname - name of the tag that was closed
# Pops one entry off @tree and dies when there was nothing (true) to pop.
# The popped name is not compared with $tagname. Closing a link inside
# cell 5 appends the escaped closing tag to the comment.
sub end {
	my ($tagname) = @_;

	my $closed = pop @tree;
	die "Not symmetric\n" unless $closed;

	return unless $td == 5 && $tagname eq "a";

	$data{comment}     .= qq(&lt;/a&gt;);
	print "2comment $data{'comment'}";
}

# Text callback for HTML::Parser.
#   $text - decoded text found between tags
# Files the text under a key of %data chosen by the current cell number
# ($td) and the innermost open tag ($tree[-1]):
#   cell 1, directly in the td         -> id
#   cell 2, inside a link              -> lecture_text
#   cell 3, inside a link              -> person_text
#   cell 4, directly in a td or a div  -> date
#   cell 5, anywhere                   -> appended to comment
# Apart from comment, each field is assigned, so a later piece of text
# replaces an earlier one. Text seen while no tag is open is ignored.
sub text {
	my ($text) = @_;
	return if not @tree;

	# Work around: the parser decodes &nbsp; to character 160, but a plain
	# space is wanted. Every occurrence is replaced, not only the first one.
	$text =~ s/\xA0/ /g;

	my $top = $tree[-1];

	if ($td == 1 and $top eq "td") { $data{id}           = $text; }
	if ($td == 2 and $top eq "a")  { $data{lecture_text} = $text; }
	if ($td == 3 and $top eq "a")  { $data{person_text}  = $text; }
	# The parentheses matter: without them text inside a div in any cell
	# would be stored as the date.
	if ($td == 4 and ($top eq "td" or $top eq "div")) {
	  $data{date}         = $text;
	}
	if ($td == 5 and $top)         { $data{comment}     .= $text; }
}




