#!/usr/bin/perl
# By Gabor Szabo
# Patch by Orna Agmon
#
# Builds an RSS 2.0 feed from a directory of HTML files.
#
# Usage: script future-directory-name rss2-file
#
# Every file in the given directory is parsed with HTML::Parser. The parser
# callbacks count <td> elements and pick the following fields out of the
# table cells:
#   td 1 - id (plain text of the cell)
#   td 2 - lecture link (href of <a>) and lecture title (text of <a>)
#   td 3 - person link (href of <a>) and person name (text of <a>)
#   td 4 - date, expected as day/month/year with a four digit year
#   td 5 - free-form comment; <a> elements are kept as links
# One RSS item is added per input file and the feed is saved to rss2-file.

use strict;
use warnings;

my $VERSION = '0.01';

use HTML::Parser;
use Data::Dumper qw(Dumper);
use XML::RSS;
use POSIX qw(mktime strftime);

my ($dir, $outfile) = @ARGV;
$outfile or die "Usage: $0 future-directory-name rss2-file\n";

my $td;        # counting the td elements
my %data;      # hold all the data about one date
my @events;    # the data for all the future events
my @tree;      # stack of current items

my $rss2 = XML::RSS->new(version => '2.0');

# NOTE(review): RSS 2.0 asks for RFC 822 dates, while "scalar localtime"
# produces the ctime format. The format is left unchanged here so the
# generated feed stays the same; consider switching to strftime.
$rss2->channel(
    title          => 'Haifux',
    'link'         => 'http://www.haifux.org/',
    language       => 'en-us',
    description    => 'Haifux Linux Club',
    rating         => '(PICS-1.1 "http://www.classify.org/safesurf/" 1 r (SS~~000 1))',
    copyright      => 'Copyright 2004, Haifux.org',
    pubDate        => scalar localtime(),
    lastBuildDate  => scalar localtime(),
    docs           => 'http://blogs.law.harvard.edu/tech/rss',
    managingEditor => 'webmaster at haifux.org',
    webMaster      => 'webmaster at haifux.org',
    ttl            => "360",
    generator      => "Perl and XML::RSS",
);

my $url = "http://www.haifux.org";

foreach my $input_file (glob "$dir/*") {

    # Reset all per-file parser state. The tag stack is cleared as well so
    # that unbalanced markup in one file cannot leak into the next one.
    $td   = 0;
    @tree = ();
    %data = (comment => '');

    my $p = HTML::Parser->new(
        api_version     => 3,
        start_h         => [\&start, "tagname, attr"],
        end_h           => [\&end,   "tagname"],
        text_h          => [\&text,  "dtext"],
        marked_sections => 1,
    );

    # parse_file returns a false value when the file cannot be opened.
    if (not $p->parse_file($input_file)) {
        warn "Cannot parse $input_file: $!\n";
        next;
    }

    # Guard against a missing date so split does not see undef.
    my $date = defined $data{date} ? $data{date} : '';
    my ($day, $month, $year) = split /\//, $date;

    # default values in case the date was missing
    $day   ||= 1;
    $month ||= 1;
    $year  ||= 2009;    # expecting a date in four digits for the year

    # assuming meetings start at 18:30
    my $time_then = mktime(0, 30, 18, $day, $month - 1, $year - 1900);

    print "$data{lecture_text}\n";

    $rss2->add_item(
        title       => $data{lecture_text},
        'link'      => "$url/$data{lecture_link}",
        permaLink   => "$url/$data{lecture_link}",
        description => $data{comment},
        author      => qq(<a href="$data{person_link}">$data{person_text}</a>),
        pubDate     => scalar localtime($time_then),
        category    => "Meetings",
    );

    my %t = %data;    # to make sure we store a copy of the structure
    push @events, \%t;
}

#print Dumper \@events;
$rss2->save($outfile);
exit;    # so we know here is the end of it :)

# Start-tag handler for HTML::Parser.
#   $tagname - name of the element being opened
#   $attr    - hash reference with the attributes of the element
# Counts <td> elements, pushes the tag on the @tree stack and records the
# href of links found in cells 2, 3 and 5.
# NOTE(review): elements that never get an end tag (for example <br>) are
# pushed here but nothing pops them - verify against the real input HTML.
sub start {
    my ($tagname, $attr) = @_;

    $td++ if $tagname eq "td";    # count the table cell we are in
    push @tree, $tagname;

    if ($td == 2 and $tagname eq "a") {
        $data{lecture_link} = $attr->{href};
    }
    if ($td == 3 and $tagname eq "a") {
        $data{person_link} = $attr->{href};
    }
    if ($td == 5 and $tagname eq "a") {
        $data{comment} .= qq(<a href="$attr->{href}">);
        print "1comment $data{'comment'}";
    }
    return;
}

# End-tag handler for HTML::Parser.
#   $tagname - name of the element being closed
# Pops one entry from the @tree stack and dies with "Not symmetric" when an
# end tag arrives while the stack is empty. Closes links inside cell 5.
sub end {
    my ($tagname) = @_;

    @tree or die "Not symmetric\n";
    pop @tree;

    if ($td == 5 and $tagname eq "a") {
        $data{comment} .= qq(</a>);
        print "2comment $data{'comment'}";
    }
    return;
}

# Text handler for HTML::Parser (receives decoded text).
#   $text - the text chunk
# Stores the chunk in %data depending on the current cell number and on the
# innermost open element. Text seen while no element is open is ignored.
sub text {
    my ($text) = @_;

    return if not @tree;

    # work around - html parser interprets nbsp as char 160,
    # it should be ' '. Every occurrence is replaced, not only the first.
    $text =~ s/\xA0/ /g;

    my $current = $tree[-1];

    if ($td == 1 and $current eq "td") {
        $data{id} = $text;
    }
    if ($td == 2 and $current eq "a") {
        $data{lecture_text} = $text;
    }
    if ($td == 3 and $current eq "a") {
        $data{person_text} = $text;
    }

    # The parentheses matter: "and" binds tighter than "or", so without them
    # the text of any <div> in any cell would overwrite the date.
    if ($td == 4 and ($current eq "td" or $current eq "div")) {
        $data{date} = $text;
    }
    if ($td == 5 and $current) {
        $data{comment} .= $text;
    }
    return;
}