# ourweb.pl - Generate list of links to local virtual web sites
# Copyright 2018 David Meyer <papa@sdf.org> +JMJ
# Copying and distribution of this file, with or without
# modification, are permitted in any medium without royalty
# provided the copyright notice and this notice are preserved.
# This file is offered as-is, without any warranty.
# (GNU All-Permissive License)
# ourweb.pl is part of the Our Web project.

=head1 ourweb.pl - Generate list of links to local virtual web sites

ourweb.pl scans the local file system, identifies the root directory for
user virtual web sites, and generates an HTML document with a list of
links to all user site. A random fortune cookie text is appended at the 
bottom of each page.

This program is part of the Our Web project.

	ourweb.pl  - This program
	ourgoph.pl - Generate list of links to user virtual gopher sites
	newweb.pl  - Generate list of recently added or changed web content
	newgoph.pl - Generate list of recently added or changed gopher 

By default this program lists all user virtual web sites on the local 
host. If a user wishes to exclude his site from the list, creating an 
empty file named ".notours" (read as "Not Ours") in his site's root
directory will cause the site to be left off the site link list. 

=head2 Configuration

	WEBHOST  - Protocol and host portion of URL for local web server
	ROOTGLOB - One or more space-separated glob patterns defining local
	           user root directories (NOT root of web sites)
	FORTUNDB - Path to fortune cookie database file
	TMPLTFIL - Path to HTML template file


$WEBHOST = 'http://grex.org/';
$ROOTGLOB = "/?/?/*";
$FORTUNDB = '/usr/share/games/fortune/fortunes';
$TMPTPATH = '/p/a/papa/share/ourweb';
$TMPTFILE = 'ourweb.tt';

=head2 Module dependencies

	DateTime - List generation time stamp formatting
	Fortune  - Access fortune cookie database
	Template - Template Toolkit template management (all generated HTML
	           is defined in template; this program generates data 
	           values only)


use DateTime;
use Fortune;
use Template;

# Combine template creation with process call at end of prog.?
#$TT = Template->new (
#    { INCLUDE_PATH => '.',
#      INTERPOLATE => 1 }) || die "$Template::ERROR\n";

=head2 Function httitle() - Extract title from HTML file

httitle() tries to extract a title from an HTML file with given path.

The file title is considered to be the contents of the <title> tag, or 
if no <title> tag is found then the contents of the first <h1> tag
appearing in the file. If neither <title> nor <h1> tags are found, an
empty string is returned.

If a title string is found, leading and trailing whitespace characters
are removed, and single space characters are substituted for any 
embedded tab or newline characters (single or sequences).


sub httitle
    my ($path) = @_;
    my $fh, $title = '';
    open $fh, '<', $path or die "Can't open file $path: $!";
    # Slurp all file contents into string $htext
    my $htext = do { local ( $/ ) ; <$fh> } ;
    close $fh;
    # Extract title from contents of <title> or <h1> tags
    # *NOTE* 2018/2/19: Following regexps will not work if file has mult.
    #        <title> or <h1> tags.
    if ($htext =~ m|<title>\s*(.*)\s*</title>|i) { $title = $1; }
    elsif ($htext =~ m|<h1>\s*(.*)\s*</h1>|i) { $title = $1; }
    # Replace tab, newline with space
    if ($title) { $title =~ s/[\n\t]+/ /g; }
    return $title;


=head2 MAIN process 

=head3 Global variables

	sections - Structure for storing web site links grouped by initial 
	           character; part of template data interface. Used to 
	           generate both page-top section index and body web site 
	           link list. Stores array of references to hashes with keys:
	             heading - Heading section character
	             items - Link information for heading section; reference
	                     to array of references to hashes with keys:
	                       link - URL of user virtual web site
	                       label - Web site title
	sectno - Integer index of current section
	heading - Character heading of current section
	itemno  - Integer index of site link list item within heading


$sections = [];
$sectno = -1;
$heading = '';
$itemno = -1;

=head3 Loop for each user roo directory

	userroot - Path of current user root directory


for my $userroot (glob $ROOTGLOB)

=head4 Determine path for user virtual web site root directory

Web site root is either ~/public_html or ~/www directory in user's root.
Skip users who either have no web site root directory, or who have a 
file named ".notours" ("Not Ours") in their web site root directory.

	siteroot - Path of current user virtual web site root directory


    if (-d "$userroot/public_html" && ! -e "$userroot/public_html/.notours")
		my $siteroot = "$userroot/public_html";
    elsif (-d "$userroot/www" && ! -e "$userroot/www/.notours")
		my $siteroot = "$userroot/www";
    else { next; }
#    print "$userroot\n"; # debug

=head4 Extract site link list information

	initial  - First letter or user name; heading for alphabetic 
	username - Web site owner's user name
	siteurl  - URL for user web site
	indexf   - Path to user site root index file
	label    - User site link label text
	title    - Title text extracted from site root index file with

    my $initial = uc (substr $userroot, 1, 1);
    my $username = substr $userroot, 5;
    my $siteurl = $WEBHOST . '~' . $username . '/';

    my $indexf = $siteroot . '/index.html';
    my $label, $title = '';
    if (-r $indexf) { $title = httitle ($indexf); }
    if ($title) { $label = $username . ' &mdash; ' . $title; }
    else { $label = $username; }

=head4 Alphabetic section control break


    if ($initial ne $heading)
	++ $sectno; 
#	print "$sectno\n"; # debug
	$itemno = -1;
	$heading = $initial;
	$sections->[$sectno]->{heading} = $heading;

=head4 Record user web site link list item in current section

This ugly notation for accessing nested array and hash references is the
only place where Perl lets me down, but at least the current program
design keeps the ugliness to the one line above and the two below.


    ++ $itemno;
#    print "$itemno\n"; # debug
    $sections->[$sectno]->{items}->[$itemno]->{link} = $siteurl;
    $sections->[$sectno]->{items}->[$itemno]->{label} = $label;

=head3 The Big Finish

Generate date and time stamp and fortune cookie text for page footer, 
and then output list with template.


$timestmp = DateTime->now->strftime ("%A, %B %e, %Y %T %Z");

$fortune = new Fortune ($FORTUNDB)->read_header()->get_random_fortune();

$template = Template->new ({ INCLUDE_PATH => $TMPTPATH }) || die "$Template::ERROR\n";
$template->process (
    { date => $timestmp,
      fortune => $fortune,
      sections => $sections }) || die $template->error ();