#!/usr/bin/perl ### wwwboard-to-mysql.pl ################################################################# # Example code that reads in all messages from a wwwboard # and makes a MySql compatible file using the "INSERT" # command. # # You can then convert all the posts on your board to something # more modern like "Nuke" or a PHP/MySql based board. # # You WILL have to modify the file output print lines for the # "INSERT" MySql command to suit your particular MySql database # format. A example "CREATE TABLE" command is also included so # you can test this using PhpMyAdmin. When done you can use # a program such as PhpMyEdit to edit your messages, or search # for records. All special characters that MySql would use # are "escaped" and the output is directly html compatable. # # It creates a new "threadid" for each top post on the main # wwwboard html page, then each followup is a new "postid" # ordered by how they appear on the main page. Later you can # have MySql sort records by date or other factors. The idea # is to have the "top" starting post show with all the # followups below it. You can always modify the code as you # wish. # # Extracting the date is difficult. The date style used here # is XX/XX/XXXX or a two digit year. If your board shows # something different you may need to modify the code to # either leave the date alone or extract the date correctly. # Use print statements to show what date you are capturing. # # If your wwwboard has been operating for years it's main # page may have some formatting problems here and there. The # original code just skipped over this, so check the output # file carefully and edit your main file where there are # problems. # # Expect to lose a few records in the conversion and check # the output file carefully to make sure all data is # processed correctly. # # WARNING: BACK UP ALL YOUR MESSAGES FIRST!!!! Just in case. # # This program is READ ONLY on your messages, and writes # only the "wwwboard-output.sql" file, but don't under # estimate a computers ability to screw up your data. # # If you have the space, make a copy of all your messsages # and place them into a new directory and change the settings # below accordingly. If your board is busy this is a better # way to do this because we take a lot of time opening # messages and reading them. # # This program is not perfect, so you may have to change a # few lines, but if your board is standard and not modified # very much, it should work as - is. All normal disclaimers # apply, if you use this you do so at your own risk. # # For more examples and free sources, visit www.nocrash.com # # V1.1 11/02/2002 # ################################################################# ################################################################# # NOTE: We look for certian "keywords" in each mesage file to # figure out where the message text starts, so here # they are, if you are having problems with strange text # then look in your message files and find something # that will identify the start/end of the message text. # This identifies the LINE that this text is on, so we # generally know where the real info starts and ends. ################################################################# $start_keyword = "Posted by"; $end_keyword = "Make Archive File\n"; print "
\n";

open( OUTFILE, ">$basedir/$outputfile" ) || die $!;

open( MSGS, "$basedir/$mesgfile" ) || die $!;
@lines = ;
close(MSGS);

$lastline = "";

foreach $line (@lines) {
	 if ( $lastline =~ /
  • (.*)<\/a> - (.*)<\/b>\s+(.*)<\/i>/ ) { push @ENTRIES, $1; $SUBJECT{$1} = $2; $AUTHOR{$1} = $3; $DATE{$1} = $4; ## we may or may not have a good date string here } ## end if ( $line =~ ... } ## end if ( $lastline =~ // ) { $top = $j; } elsif ( $lines[$j] =~ // ) { $bottom = $j; } } ## end foreach ( $j = 0 ; $j <= @lines... if ( $top && $bottom ) { $diff = ( $bottom - $top ); $diff++; for ( $k = $top ; $k <= $bottom ; $k++ ) { if ( $lines[$k] =~ // ) { push ( @FILESX, $1 ); } } $postid = 0; splice( @lines, $top, $diff ); # take lines out so we don't use them again foreach $num (@FILESX) { $filename = "$basedir/$mesgdir/$num\.$ext"; ## get the message text if ( !open( MSGS2, "$filename" ) ) { print OUTFILE "\n#\n#\n#ERROR - Can't find/open '$filename'\n#\n#\n"; next; } @lines2 = ; close(MSGS2); $msg = ""; $gonextline = "0"; foreach $line2 (@lines2) { ### Use this if you want to extract the subject fom each sub message ### ### if ($line2 =~ /

    (.*)<\/h1>/) { ### $subjectX = $1; ### } ### if ( $line2 =~ $start_keyword ) { $line2 =~ /by (.*) on/; $name = $1; $line2 =~ /on (.*):/; ## extract the date from this line $date = $1; $date =~ /(.*)\/(.*)\/(.*)/; ## format is XX/XX/XXXX $month = $1; $day = $2; $year = $3; $gonextline = "1"; if ( $year < 10 ) { $year += 2000; } # old date format "02" "01" etc... if ( $year < 110 ) { $year += 1900; } # date "99" or "98" or even secrewed up dates "102" etc... next; } ## end if ( $line2 =~ $start_keyword) if ( $line2 =~ "In Reply to:" ) { next; } if ( $line2 eq "
    \n" ) { # take out that stray
    on one line next; } if ( $gonextline eq "1" ) { ## we have the text we want collect it till end if ( $line2 =~ $end_keyword ) { ## end of the text we want last; } $msg .= $line2; } ## end if ( $gonextline eq "1") } ## end foreach $line2 (@lines2) ### print to the browser so you know what's going on print "FILENAME=$filename By $name on $month/$day/$year\n"; if ( $month > 0 ) { $monthX = $month - 1; } # adjust for timelocal format, then make a "seconds since the Epoch (Midnight, January 1, 1970)" date $date = scalar timelocal( 1, 0, 0, $day, $monthX, $year ) ; # this will cause errors and quit if dates are out of limits #### Here we do some special processing to remove HTML if needed and make everything SQL ready $msg =~ s/

    /\n\n/g; # prepare to remove any stray html, save the returns in text $msg =~ s/
    /\n/g; $msg =~ s/

    /\n\n/g; $msg =~ s/
    /\n/g; if ($enable_html == 0) { $msg =~ s/<([^>]|\n)*>//g; # take out any other HTML } $msg = html_sql($msg); # make sure it's all HTML and MySql ready #### Make the name safe, html is still included for the E-Mail line if HTML is allowed $name =~ s/\n//g; # no \n in the name please if ($enable_html == 0) { $name =~ s/<([^>]|\n)*>//g; # take out any other HTML } $name = html_sql($name); # make sure it's all HTML and MySql ready ####################################################################################################### ####################################################################################################### ####################################################################################################### ### ### We print the output to the file ### ### Uncomment these lines to see if everything is OK ### ### print "FILENAME=$filename\nBy $name on $month/$day/$year Epoch $date\n$msg\n"; ### &recover_time($date); ### print "DATE=$date\n"; ### print "$msgnum -------------------------------------\n"; ### print OUTFILE "# $filename\n"; ### EXAMPLE ONLY!! You need to format these lines to your database requirements ### each output line is it's own MySql command so you can locate problem records print OUTFILE "INSERT INTO myposts (threadid, postid, date, name, subject, msg) VALUES \n"; print OUTFILE "($threadid,$postid,$date,\"$name\",\"$subjectX\",\"$msg\");\n"; ####################################################################################################### ####################################################################################################### ####################################################################################################### $postid++; } ## end foreach $num (@FILESX) } ## end if ( $top && $bottom ) $threadid++; } ## end main loop print OUTFILE "\n\n"; # MySql end close(OUTFILE); print "DONE\n"; print "

  • \n"; exit; ############################################################# # Make HTML and MySql compatable strings, take out any # control characters that don't belong. Sometimes you may # want to remove any \n before you call this routine. # This should be safe since it prevents server side includes # or other html and any sql commands. sub html_sql { local ($htmltemp) = $_[0]; $htmltemp =~ s/\&/\&/g; # and sign $htmltemp =~ s/\\/\\/g; # backslash $htmltemp =~ s/'/\'/g; # single quote (for sql) $htmltemp =~ s/%/\%/g; # percent (for sql) $htmltemp =~ s/_/\_/g; # underscore (for sql) $htmltemp =~ s/\b//g; # backspace $htmltemp =~ s/\r//g; # no returns in here $htmltemp =~ s/\x1a//g; # ctl z $htmltemp =~ s/\t/  /g; # tab, give them a few spaces $htmltemp =~ s/\0//g; # null (for sql) $htmltemp =~ s/\cM//g; # cntl M just for fun if ($enable_html == 0) { $htmltemp =~ s//\>/g; # > $htmltemp =~ s/"/\"/g; # double quote (for sql) } else { $htmltemp =~ s/"/\\"/g; # double quote we have to escape this for sql entry } $htmltemp =~ s/\n\n/

    /g; # do this one first $htmltemp =~ s/\n/
    /g; return $htmltemp; } ############################################################# # un-make HTML and MySql compatable strings from # strings we converted earlier, output is normal text. sub un_html_sql { local ($htmltemp) = $_[0]; $htmltemp =~ s/

    /\n\n/g; # put the end of line stuff back $htmltemp =~ s/
    /\n/g; $htmltemp =~ s/\\/\\/g; # backslash $htmltemp =~ s/\'/'/g; # single quote (for sql) $htmltemp =~ s/\%/%/g; # percent (for sql) $htmltemp =~ s/\_/_/g; # underscore (for sql) $htmltemp =~ s/ / /g; # tab or space put it back $htmltemp =~ s/\&/\&/g; # and sign $htmltemp =~ s/\<//g; # > $htmltemp =~ s/\"/"/g; # double quote (for sql) return $htmltemp; } ############################################################################## # Recover a time. ############################################################################## sub recover_time { ( $sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst ) = localtime( $_[0] ); $year += 1900; $date = sprintf( "%02d:%02d:%02d %02d/%02d/%04d", $hour, $min, $sec, $mon + 1, $mday, $year ); $day = sprintf( "%02d/%02d/%04d", $mon + 1, $mday, $year ); $today = sprintf( "%02d", $mday ); $month = sprintf( "%02d", $mon + 1 ); } ## end sub recover_time ############################################################################## ############################################################################## ############################################################################## # Example table for MySql, set up here so it's easy to copy/paste into # something like PhpMyAdmin $null_nuthing = " CREATE TABLE myposts ( id int(11) DEFAULT '0' NOT NULL auto_increment, threadid int(11) DEFAULT '0' NOT NULL, postid int(11) DEFAULT '0' NOT NULL, date int(11) DEFAULT '0' NOT NULL, name text NOT NULL, subject text NOT NULL, msg text NOT NULL, keywords varchar(255) DEFAULT '' NOT NULL, ip varchar(20) DEFAULT '' NOT NULL, spare varchar(255) DEFAULT '' NOT NULL, PRIMARY KEY (id) ); "; ############################################################################## ############################################################################## ##############################################################################