#!/usr/bin/perl ### wwwboard-to-mysql.pl ################################################################# # Example code that reads in all messages from a wwwboard # and makes a MySql compatible file using the "INSERT" # command. # # You can then convert all the posts on your board to something # more modern like "Nuke" or a PHP/MySql based board. # # You WILL have to modify the file output print lines for the # "INSERT" MySql command to suit your particular MySql database # format. A example "CREATE TABLE" command is also included so # you can test this using PhpMyAdmin. When done you can use # a program such as PhpMyEdit to edit your messages, or search # for records. All special characters that MySql would use # are "escaped" and the output is directly html compatable. # # It creates a new "threadid" for each top post on the main # wwwboard html page, then each followup is a new "postid" # ordered by how they appear on the main page. Later you can # have MySql sort records by date or other factors. The idea # is to have the "top" starting post show with all the # followups below it. You can always modify the code as you # wish. # # Extracting the date is difficult. The date style used here # is XX/XX/XXXX or a two digit year. If your board shows # something different you may need to modify the code to # either leave the date alone or extract the date correctly. # Use print statements to show what date you are capturing. # # If your wwwboard has been operating for years it's main # page may have some formatting problems here and there. The # original code just skipped over this, so check the output # file carefully and edit your main file where there are # problems. # # Expect to lose a few records in the conversion and check # the output file carefully to make sure all data is # processed correctly. # # WARNING: BACK UP ALL YOUR MESSAGES FIRST!!!! Just in case. # # This program is READ ONLY on your messages, and writes # only the "wwwboard-output.sql" file, but don't under # estimate a computers ability to screw up your data. # # If you have the space, make a copy of all your messsages # and place them into a new directory and change the settings # below accordingly. If your board is busy this is a better # way to do this because we take a lot of time opening # messages and reading them. # # This program is not perfect, so you may have to change a # few lines, but if your board is standard and not modified # very much, it should work as - is. All normal disclaimers # apply, if you use this you do so at your own risk. # # For more examples and free sources, visit www.nocrash.com # # V1.1 11/02/2002 # ################################################################# ################################################################# # NOTE: We look for certian "keywords" in each mesage file to # figure out where the message text starts, so here # they are, if you are having problems with strange text # then look in your message files and find something # that will identify the start/end of the message text. # This identifies the LINE that this text is on, so we # generally know where the real info starts and ends. ################################################################# $start_keyword = "Posted by"; $end_keyword = "
\n"; open( OUTFILE, ">$basedir/$outputfile" ) || die $!; open( MSGS, "$basedir/$mesgfile" ) || die $!; @lines =; close(MSGS); $lastline = ""; foreach $line (@lines) { if ( $lastline =~ / (.*)<\/a> - (.*)<\/b>\s+(.*)<\/i>/ ) { push @ENTRIES, $1; $SUBJECT{$1} = $2; $AUTHOR{$1} = $3; $DATE{$1} = $4; ## we may or may not have a good date string here } ## end if ( $line =~ ... } ## end if ( $lastline =~ // ) { $top = $j; } elsif ( $lines[$j] =~ // ) { $bottom = $j; } } ## end foreach ( $j = 0 ; $j <= @lines... if ( $top && $bottom ) { $diff = ( $bottom - $top ); $diff++; for ( $k = $top ; $k <= $bottom ; $k++ ) { if ( $lines[$k] =~ // ) { push ( @FILESX, $1 ); } } $postid = 0; splice( @lines, $top, $diff ); # take lines out so we don't use them again foreach $num (@FILESX) { $filename = "$basedir/$mesgdir/$num\.$ext"; ## get the message text if ( !open( MSGS2, "$filename" ) ) { print OUTFILE "\n#\n#\n#ERROR - Can't find/open '$filename'\n#\n#\n"; next; } @lines2 = ; close(MSGS2); $msg = ""; $gonextline = "0"; foreach $line2 (@lines2) { ### Use this if you want to extract the subject fom each sub message ### ### if ($line2 =~ / (.*)<\/h1>/) { ### $subjectX = $1; ### } ### if ( $line2 =~ $start_keyword ) { $line2 =~ /by (.*) on/; $name = $1; $line2 =~ /on (.*):/; ## extract the date from this line $date = $1; $date =~ /(.*)\/(.*)\/(.*)/; ## format is XX/XX/XXXX $month = $1; $day = $2; $year = $3; $gonextline = "1"; if ( $year < 10 ) { $year += 2000; } # old date format "02" "01" etc... if ( $year < 110 ) { $year += 1900; } # date "99" or "98" or even secrewed up dates "102" etc... next; } ## end if ( $line2 =~ $start_keyword) if ( $line2 =~ "In Reply to:" ) { next; } if ( $line2 eq "
\n" ) { # take out that stray
on one line next; } if ( $gonextline eq "1" ) { ## we have the text we want collect it till end if ( $line2 =~ $end_keyword ) { ## end of the text we want last; } $msg .= $line2; } ## end if ( $gonextline eq "1") } ## end foreach $line2 (@lines2) ### print to the browser so you know what's going on print "FILENAME=$filename By $name on $month/$day/$year\n"; if ( $month > 0 ) { $monthX = $month - 1; } # adjust for timelocal format, then make a "seconds since the Epoch (Midnight, January 1, 1970)" date $date = scalar timelocal( 1, 0, 0, $day, $monthX, $year ) ; # this will cause errors and quit if dates are out of limits #### Here we do some special processing to remove HTML if needed and make everything SQL ready $msg =~ s//\n\n/g; # prepare to remove any stray html, save the returns in text $msg =~ s/
/\n/g; $msg =~ s//\n\n/g; $msg =~ s/
/\n/g; if ($enable_html == 0) { $msg =~ s/<([^>]|\n)*>//g; # take out any other HTML } $msg = html_sql($msg); # make sure it's all HTML and MySql ready #### Make the name safe, html is still included for the E-Mail line if HTML is allowed $name =~ s/\n//g; # no \n in the name please if ($enable_html == 0) { $name =~ s/<([^>]|\n)*>//g; # take out any other HTML } $name = html_sql($name); # make sure it's all HTML and MySql ready ####################################################################################################### ####################################################################################################### ####################################################################################################### ### ### We print the output to the file ### ### Uncomment these lines to see if everything is OK ### ### print "FILENAME=$filename\nBy $name on $month/$day/$year Epoch $date\n$msg\n"; ### &recover_time($date); ### print "DATE=$date\n"; ### print "$msgnum -------------------------------------\n"; ### print OUTFILE "# $filename\n"; ### EXAMPLE ONLY!! You need to format these lines to your database requirements ### each output line is it's own MySql command so you can locate problem records print OUTFILE "INSERT INTO myposts (threadid, postid, date, name, subject, msg) VALUES \n"; print OUTFILE "($threadid,$postid,$date,\"$name\",\"$subjectX\",\"$msg\");\n"; ####################################################################################################### ####################################################################################################### ####################################################################################################### $postid++; } ## end foreach $num (@FILESX) } ## end if ( $top && $bottom ) $threadid++; } ## end main loop print OUTFILE "\n\n"; # MySql end close(OUTFILE); print "DONE\n"; print "