Drupal2wiki

From GMOD
Jump to: navigation, search

Perl script written by Eric Just to copy Drupal nodes to a MediaWiki:

#!/usr/bin/perl -w
 
use strict;
use WWW::Mechanize;
use HTML::TreeBuilder::XPath;
use HTML::WikiConverter;
use Data::Dumper;
 
# Shared user agent for both the Drupal fetches and the wiki edits.
# autocheck is switched off explicitly: recent WWW::Mechanize releases
# default it to on, which makes every failed request die. The conversion
# loop inspects $mech->success() itself so that one bad page is reported
# and skipped instead of aborting the whole batch.
my $mech = WWW::Mechanize->new( autocheck => 0 );

# index.php entry point of the target MediaWiki; "?title=...&action=edit"
# is appended to it for each page.
my $wiki_url = "http://wiki.gmod.org/index.php";

# Drupal pages to copy, one URL per line.
my @urls = qw (
http://www.gmod.org/face_caucus_apidb_user_studies_and_impact_on_development
http://www.gmod.org/face_caucus_sgn_associating_solanaceae_loci_with_phenotype
http://www.gmod.org/face_caucus_ucsc_user_interface_issues_challenges_in_a_many_organism_database
http://www.gmod.org/january_2007_meeting
http://www.gmod.org/nih_2002_workshop_on_model_organism_databases
http://www.gmod.org/user-interface-caucus
http://www.gmod.org/mod_user_interfaces_outline_of_topics
http://www.gmod.org/mod_user_interfaces_sample_of_several_mod_top_level_functions
);
 
 
# Main conversion loop: for every Drupal URL, fetch the page, strip the
# Drupal-specific chrome, convert the remaining content to MediaWiki markup
# and post it through the wiki's edit form under the page's title.
foreach my $url_to_convert (@urls) {

    $mech->get( $url_to_convert );

    unless ( $mech->success() ) {
        warn "could not get page: $url_to_convert";
        next;
    }

    # parse_content() is parse() followed by eof(); without eof() the
    # tree builder never finalises the document.
    my $tree = HTML::TreeBuilder::XPath->new();
    $tree->parse_content( $mech->content() );

    # Get title: prefer the node heading, fall back to <title> with the
    # " | site name" suffix removed.
    my $title = $tree->findvalue( '//h1[@class="title"]')->value();
    if ( !$title ) {
        $title = $tree->findvalue( '//title')->value();
        $title =~ s/ [|].+//g;
    }
    # Surrounding whitespace/newlines must not end up in the page name.
    $title =~ s/\A\s+|\s+\z//g if defined $title;
    die "could not parse title for $url_to_convert" if !$title;

    # delete navigation links for now (only the first nav block is removed)
    my $nav_node = $tree->findnodes( '//div[@id="main"]//div[@class="nav"]')->[0];
    $nav_node->delete() if $nav_node;

    # delete submitted span
    for my $submitted_node ( @{ $tree->findnodes( '//span[@class="submitted"]') } ) {
        $submitted_node->delete();
    }

    # delete drupal links section
    for my $links_node ( @{ $tree->findnodes( '//div[@class="links"]') } ) {
        $links_node->delete();
    }

    my $content_nodes = $tree->findnodes( '//div[@id="main"]//div[@class="content"]');

    # wiki page text, built up from every content div on the page
    my $wiki_text = '';
    foreach my $node ( @{$content_nodes} ) {
        my $html = $node->as_HTML();

        # drop inline style attributes before conversion
        $html =~ s/ style="{?([^}"]+)}?"//g; # "

        # convert to wikimedia format
        my $wc = HTML::WikiConverter->new( dialect => 'MediaWiki' );
        my $converted_text = $wc->html2wiki( $html );

        # strip out leftover div tags
        $converted_text =~ s/<\/?div[^>]*>//g;

        # Now all internal links (not starting with http)
        # change [some link]
        # to:  [[some link]]
        # change [?q=node/71 GBrowse] into [[GBrowse]]
        # NOTE(review): any bracketed link whose target does not start
        # with "http" (e.g. ftp: or mailto:) is rewritten as well.
        $converted_text =~ s/\[(?!http)([^\s]*) ([^\]]+)\]/[[$2]]/g;

        # append node to new wiki page text
        $wiki_text .= $converted_text."\n";
    }

    # Everything needed has been extracted; release the parse tree
    # explicitly so memory does not accumulate across pages.
    $tree->delete;

    # Never post an empty edit: it would create or blank a wiki page.
    if ( $wiki_text !~ /\S/ ) {
        warn "no content found on page: $url_to_convert";
        next;
    }

    # now simply find or create the page
    # and paste wiki text into edit box,
    # submit the form, and there's your new page!
    # The title is percent-encoded so that spaces, '&', '#', '?' and
    # non-ASCII characters cannot corrupt the query string.
    my $url = $wiki_url."?title="._percent_encode_title($title)."&action=edit";
    my $posted = eval {
        warn $url;
        $mech->get($url);
        die "could not open edit form at $url" unless $mech->success;

        $mech->submit_form(
            form_number => 1,
            fields      => { wpTextbox1 => $wiki_text },
        );
        die "could not save wiki page '$title'" unless $mech->success;
        1;
    };
    if ( !$posted ) {
        # $@ can be empty if it was clobbered during unwinding.
        my $error = $@ || 'unknown error';
        print "An error occured : ".$error."\n";
    }
    else {
        print "converted $title\n";
    }
}

# Percent-encode a page title for use as a URL query-string value.
# The text is encoded to UTF-8 first, then every byte outside the RFC 3986
# "unreserved" set (A-Z a-z 0-9 - . _ ~) is replaced by %XX.
# Takes the title string; returns the encoded copy (input is not modified).
sub _percent_encode_title {
    my ($text) = @_;
    my $bytes = $text;
    utf8::encode($bytes);
    $bytes =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
    return $bytes;
}