I'm trying to use the MediaWiki API to get all redirects for a certain Wikipedia title. I think I'm almost there, but I get lost in the complex data structure of hashes and arrays.
How can I extract the list of redirect titles?
In the example I query for 'Japan' and should get a list that looks like this:
'JPN',"Land der aufgehenden Sonne","Das Land der aufgehenden Sonne","Zipango","\x{65e5}\x{672c}","R\x{ec}b\x{11b}n"
(A side question: why do I not get UTF-8 strings in all cases?)
The code below is adapted from the MediaWiki::API documentation.
use strict;
use warnings;
use MediaWiki::API;
binmode STDOUT, ':utf8';
use JSON;
use Data::Dumper;

# Language edition of Wikipedia that is queried for redirects.
my $LANG = "de";
my $mw = MediaWiki::API->new( { api_url => 'https://en.wikipedia.org/w/api.php' } );
my $mw_LANG = MediaWiki::API->new( { api_url => "https://$LANG.wikipedia.org/w/api.php" } );
my $wikititle = "Japan";

# Ask the API for the redirects of $wikititle. A false return value means the
# call failed; the reason is then available in $mw_LANG->{error}.
my $alltitles = $mw_LANG->api( {
    action => 'query',
    titles => $wikititle,
    prop   => 'redirects',
    format => 'json',
} )
    or die $mw_LANG->{error}->{code} . ': ' . $mw_LANG->{error}->{details};

# Just print the result to see what the structure looks like.
print Dumper($alltitles);

# {query}{pages} is a hash keyed by page id. Only one title was requested, so
# the first key/value pair is the page we asked about.
my ( $pageid, $page ) = each %{ $alltitles->{query}->{pages} };
print "pageid $pageid\n";    # yes works: this prints the page id
Problem: how do I get the actual titles in the redirects array?
# Attempt to list the redirect titles of the page identified by $pageid.
my $relinks = $alltitles->{'query'}->{'pages'}->{$pageid}->{'redirects'}; #no does not work!
# NOTE(review): 'redirects' holds an array reference (one hash per redirect), so
# treating $relinks as a hash via ->{'title'} is the likely point of failure.
foreach my $el ( @{ $relinks->{'title'} } ) {
# NOTE(review): the '*' key looks carried over from a different query example;
# the redirect entries appear to use 'pageid' and 'title' instead -- confirm.
print " $el $el->{'*'}\n";
}
The query returns a hashref. One of the entries in this structure is `query`, which points to another hashref that contains `pages`. The `pages` hashref contains keys which are page ids. Each of these points to another hashref which contains a `redirects` entry, which is a reference to an array holding one hashref for every page that redirects to this page.
Putting all those together:
#!/usr/bin/env perl
# Print every redirect that points at a given Wikipedia title, using the
# MediaWiki API's prop=redirects query.
use strict;
use warnings;
use open qw(:std :utf8);
use MediaWiki::API;
use JSON::MaybeXS;
use Data::Dumper;

my $LANG = "de";
my $mw_LANG = MediaWiki::API->new( { api_url => "https://$LANG.wikipedia.org/w/api.php" } );
my $wikititle = "Japan";

my $alltitles = $mw_LANG->api( {
        action  => 'query',
        titles  => $wikititle,
        prop    => 'redirects',
        # The API hands back only a small default batch of redirects per page
        # (10 at the time of writing); request the largest batch the server
        # allows so the list is not silently truncated. Pages with more
        # redirects than that limit would still need continuation handling.
        rdlimit => 'max',
        format  => 'json',
    }
) or die sprintf '%d: %s', @{ $mw_LANG->{error} }{qw(code details)};

# {query}{pages} is a hash keyed by page id; each value describes one page.
for my $pageid ( keys %{ $alltitles->{query}{pages} } ) {
    my $r = $alltitles->{query}{pages}{$pageid};
    printf "Redirects for page %d with title '%s'\n", @{$r}{qw(pageid title)};

    # {redirects} is an array of hashes, one per redirecting page. Fall back
    # to an empty list so a page without that key is neither dereferenced as
    # undef nor autovivified.
    for my $redirect ( @{ $r->{redirects} // [] } ) {
        printf "\t%d: '%s'\n", @{$redirect}{qw(pageid title)};
    }
}
Postfix dereferencing makes things a bit cleaner:
# Walk every page record in the API result and list its redirects, using
# postfix dereferencing (->%*, ->@*, ->@{...}) throughout.
for my $page ( values $alltitles->{query}{pages}->%* ) {
    printf "Redirects for page %d with title '%s'\n", $page->@{qw(pageid title)};

    # One line per redirecting page: its page id and its title.
    for my $entry ( $page->{redirects}->@* ) {
        printf "\t%d: '%s'\n", $entry->@{qw(pageid title)};
    }
}
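This requires perl 5.24 or later. (On perl 5.20 and 5.22 postfix dereferencing is available only as an experimental feature, which has to be enabled with `use feature 'postderef'; no warnings 'experimental::postderef';`.)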