#! /usr/bin/env perl
# Swisstopo swissNAMES3D parser for Wikidata
# By Abaddon1337
# v. 0.0.2-24072015
#
# usage: sn3dp.pl -i inputfile.csv [-q query]
#
# Copyright (c) 2015, Abaddon1337
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
# must display the following acknowledgement:
# This product includes software developed by Abaddon1337.
# 4. Neither the name of Abaddon1337 nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY ABADDON1337 ''AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL ABADDON1337 BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict;
use warnings;
use v5.10;
use Text::CSV;
use Getopt::Long;
use Encode qw(decode encode);
# Release string and the usage line appended to argument errors.
our $VERSION       = "0.0.2-24072015";
our $ERROR_MESSAGE = "Usage: $0 -i [FILE] -q [QUERY]\n";

# Command-line inputs; both are filled in by GetOptions.
my ($iFile, $iQuery);

# The input file is decoded from $origEncoding; text that is printed is
# re-encoded to $finalEncoding.
my ($origEncoding, $finalEncoding) = ('cp1252', 'utf8');

# Raw CSV rows (row 0 is the header) and the records built from them.
my (@inputData, @formattedData);
my $elementLanguage;

# Semicolon-separated CSV reader; binary mode is enabled.
my $csv = Text::CSV->new({ sep_char => ';', binary => 1 })
    or die "Error: cannot use CSV.\n";

# Maps the language description found in the CSV to a language code.
my %objectLanguages = (
    'Hochdeutsch inkl. Lokalsprachen'         => 'de',
    'Franzoesisch inkl. Lokalsprachen'        => 'fr',
    'Italienisch inkl. Lokalsprachen'         => 'it',
    'Rumantsch Grischun inkl. Lokalsprachen'  => 'rm',
);

# Column index of each field of interest. These are only defaults: the
# header row overrides them whenever it names the corresponding column.
my %lut = (
    nature => 1,
    label  => 6,
    lang   => 7,
    long   => 10,
    lat    => 11,
);
# Parse the command line. The help and version options are wired to
# callbacks that print their message and exit on their own.
my $optionsOk = GetOptions(
    'input_file=s' => \$iFile,
    'query=s'      => \$iQuery,
    'help'         => \&HELP_MESSAGE,
    'version'      => \&VERSION_MESSAGE,
);
die("Error: wrong arguments.\n$ERROR_MESSAGE") unless $optionsOk;

# The input file is mandatory; the query is optional.
die "Error: no input file.\n $ERROR_MESSAGE" unless $iFile;
###############
# PARSER HERE #
###############
# Reads the whole CSV into memory, locates the columns of interest from the
# header row, then prints one record per data row. When a query was given,
# only rows in which at least one field matches the query regex are printed.
say "Opening file $iFile...";
open my $file, "<:encoding($origEncoding)", $iFile or die "$iFile: $!";
say "Getting data...";
while (my $currentLine = $csv->getline($file)) {
    push @inputData, $currentLine;
}
# getline() returns false at end of file and on a parse error alike;
# report the error when the reader did not actually reach the end.
$csv->eof or $csv->error_diag();
close $file;

# Without a header row there is nothing to index; fail with a clear message
# instead of dereferencing an undefined row below.
die "Error: $iFile contains no data.\n" unless @inputData;
say "Data read. Starting parsing...";

# Get indexes from the header line. Column titles that are not present keep
# the default index already stored in %lut.
my %lutKeyFor = (
    OBJEKTART  => 'nature',
    NAME       => 'label',
    SPRACHCODE => 'lang',
    E          => 'long',
    N          => 'lat',
);
my @headerRow = @{ $inputData[0] };
for my $i (0 .. $#headerRow) {
    my $title = $headerRow[$i] // '';
    if ($title =~ m/^(OBJEKTART|NAME|SPRACHCODE|E|N)$/) {
        $lut{ $lutKeyFor{$1} } = $i;
    }
}

# A query is active when it is a non-empty string. Testing plain truthiness
# would silently ignore the query "0". The pattern is compiled once here
# rather than once per field.
my $hasQuery = defined $iQuery && length $iQuery;
my $queryRe  = $hasQuery ? qr/$iQuery/ : undef;

# Parse lines 2 to n.
ROW:
for my $i (1 .. $#inputData) {
    my $row = $inputData[$i];

    if ($hasQuery) {
        # Fields are matched in their encoded form, as the query itself is
        # the raw byte string taken from the command line.
        my $queryMatch = 0;
        for my $field (@{$row}) {
            if (encode($finalEncoding, $field // '') =~ $queryRe) {
                $queryMatch = 1;
                last;
            }
        }
        next ROW unless $queryMatch;
    }

    # Get language; unknown descriptions are reported as "??".
    my $languageName = $row->[ $lut{'lang'} ] // '';
    $elementLanguage = $objectLanguages{$languageName} // "??";

    # Convert coordinates; both converters take (easting, northing).
    my ($east, $north) = @{$row}[ $lut{'long'}, $lut{'lat'} ];
    my $latitude  = CHtoWGSlat($east, $north);
    my $longitude = CHtoWGSlong($east, $north);

    # Format content. Every text field taken from the CSV is encoded before
    # printing so the output is uniformly in $finalEncoding.
    my %formattedObject = (
        'Libellé'       => encode($finalEncoding, $row->[ $lut{'label'} ] // ''),
        'Label:lang'    => $elementLanguage,
        'Property:P31'  => encode($finalEncoding, $row->[ $lut{'nature'} ] // ''),
        'Property:P625' => "$latitude, $longitude",
    );

    # Store a reference so that each record stays one element of the array;
    # pushing the hash itself would flatten it into loose keys and values.
    push @formattedData, \%formattedObject;

    print "$formattedObject{'Libellé'} = {\n";
    print "\t\t\'Label:lang\'\t=> \'$formattedObject{'Label:lang'}\',\n";
    print "\t\t\'Property:P31\'\t=> \'$formattedObject{'Property:P31'}\',\n";
    print "\t\t\'Property:P625\'\t=> \'$formattedObject{'Property:P625'}\',\n";
    print "};\n";
}
# That's all folks!
say "Parsing finished. Quit.";
#################
# END OF PARSER #
#################
#messages
# Print the usage summary on standard output and exit with status 0.
sub HELP_MESSAGE {
    # One entry per output line; the newline is appended when printing.
    my @helpLines = (
        "",
        "usage:",
        "$0 -i FILE [-q QUERY]",
        "",
        "-i\t--input_file\tinput file csv format",
        "-q\t--query\t\tparser query",
        "-h\t--help\t\tdisplay this help and exit",
        "-v\t--version\toutput version information and exit",
        "",
        "Exit status:",
        "0\tif ok,",
        "1+\tif failed.",
    );
    print map { "$_\n" } @helpLines;
    exit 0;
}
# Print the program name with the release string held in $VERSION, then
# exit with status 0.
sub VERSION_MESSAGE {
    printf "Swisstopo swissNAMES3D parser for Wikidata version %s\n", $VERSION;
    exit 0;
}
#conversions
# Convert Swiss projection coordinates to a WGS latitude.
#
# Arguments: ($y, $x) - easting and northing in metres. Both the LV03 form
#            (origin at y = 600000, x = 200000) and the LV95 form (the same
#            values shifted by 2000000 and 1000000) are accepted.
# Returns:   the latitude in decimal degrees, computed with a polynomial
#            approximation.
sub CHtoWGSlat {
    my ($y, $x) = @_;

    # LV95 coordinates are LV03 coordinates plus a fixed offset. The two
    # value ranges cannot overlap inside Switzerland, so the magnitude alone
    # tells them apart.
    $y -= 2000000 if $y >= 2000000;
    $x -= 1000000 if $x >= 1000000;

    # Auxiliary values: offsets from the Bern origin, in units of 1000 km.
    my $y_aux = ($y - 600000) / 1000000;
    my $x_aux = ($x - 200000) / 1000000;

    # Latitude in units of 10000 arc seconds.
    my $lat = 16.9023892
            + 3.238272 * $x_aux
            - 0.270978 * $y_aux**2
            - 0.002528 * $x_aux**2
            - 0.0447   * $y_aux**2 * $x_aux
            - 0.0140   * $x_aux**3;

    # 10000 arc seconds -> arc seconds -> decimal degrees (x 10000 / 3600).
    return $lat * 100 / 36;
}
# Convert Swiss projection coordinates to a WGS longitude.
#
# Arguments: ($y, $x) - easting and northing in metres. Both the LV03 form
#            (origin at y = 600000, x = 200000) and the LV95 form (the same
#            values shifted by 2000000 and 1000000) are accepted.
# Returns:   the longitude in decimal degrees, computed with a polynomial
#            approximation.
sub CHtoWGSlong {
    my ($y, $x) = @_;

    # LV95 coordinates are LV03 coordinates plus a fixed offset. The two
    # value ranges cannot overlap inside Switzerland, so the magnitude alone
    # tells them apart.
    $y -= 2000000 if $y >= 2000000;
    $x -= 1000000 if $x >= 1000000;

    # Auxiliary values: offsets from the Bern origin, in units of 1000 km.
    my $y_aux = ($y - 600000) / 1000000;
    my $x_aux = ($x - 200000) / 1000000;

    # Longitude in units of 10000 arc seconds.
    my $long = 2.6779094
             + 4.728982 * $y_aux
             + 0.791484 * $y_aux * $x_aux
             + 0.1306   * $y_aux * $x_aux**2
             - 0.0436   * $y_aux**3;

    # 10000 arc seconds -> arc seconds -> decimal degrees (x 10000 / 3600).
    return $long * 100 / 36;
}