Perl

conv_xml2csv.pl

conv_xml2csv
#!/usr/bin/perl
 
#########################################################################
# file:     conv_xml2csv.pl                                             #
# purpose:  convert xml to csv using regexes for slicing and dicing     #
# usage:    conv_xml2csv.pl                                             #
# author:   Copyright 2010 Stephan Rosenke <fraser-scripts@comstau.de>  #
# license:  CC-BY-SA 3.0                                                #
#########################################################################
 
my $input_file = "input.xml";
my $output_file = "output.csv";

#########################################################################
# do not edit below this line                                           #
#########################################################################

# Minimal Perl version, checked at compile time: three-argument open(),
# lexical filehandles and the warnings pragma all need Perl 5.6.
use 5.006;
use strict;
use warnings;

# Script name without its directory part; used as prefix in error messages.
my $scriptname = $0;
$scriptname =~ s{.*/}{};

# Read the input line by line and convert each line as it arrives.
# The converted pieces are collected in memory and the output file is
# opened only after the input has been read completely and closed.
open(my $in, '<', $input_file)
    or die "$scriptname: cannot open $input_file for reading: $!";

my @result;
while (my $line = <$in>) {
    push @result, convert_line($line);
}

close($in)
    or die "$scriptname: cannot close $input_file: $!";

# Write the converted lines to $output_file. print and close are both
# checked because buffered write errors may only surface at close time.
open(my $out, '>', $output_file)
    or die "$scriptname: cannot open $output_file for writing: $!";
print {$out} @result
    or die "$scriptname: cannot write to $output_file: $!";
close($out)
    or die "$scriptname: cannot close $output_file: $!";

# convert_line($line)
#
# Convert one line of the XML input into its piece of the output.
#
# Parameter: $line - one input line, including its trailing newline if any.
# Returns:   the converted text. It may be empty, and it may lack a
#            trailing newline, in which case the next converted line
#            continues the same output record.
#
# The order of the substitutions matters: a newline is removed while the
# tag in front of it is still present, and only afterwards are the tags
# themselves removed or turned into separators.
sub convert_line {
    my ($line) = @_;

    # Remove wrapper tags that are neither rows, entries nor paragraphs.
    $line =~ s{</?(?:article|sect1|tbody|tgroup|title)>}{}g;
    $line =~ s{<informaltable frame="[a-z]+">}{}g;
    $line =~ s{</?informaltable>}{}g;
    $line =~ s{<tgroup cols="[0-9]+">}{}g;
    $line =~ s{<para/>}{}g;

    # Strip leading indentation. Tabs are stripped as well as spaces so
    # that tab-indented input does not leave stray tabs in the output.
    $line =~ s{^[ \t]+}{};

    # Drop the newline directly after these tags so that the following
    # input line is appended to the same output record.
    $line =~ s{<row>\n}{<row>}g;
    $line =~ s{para>\n}{para>}g;
    $line =~ s{entry>\n}{entry>}g;

    # A line that is empty by now produces no output at all.
    $line =~ s{^\n}{};

    # Each opening <entry> becomes a field separator; the remaining
    # para/row tags and closing entry tags are removed.
    $line =~ s{<entry>}{|}g;
    $line =~ s{<(?:/?(?:para|row)|/entry)>}{}g;

    # A newline that is still present ends the record: close it with a
    # final separator.
    $line =~ s{\n}{|\n}g;

    return $line;
}
it/oss/perl.txt · Last modified: 2010-02-16 13:30 by strols
CC Attribution-Share Alike 4.0 International
Driven by DokuWiki Recent changes RSS feed Valid CSS Valid XHTML 1.0