René Nyffenegger's collection of things on the web
René Nyffenegger on Oracle - Most wanted - Feedback -
 

Removing HTML Tags with Perl

#!/usr/bin/perl -w

use strict;

use HTML::TokeParser::Simple;

# Run the tag stripper on the sample document; the extracted text is printed.
RemoveTags('some_file.html');

# RemoveTags($html_doc)
#
# Reads the HTML file named by $html_doc and prints only its text tokens,
# i.e. everything that is not a tag, comment or declaration.
#
#   $html_doc - path of the HTML file to parse.
#
# Returns nothing. Dies with the file name and the OS error when the parser
# cannot be created (e.g. the file is missing or unreadable).
sub RemoveTags {
  my $html_doc = shift;

  # The constructor yields undef when the file cannot be opened; checking it
  # here gives a meaningful message instead of the later
  # 'Can't call method "get_token" on an undefined value'.
  my $p = HTML::TokeParser::Simple->new($html_doc)
    or die "RemoveTags: cannot open '$html_doc': $!\n";

  while ( my $token = $p->get_token ) {
    # Text tokens are emitted verbatim; all other token kinds are skipped.
    print $token->as_is if $token->is_text;
  }

  return;
}
some_file.html
<html>
<head><title>Some title</title></head>
<body>
  <h1>Hello World</h1>

  How are you

  <p>New paragraph and stuff
</body>
</html>