[meego-commits] 6798: Changes to Trunk/perl-HTML-Parser

Peter Zhu peter.j.zhu at intel.com
Wed Aug 18 08:54:53 UTC 2010


Hi,
I have made the following changes to perl-HTML-Parser in project Trunk. Please review and accept ASAP.

Thank You,
Peter Zhu

[This message was auto-generated]

---

Request #6798:

  submit:   Trunk:Testing/perl-HTML-Parser(r4) -> Trunk/perl-HTML-Parser


Message:
    Move to Trunk

State:   new          2010-08-17T20:45:31 peter
Comment: None



changes files:
--------------
--- perl-HTML-Parser.changes
+++ perl-HTML-Parser.changes
@@ -0,0 +1,3 @@
+* Tue Jul 27 2010 Quanxian Wang <quanxian.wang at intel.com>  3.65
+- update to 3.65
+

old:
----
  HTML-Parser-3.63.tar.gz

new:
----
  HTML-Parser-3.65.tar.gz
  perl-HTML-Parser.yaml

spec files:
-----------
--- perl-HTML-Parser.spec
+++ perl-HTML-Parser.spec
@@ -1,19 +1,24 @@
+# 
+# Do not Edit! Generated by:
+# spectacle version 0.18
+# 
+# >> macros
 %define real_name HTML-Parser
+# << macros
 
-Name:           perl-%{real_name}
-Version:        3.63
-Release:        1
+Name:       perl-HTML-Parser
 Summary:        Perl module for parsing HTML
-
+Version:    3.65
+Release:    1
 Group:          Development/Libraries
 License:        GPL+ or Artistic
-Url:            http://search.cpan.org/dist/HTML-Parser/
-Source:         http://search.cpan.org/CPAN/authors/id/G/GA/GAAS/%{real_name}-%{version}.tar.gz
-BuildRoot:      %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n)
-
-BuildRequires:  perl(HTML::Tagset) >= 3.03, perl(ExtUtils::MakeMaker), perl(Test::Simple)
+URL:        http://search.cpan.org/dist/HTML-Parser/
+Source0:    http://search.cpan.org/CPAN/authors/id/G/GA/GAAS/HTML-Parser-%{version}.tar.gz
+Source100:  perl-HTML-Parser.yaml
 Requires:       perl(:MODULE_COMPAT_%(eval "`%{__perl} -V:version`"; echo $version))
 Requires:       perl(HTML::Tagset) >= 3.03
+BuildRequires:  perl(HTML::Tagset) >= 3.03, perl(ExtUtils::MakeMaker), perl(Test::Simple)
+
 
 %description
 The HTML-Parser module for perl to parse and extract information from
@@ -21,39 +26,67 @@
 HTML::LinkExtor, HTML::PullParser, and HTML::TokeParser modules.
 
 
+
+
 %prep
-%setup -q -n %{real_name}-%{version}
+%setup -q -n HTML-Parser-%{version}
+
+# >> setup
 chmod -c a-x eg/*
+# << setup
 
 %build
-%{__perl} Makefile.PL INSTALLDIRS=vendor OPTIMIZE="$RPM_OPT_FLAGS"
-make %{?_smp_mflags}
+# >> build pre
+# << build pre
 
+if test -f Makefile.PL; then
+%{__perl} Makefile.PL INSTALLDIRS=vendor
+make %{?jobs:-j%jobs}
+else
+%{__perl} Build.PL  --installdirs vendor
+./Build
+fi
+
+# >> build post
+# << build post
 %install
-rm -rf $RPM_BUILD_ROOT
-make pure_install PERL_INSTALL_ROOT=$RPM_BUILD_ROOT
+rm -rf %{buildroot}
+# >> install pre
+# << install pre
+if test -f Makefile.PL; then
+make pure_install PERL_INSTALL_ROOT=%{buildroot}
+else
+./Build install --installdirs vendor
+fi
+find %{buildroot} -type f -name .packlist -exec rm -f {} ';'
+find %{buildroot} -depth -type d -exec rmdir {} 2>/dev/null ';'
+find %{buildroot} -type f -name '*.bs' -empty -exec rm -f {} ';'
+%{_fixperms} %{buildroot}/*
 
+# >> install post
 file=$RPM_BUILD_ROOT%{_mandir}/man3/HTML::Entities.3pm
 iconv -f iso-8859-1 -t utf-8 <"$file" > "${file}_"
 mv -f "${file}_" "$file"
-
-find $RPM_BUILD_ROOT -type f -name .packlist -exec rm -f {} ';'
-find $RPM_BUILD_ROOT -type f -name '*.bs' -empty -exec rm -f {} ';'
-find $RPM_BUILD_ROOT -depth -type d -exec rmdir {} 2>/dev/null ';'
 chmod -R u+w $RPM_BUILD_ROOT/*
 
+# << install post
 %check
+# >> check
 make test
+# << check
+
+
+
 
-%clean 
-rm -rf $RPM_BUILD_ROOT
 
 
 %files
 %defattr(-,root,root,-)
+# >> files
 %doc Changes README TODO eg/
 %{perl_vendorarch}/HTML/
 %{perl_vendorarch}/auto/HTML/
 %doc %{_mandir}/man3/*.3pm*
+# << files
 
 

other changes:
--------------

++++++ HTML-Parser-3.63.tar.gz -> HTML-Parser-3.65.tar.gz
--- Changes
+++ Changes
@@ -1,4 +1,29 @@
 _______________________________________________________________________________
+2010-04-04  Release 3.65
+
+Gisle Aas (1):
+      Eliminate buggy entities_decode_old
+
+Salvatore Bonaccorso (1):
+      Fixed endianness typo [RT#50811]
+
+Ville Skyttä (1):
+      Documentation fixes.
+
+
+_______________________________________________________________________________
+2009-10-25  Release 3.64
+
+Gisle Aas (5):
+      Convert files to UTF-8
+      Don't allow decode_entities() to generate illegal Unicode chars
+      Copyright 2009
+      Remove rendundant (repeated) test
+      Make parse_file() method use 3-arg open [RT#49434]
+
+
+
+_______________________________________________________________________________
 2009-10-22  Release 3.63
 
 Gisle Aas (2):
--- META.yml
+++ META.yml
@@ -1,6 +1,6 @@
 --- #YAML:1.0
 name:               HTML-Parser
-version:            3.63
+version:            3.65
 abstract:           HTML parser class
 author:
     - Gisle Aas <gisle at activestate.com>
@@ -22,7 +22,7 @@
     directory:
         - t
         - inc
-generated_by:       ExtUtils::MakeMaker version 6.55_02
+generated_by:       ExtUtils::MakeMaker version 6.56
 meta-spec:
     url:      http://module-build.sourceforge.net/META-spec-v1.4.html
     version:  1.4
--- Parser.pm
+++ Parser.pm
@@ -1,6 +1,6 @@
 package HTML::Parser;
 
-# Copyright 1996-2008, Gisle Aas.
+# Copyright 1996-2009, Gisle Aas.
 # Copyright 1999-2000, Michael A. Chase.
 #
 # This library is free software; you can redistribute it and/or
@@ -9,7 +9,7 @@
 use strict;
 use vars qw($VERSION @ISA);
 
-$VERSION = "3.63";
+$VERSION = "3.65";
 
 require HTML::Entities;
 
@@ -92,7 +92,7 @@
     if (!ref($file) && ref(\$file) ne "GLOB") {
         # Assume $file is a filename
         local(*F);
-        open(F, $file) || return undef;
+        open(F, "<", $file) || return undef;
 	binmode(F);  # should we? good for byte counts
         $opened++;
         $file = *F;
@@ -891,7 +891,7 @@
 Example:
 
   <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
-  "http://www.w3.org/TR/html40/strict.dtd">
+      "http://www.w3.org/TR/html4/strict.dtd">
 
 DTDs inside <!DOCTYPE ...> will confuse HTML::Parser.
 
@@ -954,7 +954,7 @@
 
 =head2 Unicode
 
-The C<HTML::Parser> can parse Unicode strings when running under
+C<HTML::Parser> can parse Unicode strings when running under
 perl-5.8 or better.  If Unicode is passed to $p->parse() then chunks
 of Unicode will be reported to the handlers.  The offset and length
 argspecs will also report their position in terms of characters.
@@ -1224,10 +1224,10 @@
 
 L<HTML::TreeBuilder> (part of the I<HTML-Tree> distribution)
 
-http://www.w3.org/TR/html4
+L<http://www.w3.org/TR/html4/>
 
 More information about marked sections and processing instructions may
-be found at C<http://www.sgml.u-net.com/book/sgml-8.htm>.
+be found at L<http://www.is-thought.co.uk/book/sgml-8.htm>.
 
 =head1 COPYRIGHT
 
--- Parser.xs
+++ Parser.xs
@@ -1,5 +1,5 @@
 /* 
- * Copyright 1999-2005, Gisle Aas.
+ * Copyright 1999-2009, Gisle Aas.
  * Copyright 1999-2000, Michael A. Chase.
  *
  * This library is free software; you can redistribute it and/or
--- README
+++ README
@@ -58,8 +58,8 @@
 
 COPYRIGHT
 
-  © 1995-2008 Gisle Aas. All rights reserved.
-  © 1999-2000 Michael A. Chase.  All rights reserved.
+  © 1995-2009 Gisle Aas. All rights reserved.
+  © 1999-2000 Michael A. Chase.  All rights reserved.
 
 This library is free software; you can redistribute it and/or modify
 it under the same terms as Perl itself.
--- hparser.c
+++ hparser.c
@@ -1,5 +1,5 @@
 /* 
- * Copyright 1999-2008, Gisle Aas
+ * Copyright 1999-2009, Gisle Aas
  * Copyright 1999-2000, Michael A. Chase
  *
  * This library is free software; you can redistribute it and/or
@@ -1847,7 +1847,7 @@
 		warn("Parsing of undecoded UTF-8 will give garbage when decoding entities");
 	    }
 	    if (utf8 && len >= 2 && strnEQ(beg, "\xFF\xFE", 2)) {
-		warn("Parsing string decoded with wrong endianess");
+		warn("Parsing string decoded with wrong endianness");
 	    }
 #endif
 	    if (!utf8 && len >= 4 &&
--- hparser.h
+++ hparser.h
@@ -1,5 +1,5 @@
 /* 
- * Copyright 1999-2005, Gisle Aas
+ * Copyright 1999-2009, Gisle Aas
  * Copyright 1999-2000, Michael A. Chase
  *
  * This library is free software; you can redistribute it and/or
--- lib/HTML/Entities.pm
+++ lib/HTML/Entities.pm
@@ -14,7 +14,7 @@
 
 For example, this:
 
- $input = "vis-à-vis Beyoncé's naïve\npapier-mâché résumé";
+ $input = "vis-à-vis Beyoncé's naïve\npapier-mâché résumé";
  print encode_entities($input), "\n"
 
 Prints this out:
@@ -68,7 +68,7 @@
 
    $string = "foo&nbspbar";
    _decode_entities($string, { nb => "@", nbsp => "\xA0" }, 1);
-   print $string;  # will print "foo bar"
+   print $string;  # will print "foo bar"
 
 This routine is exported by default.
 
@@ -146,7 +146,7 @@
 @EXPORT = qw(encode_entities decode_entities _decode_entities);
 @EXPORT_OK = qw(%entity2char %char2entity encode_entities_numeric);
 
-$VERSION = "3.63";
+$VERSION = "3.64";
 sub Version { $VERSION; }
 
 require HTML::Parser;  # for fast XS implemented decode_entities
@@ -434,23 +434,6 @@
 
 my %subst;  # compiled encoding regexps
 
-sub decode_entities_old
-{
-    my $array;
-    if (defined wantarray) {
-	$array = [@_]; # copy
-    } else {
-	$array = \@_;  # modify in-place
-    }
-    my $c;
-    for (@$array) {
-	s/(&\#(\d+);?)/$2 < 256 ? chr($2) : $1/eg;
-	s/(&\#[xX]([0-9a-fA-F]+);?)/$c = hex($2); $c < 256 ? chr($c) : $1/eg;
-	s/(&(\w+);?)/$entity2char{$2} || $1/eg;
-    }
-    wantarray ? @$array : $array->[0];
-}
-
 sub encode_entities
 {
     return undef unless defined $_[0];
--- lib/HTML/HeadParser.pm
+++ lib/HTML/HeadParser.pm
@@ -178,7 +178,10 @@
 # <!ENTITY % head.content "TITLE & BASE?">
 # <!ELEMENT HEAD O O (%head.content;) +(%head.misc;)>
 #
-# Added in HTML 5 as of WD-html5-20090423: noscript, command
+# From HTML 5 as of WD-html5-20090825:
+#
+# One or more elements of metadata content, [...]
+# => base, command, link, meta, noscript, script, style, title
 
 sub start
 {
--- t/entities.t
+++ t/entities.t
@@ -1,6 +1,6 @@
 use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);
 
-use Test::More tests => 17;
+use Test::More tests => 18;
 
 $a = "Våre norske tegn bør &#230res";
 
@@ -73,6 +73,8 @@
 
 is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
   "Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
+is(decode_entities("{&amp;&#x26;amp;& also &#x42f;œ}"),
+    "{&&& also \x{42F}\x{153}}");
 
 __END__
 # Quoted from rfc1866.txt
--- t/uentities.t
+++ t/uentities.t
@@ -2,7 +2,7 @@
 
 use HTML::Entities;
 
-use Test::More tests => 27;
+use Test::More tests => 26;
 
 SKIP: {
 skip "This perl does not support Unicode or Unicode entities not selected",
@@ -25,15 +25,15 @@
 is(decode_entities("&#xFDD1"), "\x{FFFD}");
 is(decode_entities("&#xFDE0"), "\x{FFFD}");
 is(decode_entities("&#xFDEF"), "\x{FFFD}");
-is(decode_entities("&#xFFFF"), "\x{FFFD}");
+is(decode_entities("&#xFFFF"), "&#xFFFF");
 is(decode_entities("&#x10FFFF"), "\x{FFFD}");
-is(decode_entities("&#x110000"), chr(0xFFFD));
-is(decode_entities("&#XFFFFFFFF"), chr(0xFFFD));
+is(decode_entities("&#x110000"), "&#x110000");
+is(decode_entities("&#XFFFFFFFF"), "&#XFFFFFFFF");
 
-is(decode_entities("&#0"), "\0");
-is(decode_entities("&#0;"), "\0");
-is(decode_entities("&#x0"), "\0");
-is(decode_entities("&#X0;"), "\0");
+is(decode_entities("&#0"), "&#0");
+is(decode_entities("&#0;"), "&#0;");
+is(decode_entities("&#x0"), "&#x0");
+is(decode_entities("&#X0;"), "&#X0;");
 
 is(decode_entities("&#&aring&#229å&#xFFF"), "&#ååå\x{FFF}");
 
@@ -59,8 +59,6 @@
 
 is(decode_entities("��"), chr(0x100085));
 
-is(decode_entities("��"), chr(0x100085));
-
 is(decode_entities("&#56256"), chr(0xFFFD));
 
 is(decode_entities("\260’\260"), "\x{b0}\x{2019}\x{b0}");
--- util.c
+++ util.c
@@ -1,5 +1,5 @@
 /* 
- * Copyright 1999-2006, Gisle Aas.
+ * Copyright 1999-2009, Gisle Aas.
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the same terms as Perl itself.
@@ -96,7 +96,6 @@
 
 	if (s < end && *s == '#') {
 	    UV num = 0;
-	    UV prev = 0;
 	    int ok = 0;
 	    s++;
 	    if (s < end && (*s == 'x' || *s == 'X')) {
@@ -106,12 +105,11 @@
 		    if (!tmp)
 			break;
 		    num = num << 4 | ((tmp - PL_hexdigit) & 15);
-		    if (prev && num <= prev) {
+		    if (num > 0x10FFFF) {
 			/* overflow */
 			ok = 0;
 			break;
 		    }
-		    prev = num;
 		    s++;
 		    ok = 1;
 		}
@@ -119,17 +117,16 @@
 	    else {
 		while (s < end && isDIGIT(*s)) {
 		    num = num * 10 + (*s - '0');
-		    if (prev && num < prev) {
+		    if (num > 0x10FFFF) {
 			/* overflow */
 			ok = 0;
 			break;
 		    }
-		    prev = num;
 		    s++;
 		    ok = 1;
 		}
 	    }
-	    if (ok) {
+	    if (num && ok) {
 #ifdef UNICODE_HTML_PARSER
 		if (!SvUTF8(sv) && num <= 255) {
 		    buf[0] = (char) num;
@@ -137,6 +134,9 @@
 		    repl_len = 1;
 		    repl_utf8 = 0;
 		}
+		else if (num == 0xFFFE || num == 0xFFFF) {
+		    /* illegal */
+		}
 		else {
 		    char *tmp;
 		    if ((num & 0xFFFFFC00) == 0xDC00) {  /* low-surrogate */

++++++ perl-HTML-Parser.yaml (new)
--- perl-HTML-Parser.yaml
+++ perl-HTML-Parser.yaml
+Name: perl-HTML-Parser
+Summary: Perl module for parsing HTML
+Version: 3.65
+Release: 1
+Group: Development/Libraries
+License: GPL+ or Artistic
+URL: http://search.cpan.org/dist/HTML-Parser/
+Sources:
+    - http://search.cpan.org/CPAN/authors/id/G/GA/GAAS/HTML-Parser-%{version}.tar.gz
+Description: |
+    The HTML-Parser module for perl to parse and extract information from
+    HTML documents, including the HTML::Entities, HTML::HeadParser,
+    HTML::LinkExtor, HTML::PullParser, and HTML::TokeParser modules.
+
+Requires:
+    - perl(HTML::Tagset) >= 3.03
+PkgBR:
+    - perl(HTML::Tagset) >= 3.03, perl(ExtUtils::MakeMaker), perl(Test::Simple)
+Configure: none
+Builder: perl
+Check: yes




More information about the MeeGo-commits mailing list