Optimize PNG compression in SVG backend on Node.js

Use the environment's zlib implementation if available to get
reasonably-sized SVG files when an XObject image is converted to PNG.
The generated PNG is not optimal because we do not use a PNG predictor.
Further, when our SVG backend is run in a browser, the generated PNG
images will still be unnecessarily large (though the use of blob:-URLs
when available should reduce the impact on memory usage). If we want to
optimize PNG images in browsers too, we can either try to use a DEFLATE
library such as pako, or re-use our XObject image painting logic in
src/display/canvas.js. This potential improvement is not implemented by
this commit.

Tested with:

- Node.js 8.1.3 (uses zlib)
- Node.js 0.11.12 (uses zlib)
- Node.js 0.10.48 (falls back to inferior existing implementation).
- Chrome 59.0.3071.86
- Firefox 54.0

Tests:

Unit test on Node.js:

```
$ gulp lib
$ JASMINE_CONFIG_PATH=test/unit/clitests.json node ./node_modules/.bin/jasmine --filter=SVG
```

Unit test in browser: Run `gulp server` and open
http://localhost:8888/test/unit/unit_test.html?spec=SVGGraphics

To verify that the patch works as desired,

```
$ node examples/node/pdf2svg.js test/pdfs/xobject-image.pdf
$ du -b svgdump/xobject-image-1.svg
 # ^ Calculates the file size. Confirm that the size is small
 #   (784 instead of 80664 bytes).
```
This commit is contained in:
Rob Wu 2017-07-06 15:08:37 +02:00
parent 3479a19bf0
commit 01f03fe393
7 changed files with 233 additions and 1 deletions

View File

@ -1044,6 +1044,7 @@ gulp.task('lib', ['buildnumber'], function () {
'src/{pdf,pdf.worker}.js',
], { base: 'src/', }),
gulp.src([
'examples/node/domstubs.js',
'web/*.js',
'!web/pdfjs.js',
'!web/viewer.js',

View File

@ -12,10 +12,11 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals __non_webpack_require__ */
import {
createObjectURL, FONT_IDENTITY_MATRIX, IDENTITY_MATRIX, ImageKind, isArray,
isNum, OPS, Util, warn
isNodeJS, isNum, OPS, Util, warn
} from '../shared/util';
var SVGGraphics = function() {
@ -104,6 +105,37 @@ var convertImgDataToPng = (function convertImgDataToPngClosure() {
* http://www.libpng.org/pub/png/spec/1.2/PNG-Compression.html
*/
function deflateSync(literals) {
  // Compresses `literals` with the environment's zlib implementation when
  // running on Node.js, and falls back to `deflateSyncUncompressed` whenever
  // zlib cannot be used (non-Node environments, or any error thrown while
  // loading/calling zlib).
  //
  // `literals` holds the bytes to compress (presumably a Uint8Array, since it
  // is passed to zlib unchanged on Node >= 8 -- verify against the caller).
  // On the zlib path the result is always a Uint8Array; otherwise the result
  // is whatever `deflateSyncUncompressed` returns.
  if (!isNodeJS()) {
    // zlib is certainly not available outside of Node.js. We can either use
    // the pako library for client-side DEFLATE compression, or use the canvas
    // API of the browser to obtain a more optimal PNG file.
    return deflateSyncUncompressed(literals);
  }
  try {
    // NOTE: This implementation is far from perfect, but already way better
    // than not applying any compression.
    //
    // A better algorithm will try to choose a good predictor/filter and
    // then choose a suitable zlib compression strategy (e.g. 3,Z_RLE).
    //
    // Node v0.11.12 zlib.deflateSync is introduced (and returns a Buffer).
    // Node v3.0.0   Buffer inherits from Uint8Array.
    // Node v8.0.0   zlib.deflateSync accepts Uint8Array as input.
    var input;
    /* eslint-disable no-undef */
    // Always pass the radix: the major version must be parsed as decimal.
    if (parseInt(process.versions.node, 10) >= 8) {
      input = literals;
    } else if (typeof Buffer.from === 'function' &&
               Buffer.from !== Uint8Array.from) {
      // Prefer Buffer.from over the deprecated Buffer constructor. The second
      // check skips Node versions where Buffer.from is merely the method
      // inherited from Uint8Array instead of the real Buffer factory.
      input = Buffer.from(literals);
    } else {
      // Only reached on very old Node.js versions without a real Buffer.from.
      input = new Buffer(literals);
    }
    /* eslint-enable no-undef */
    var output = __non_webpack_require__('zlib')
      .deflateSync(input, { level: 9, });
    // Before Node v3.0.0 a Buffer is not a Uint8Array, so convert it.
    return output instanceof Uint8Array ? output : new Uint8Array(output);
  } catch (e) {
    // Deliberately best-effort: report the problem and fall through to the
    // uncompressed implementation below instead of failing the conversion.
    warn('Not compressing PNG because zlib.deflateSync is unavailable: ' + e);
  }
  return deflateSyncUncompressed(literals);
}

View File

@ -288,3 +288,4 @@
!font_ascent_descent.pdf
!issue8097_reduced.pdf
!transparent.pdf
!xobject-image.pdf

View File

@ -0,0 +1,61 @@
%PDF-1.1
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Type/Pages/Count 1/Kids[3 0 R]/MediaBox [0 0 200 100]>>
endobj
3 0 obj
<<
/Type/Page
/Parent 2 0 R
/Resources <<
/XObject << /SomeImage 4 0 R >>
>>
/Contents 5 0 R
>>
endobj
4 0 obj
<<
/Type/XObject
/Subtype/Image
/Width 200 % The width or height directly affects the image's file size.
/Height 100
/ColorSpace/DeviceRGB
/DecodeParms [] % Forces NativeImageDecoder.isSupported to return false.
/BitsPerComponent 8
/Length 580
/Filter [ /ASCIIHexDecode /DCTDecode ]
>>
% convert -size 1x1 xc:red jpeg:- | xxd -p -c40
stream
ffd8ffe000104a46494600010100000100010000ffdb004300030202020202030202020303030304
060404040404080606050609080a0a090809090a0c0f0c0a0b0e0b09090d110d0e0f101011100a0c
12131210130f101010ffdb00430103030304030408040408100b090b101010101010101010101010
1010101010101010101010101010101010101010101010101010101010101010101010101010ffc0
0011080001000103011100021101031101ffc40014000100000000000000000000000000000008ff
c40014100100000000000000000000000000000000ffc40015010101000000000000000000000000
00000709ffc40014110100000000000000000000000000000000ffda000c03010002110311003f00
3a03154dffd9
endstream
endobj
5 0 obj
<</Length 14>>
stream
500 0 0 400 0 0 cm
/SomeImage Do
endstream
endobj
xref
0 6
0000000000 65535 f
0000000008 00000 n
0000000054 00000 n
0000000128 00000 n
0000000246 00000 n
0000001201 00000 n
trailer
<</Root 1 0 R/Size 6>>
startxref
1281
%%EOF

View File

@ -7,6 +7,7 @@
"cff_parser_spec.js",
"cmap_spec.js",
"crypto_spec.js",
"display_svg_spec.js",
"document_spec.js",
"dom_utils_spec.js",
"evaluator_spec.js",

View File

@ -0,0 +1,135 @@
/* Copyright 2017 Mozilla Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* globals __non_webpack_require__ */
import { isNodeJS, NativeImageDecoding } from '../../src/shared/util';
import { setStubs, unsetStubs } from '../../examples/node/domstubs';
import { buildGetDocumentParams } from './test_utils';
import { getDocument } from '../../src/display/api';
import { SVGGraphics } from '../../src/display/svg';
// withZlib(true, callback); = run test with require('zlib') if possible.
// withZlib(false, callback); = run test without require('zlib').deflateSync.
// The return value of callback is returned as-is.
//
// When zlib.deflateSync is disabled and the callback returns a thenable,
// zlib.deflateSync stays disabled until that thenable settles. Restoring it
// as soon as the callback returns would re-enable zlib before any
// asynchronous work of the callback had a chance to run.
//
// Throws an Error if zlib is required but the environment is not Node.js.
function withZlib(isZlibRequired, callback) {
  if (isZlibRequired) {
    // We could try to polyfill zlib in the browser, e.g. using pako.
    // For now, only support zlib functionality on Node.js
    if (!isNodeJS()) {
      throw new Error('zlib test can only be run in Node.js');
    }
    return callback();
  }
  if (!isNodeJS()) {
    // Assume that require('zlib') is unavailable in non-Node.
    return callback();
  }
  var zlib = __non_webpack_require__('zlib');
  var originalDeflateSync = zlib.deflateSync;
  function restoreDeflateSync() {
    zlib.deflateSync = originalDeflateSync;
  }
  zlib.deflateSync = function() {
    throw new Error('zlib.deflateSync is explicitly disabled for testing.');
  };
  var result;
  try {
    result = callback();
  } catch (ex) {
    // Synchronous failure: nothing is pending, so restore immediately.
    restoreDeflateSync();
    throw ex;
  }
  if (result && typeof result.then === 'function') {
    // Restore on fulfillment as well as on rejection. The original thenable
    // is still the one handed back to the caller.
    result.then(restoreDeflateSync, restoreDeflateSync);
  } else {
    restoreDeflateSync();
  }
  return result;
}
describe('SVGGraphics', function () {
  var loadingTask;
  var page;
  beforeAll(function(done) {
    loadingTask = getDocument(buildGetDocumentParams('xobject-image.pdf', {
      nativeImageDecoderSupport: NativeImageDecoding.DISPLAY,
    }));
    // Chain the promises so that a failure to load the document or the page
    // fails the suite right away instead of running into a timeout.
    loadingTask.promise.then(function(doc) {
      return doc.getPage(1);
    }).then(function(firstPage) {
      page = firstPage;
    }).then(done, done.fail);
  });
  afterAll(function(done) {
    loadingTask.destroy().then(done, done.fail);
  });
  describe('paintImageXObject', function() {
    // Resolves with the svg:image element that SVGGraphics creates for the
    // XObject image of the test document.
    function getSVGImage() {
      var svgGfx;
      return page.getOperatorList().then(function(opList) {
        var forceDataSchema = true;
        svgGfx = new SVGGraphics(page.commonObjs, page.objs, forceDataSchema);
        return svgGfx.loadDependencies(opList);
      }).then(function() {
        var svgImg;
        // A mock to steal the svg:image element from paintInlineImageXObject.
        var elementContainer = {
          appendChild(element) {
            svgImg = element;
          },
        };
        // This points to the XObject image in xobject-image.pdf.
        var xobjectObjId = { ref: 4, gen: 0, };
        if (isNodeJS()) {
          setStubs(global);
        }
        try {
          svgGfx.paintImageXObject(xobjectObjId, elementContainer);
        } finally {
          if (isNodeJS()) {
            unsetStubs(global);
          }
        }
        return svgImg;
      });
    }
    // NOTE(review): the specs below signal completion through `done`, so
    // their expectations are now evaluated as part of the spec. Re-confirm
    // the expected sizes on the supported platforms.
    it('should produce a reasonably small svg:image', function(done) {
      if (!isNodeJS()) {
        pending('zlib.deflateSync is not supported in non-Node environments.');
      }
      withZlib(true, getSVGImage).then(function(svgImg) {
        expect(svgImg.nodeName).toBe('svg:image');
        expect(svgImg.getAttribute('width')).toBe('200px');
        expect(svgImg.getAttribute('height')).toBe('100px');
        var imgUrl = svgImg.getAttribute('xlink:href');
        // forceDataSchema = true, so the generated URL should be a data:-URL.
        expect(imgUrl).toMatch(/^data:image\/png;base64,/);
        // Test whether the generated image has a reasonable file size.
        // I obtained a data URL of size 366 with Node 8.1.3 and zlib 1.2.11.
        // Without zlib (uncompressed), the size of the data URL was excessive
        // (80247).
        expect(imgUrl.length).toBeLessThan(367);
      }).then(done, done.fail);
    });
    it('should produce a svg:image even if zlib is unavailable',
        function(done) {
      withZlib(false, getSVGImage).then(function(svgImg) {
        expect(svgImg.nodeName).toBe('svg:image');
        expect(svgImg.getAttribute('width')).toBe('200px');
        expect(svgImg.getAttribute('height')).toBe('100px');
        var imgUrl = svgImg.getAttribute('xlink:href');
        expect(imgUrl).toMatch(/^data:image\/png;base64,/);
        // The size of our naively generated PNG file is excessive :(
        expect(imgUrl.length).toBe(80247);
      }).then(done, done.fail);
    });
  });
});

View File

@ -50,6 +50,7 @@ function initializePDFJS(callback) {
'pdfjs-test/unit/cmap_spec',
'pdfjs-test/unit/crypto_spec',
'pdfjs-test/unit/custom_spec',
'pdfjs-test/unit/display_svg_spec',
'pdfjs-test/unit/document_spec',
'pdfjs-test/unit/dom_utils_spec',
'pdfjs-test/unit/evaluator_spec',