Make getTextContent return an offset array and improve the algorithm.

Make the search parts in viewer.js work again.
2012-09-11 15:10:34 -07:00 · 2012-09-11 15:10:34 -07:00 · a38c4bc729
commit a38c4bc729
parent e13846821c
3 changed files with 64 additions and 7 deletions
--- a/src/evaluator.js
+++ b/src/evaluator.js
@ -505,7 +505,13 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
      return queue;
    },
-    getTextContent: function partialEvaluatorGetIRQueue(stream, resources) {
+    getTextContent: function partialEvaluatorGetIRQueue(stream, resources, state) {
      if (!state) {
        state = {
          text: '',
          mapping: []
        };
      }
      var self = this;
      var xref = this.xref;
@ -515,18 +521,22 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
      }
      resources = xref.fetchIfRef(resources) || new Dict();
      // The xobj is parsed iff it's needed, e.g. if there is a `DO` cmd.
      var xobjs = null;
      var parser = new Parser(new Lexer(stream), false);
      var res = resources;
      var args = [], obj;
-      var text = '';
+      var text = state.text;
      var chunk = '';
      var commandOffset = state.mapping;
      var font = null;
      while (!isEOF(obj = parser.getObj())) {
        if (isCmd(obj)) {
          var cmd = obj.cmd;
          switch (cmd) {
            // TODO: Add support for SAVE/RESTORE and XFORM here.
            case 'Tf':
              font = handleSetFont(args[0].name).translated;
              break;
@ -536,9 +546,12 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
                if (typeof items[j] === 'string') {
                  chunk += fontCharsToUnicode(items[j], font);
                } else if (items[j] < 0) {
                  // making all negative offsets a space - better to have
                  // a space in incorrect place than not have them at all
                  chunk += ' ';
                } else if (items[j] < 0 && font.spacedWidth > 0) {
                  var numFakeSpaces = Math.round(-e / font.spacedWidth);
                  if (numFakeSpaces > 0) {
                    chunk += ' ';
                  }
                }
              }
              break;
@ -551,8 +564,49 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
            case '"':
              chunk += fontCharsToUnicode(args[2], font) + ' ';
              break;
            case 'Do':
              // Set the chunk such that the following if won't add something
              // to the state.
              chunk = '';
              if (args[0].code) {
                break;
              }
              if (!xobjs) {
                xobjs = resources.get('XObject') || new Dict();
              }
              var name = args[0].name;
              var xobj = xobjs.get(name);
              if (!xobj)
                break;
              assertWellFormed(isStream(xobj), 'XObject should be a stream');
              var type = xobj.dict.get('Subtype');
              assertWellFormed(
                isName(type),
                'XObject should have a Name subtype'
              );
              if ('Form' !== type.name)
                break;
              // Add some spacing between the text here and the text of the
              // xForm.
              text = text + ' ';
              state.text = text;
              state = this.getTextContent(
                xobj,
                xobj.dict.get('Resources') || resources,
                state
              );
              text = state.text;
              break;
          } // switch
          if (chunk !== '') {
            commandOffset.push(text.length);
            text += chunk;
            chunk = '';
          }
@ -564,7 +618,10 @@ var PartialEvaluator = (function PartialEvaluatorClosure() {
        }
      }
-      return text;
+      return {
        text: text,
        mapping: commandOffset
      };
    },
    extractDataStructures: function
--- a/web/viewer.html
+++ b/web/viewer.html
@ -88,7 +88,7 @@ limitations under the License.
          <button id="viewOutline" class="toolbarButton group" title="Show Document Outline" tabindex="2" data-l10n-id="outline">
             <span data-l10n-id="outline_label">Document Outline</span>
          </button>
-          <button id="viewSearch" class="toolbarButton group hidden" title="Search Document" tabindex="3" data-l10n-id="search_panel">
+          <button id="viewSearch" class="toolbarButton group" title="Search Document" tabindex="3" data-l10n-id="search_panel">
             <span data-l10n-id="search_panel_label">Search Document</span>
          </button>
        </div>
--- a/web/viewer.js
+++ b/web/viewer.js
@ -1043,7 +1043,7 @@ var PDFView = {
    function extractPageText(pageIndex) {
      self.pages[pageIndex].pdfPage.getTextContent().then(
        function textContentResolved(textContent) {
-          self.pageText[pageIndex] = textContent;
+          self.pageText[pageIndex] = textContent.text;
          self.search();
          if ((pageIndex + 1) < self.pages.length)
            extractPageText(pageIndex + 1);