
Commit 73898f5

Merge pull request #102 from gunnsth/release/v3.0.3
Prepare release v3.0.3
2 parents 10d7020 + 002ccc4 commit 73898f5

File tree

11 files changed, 347 additions and 33 deletions

Gopkg.lock

Lines changed: 68 additions & 6 deletions
Generated file; diff not rendered by default.

Jenkinsfile

Lines changed: 1 addition & 0 deletions
@@ -15,6 +15,7 @@ node {
     env.UNIDOC_PASSTHROUGH_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
     env.UNIDOC_ALLOBJECTS_TESTDATA="/home/jenkins/corpus/unidoc-e2e-testdata"
     env.UNIDOC_SPLIT_TESTDATA="/home/jenkins/corpus/unidoc-e2e-split-testdata"
+    env.UNIDOC_FDFMERGE_TESTDATA="/home/jenkins/corpus/fdfmerge-testdata"
     env.UNIDOC_GS_BIN_PATH="/usr/bin/gs"
     // Hack for 1.11.5 testing work.
     env.CGO_ENABLED="0"

common/version.go

Lines changed: 3 additions & 3 deletions
@@ -12,11 +12,11 @@ import (
 
 const releaseYear = 2019
 const releaseMonth = 6
-const releaseDay = 11
-const releaseHour = 22
+const releaseDay = 27
+const releaseHour = 20
 const releaseMin = 10
 
 // Version holds version information, when bumping this make sure to bump the released at stamp also.
-const Version = "3.0.2"
+const Version = "3.0.3"
 
 var ReleasedAt = time.Date(releaseYear, releaseMonth, releaseDay, releaseHour, releaseMin, 0, 0, time.UTC)

core/parser.go

Lines changed: 19 additions & 0 deletions
@@ -768,6 +768,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
 	curObjNum := -1
 	secObjects := 0
 	insideSubsection := false
+	unmatchedContent := ""
 	for {
 		parser.skipSpaces()
 		_, err := parser.reader.Peek(1)
@@ -781,13 +782,24 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
 		}
 
 		result1 := reXrefSubsection.FindStringSubmatch(txt)
+		if len(result1) == 0 {
+			// Try to match invalid subsection beginning lines from previously
+			// read, unidentified lines. Covers cases in which the object number
+			// and the number of entries in the subsection are not on the same line.
+			tryMatch := len(unmatchedContent) > 0
+			unmatchedContent += txt + "\n"
+			if tryMatch {
+				result1 = reXrefSubsection.FindStringSubmatch(unmatchedContent)
+			}
+		}
 		if len(result1) == 3 {
 			// Match
 			first, _ := strconv.Atoi(result1[1])
 			second, _ := strconv.Atoi(result1[2])
 			curObjNum = first
 			secObjects = second
 			insideSubsection = true
+			unmatchedContent = ""
 			common.Log.Trace("xref subsection: first object: %d objects: %d", curObjNum, secObjects)
 			continue
 		}
@@ -801,6 +813,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
 			first, _ := strconv.ParseInt(result2[1], 10, 64)
 			gen, _ := strconv.Atoi(result2[2])
 			third := result2[3]
+			unmatchedContent = ""
 
 			if strings.ToLower(third) == "n" && first > 1 {
 				// Object in use in the file! Load it.
@@ -829,6 +842,7 @@ func (parser *PdfParser) parseXrefTable() (*PdfObjectDictionary, error) {
 			curObjNum++
 			continue
 		}
+
 		if (len(txt) > 6) && (txt[:7] == "trailer") {
 			common.Log.Trace("Found trailer - %s", txt)
 			// Sometimes get "trailer << ...."
@@ -1521,6 +1535,11 @@ func (parser *PdfParser) ParseIndirectObject() (PdfObject, error) {
 				return &indirect, err
 			}
 			common.Log.Trace("Parsed object ... finished.")
+		} else if bb[0] == ']' {
+			// ']' not used as an array object ending marker, or array object
+			// terminated multiple times. Discarding the character.
+			common.Log.Debug("WARNING: ']' character not being used as an array ending marker. Skipping.")
+			parser.reader.Discard(1)
 		} else {
 			if bb[0] == 'e' {
 				lineStr, err := parser.readTextLine()
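
The xref change above accumulates lines that the subsection regex cannot identify and retries the match on the combined content, covering files where the first object number and the entry count are split across lines. Below is a minimal standalone sketch of that accumulate-and-retry idea; the subsection pattern used here is an illustrative assumption, not necessarily the parser's actual reXrefSubsection.

package main

import (
	"fmt"
	"regexp"
	"strconv"
)

// Illustrative subsection pattern ("<first object> <entry count>");
// the real reXrefSubsection used by the parser may differ.
var reSubsection = regexp.MustCompile(`^\s*(\d+)\s+(\d+)\s*$`)

func main() {
	// Malformed xref: the object number and the entry count landed on
	// separate lines instead of a single "0 15" header line.
	lines := []string{"0", "15"}

	unmatched := ""
	for _, txt := range lines {
		result := reSubsection.FindStringSubmatch(txt)
		if len(result) == 0 {
			// Accumulate unidentified lines and retry the match on the
			// combined content, mirroring the parser change above.
			tryMatch := len(unmatched) > 0
			unmatched += txt + "\n"
			if tryMatch {
				result = reSubsection.FindStringSubmatch(unmatched)
			}
		}
		if len(result) == 3 {
			first, _ := strconv.Atoi(result[1])
			count, _ := strconv.Atoi(result[2])
			fmt.Printf("subsection: first object %d, %d entries\n", first, count)
			unmatched = ""
		}
	}
	// Output: subsection: first object 0, 15 entries
}

Because \s also matches newlines, the combined "0\n15\n" content satisfies the pattern even though neither line matched on its own.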

internal/e2etest/fdfmerge_test.go

Lines changed: 169 additions & 0 deletions
@@ -0,0 +1,169 @@
/*
 * This file is subject to the terms and conditions defined in
 * file 'LICENSE.md', which is part of this source code package.
 */

package e2etest

import (
	"io/ioutil"
	"os"
	"path/filepath"
	"runtime/debug"
	"strings"
	"testing"

	"github.com/stretchr/testify/require"

	"github.com/unidoc/unipdf/v3/annotator"
	"github.com/unidoc/unipdf/v3/fdf"
	"github.com/unidoc/unipdf/v3/model"
)

// FDF merge tests merge FDF data into template PDF data and flattens to an output PDF file.
// Output files are checked with ghostscript and memory consumption is measured.
// Set environment variables:
// UNIDOC_E2E_FORCE_TESTS to "1" to force the tests to execute.
// UNIDOC_FDFMERGE_TESTDATA to the path of the corpus folder.
// UNIDOC_GS_BIN_PATH to the path of the ghostscript binary (gs) for validation.
var (
	fdfMergeCorpusFolder = os.Getenv("UNIDOC_FDFMERGE_TESTDATA")
)

// fdfMergeHashes defines a list of known output hashes to ensure that the output is constant.
// If there is a change in hash need to find out why and update only if the change is accepted.
var fdfMergeHashes = map[string]string{
	"NW_null_Business_V04.fdf":      "6e33f219994e4b9ee1e1843c976504df",
	"NW_null_Business_V05.fdf":      "ff1f8bd39f9be9844a6d85bafe07c790",
	"NW_null_Business_V05.v1.2.fdf": "ff1f8bd39f9be9844a6d85bafe07c790",
	"NW_null_Contract_V04.fdf":      "a54f4b42dc34997cfb701ef647cdbdfe",
	"N_null_Contract.fdf":           "c173340d6492984532cf51a4f5ceb4b6",
	"Network_Contract_V01.fdf":      "0ae2537bf8a8366aa97c1ca965b88d1f",
	"checkmark_check.fdf":           "8892cdb01318421f8d198233b80ab8e3",
	"checkmark_circle.fdf":          "3b1e6ef6aae2a7497b090e0960d2c163",
	"checkmark_cross.fdf":           "6b16b6d7437a3f59a7e9e72c1ecfd59b",
	"checkmark_diamond.fdf":         "123488e428914832f21e213339ed74f1",
	"checkmark_square.fdf":          "d0ac69dac7a933e440a5005b1712edeb",
	"checkmark_star.fdf":            "1326f152fb8158dffc08e5bb51cba1bc",
	"test_fail.fdf":                 "9a90cef679d6b4c13017c73c2528ca75",
}

// Test filling (fdf merge) and flattening form data and annotations.
func TestFdfMerging(t *testing.T) {
	if len(fdfMergeCorpusFolder) == 0 {
		if forceTest {
			t.Fatalf("UNIDOC_FDFMERGE_TESTDATA not set")
		}
		t.Skipf("UNIDOC_FDFMERGE_TESTDATA not set")
	}

	files, err := ioutil.ReadDir(fdfMergeCorpusFolder)
	if err != nil {
		if forceTest {
			t.Fatalf("Error opening %s: %v", fdfMergeCorpusFolder, err)
		}
		t.Skipf("Skipping flatten bench - unable to open UNIDOC_FDFMERGE_TESTDATA (%s)", fdfMergeCorpusFolder)
	}

	// Make a temporary folder and clean up after.
	tempdir, err := ioutil.TempDir("", "unidoc_fdfmerge")
	require.NoError(t, err)
	defer os.RemoveAll(tempdir)

	matchcount := 0
	for _, file := range files {
		if strings.ToLower(filepath.Ext(file.Name())) != ".fdf" {
			continue
		}
		fdfPath := filepath.Join(fdfMergeCorpusFolder, file.Name())
		bareName := strings.TrimSuffix(file.Name(), ".fdf")
		pdfPath := filepath.Join(fdfMergeCorpusFolder, bareName+".pdf")

		// Ensure memory is garbage collected prior to running for consistency.
		debug.FreeOSMemory()

		t.Logf("%s", file.Name())
		params := fdfMergeParams{
			templatePath: pdfPath,
			fdfPath:      fdfPath,
			outPath:      filepath.Join(tempdir, "filled_flatten_1_"+bareName+".pdf"),
			gsValidation: len(ghostscriptBinPath) > 0,
		}
		fdfMergeSingle(t, params)

		hash, err := hashFile(params.outPath)
		require.NoError(t, err)

		knownHash, has := fdfMergeHashes[file.Name()]
		if has {
			require.Equal(t, knownHash, hash)
			matchcount++
		} else {
			t.Logf("Output: %s", params.outPath)
			t.Logf("%s - hash: %s not in the list of known hashes", file.Name(), hash)
		}
	}

	// Ensure all the defined hashes were found.
	require.Equal(t, len(fdfMergeHashes), matchcount)

	t.Logf("FDF merge benchmark complete for %d cases in %s", matchcount, fdfMergeCorpusFolder)
}

type fdfMergeParams struct {
	templatePath string // template PDF file.
	fdfPath      string // form data FDF file.
	outPath      string
	gsValidation bool
}

func fdfMergeSingle(t *testing.T, params fdfMergeParams) {
	measure := startMemoryMeasurement()

	fdfData, err := fdf.LoadFromPath(params.fdfPath)
	require.NoError(t, err)

	f, err := os.Open(params.templatePath)
	require.NoError(t, err)
	defer f.Close()

	pdfReader, err := model.NewPdfReader(f)
	require.NoError(t, err)

	// Populate the form data.
	err = pdfReader.AcroForm.Fill(fdfData)
	require.NoError(t, err)

	// Flatten form.
	fieldAppearance := annotator.FieldAppearance{OnlyIfMissing: true, RegenerateTextFields: true}

	// NOTE: To customize certain styles try:
	style := fieldAppearance.Style()
	style.CheckmarkRune = '✖'
	style.AutoFontSizeFraction = 0.70
	fieldAppearance.SetStyle(style)

	err = pdfReader.FlattenFields(true, fieldAppearance)
	require.NoError(t, err)

	// Write out.
	model.SetPdfProducer("UniDoc")
	pdfWriter := model.NewPdfWriter()
	pdfWriter.SetForms(nil)

	for _, p := range pdfReader.PageList {
		err = pdfWriter.AddPage(p)
		require.NoError(t, err)
	}

	fout, err := os.Create(params.outPath)
	require.NoError(t, err)
	defer fout.Close()

	err = pdfWriter.Write(fout)
	require.NoError(t, err)

	measure.Stop()
	summary := measure.Summary()
	t.Logf("%s - summary %s", params.templatePath, summary)
}
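
Stripped of the benchmarking and hash-checking scaffolding, the fill-and-flatten flow this test exercises reduces to roughly the sketch below; the input and output paths are placeholders and error handling is simplified.

package main

import (
	"log"
	"os"

	"github.com/unidoc/unipdf/v3/annotator"
	"github.com/unidoc/unipdf/v3/fdf"
	"github.com/unidoc/unipdf/v3/model"
)

func main() {
	// Placeholder paths for illustration.
	fdfData, err := fdf.LoadFromPath("form_data.fdf")
	if err != nil {
		log.Fatal(err)
	}

	f, err := os.Open("template.pdf")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	pdfReader, err := model.NewPdfReader(f)
	if err != nil {
		log.Fatal(err)
	}

	// Fill the AcroForm from the FDF data, then flatten the fields
	// into regular page content.
	if err := pdfReader.AcroForm.Fill(fdfData); err != nil {
		log.Fatal(err)
	}
	fieldAppearance := annotator.FieldAppearance{OnlyIfMissing: true, RegenerateTextFields: true}
	if err := pdfReader.FlattenFields(true, fieldAppearance); err != nil {
		log.Fatal(err)
	}

	// Write the flattened output without form data.
	pdfWriter := model.NewPdfWriter()
	pdfWriter.SetForms(nil)
	for _, p := range pdfReader.PageList {
		if err := pdfWriter.AddPage(p); err != nil {
			log.Fatal(err)
		}
	}
	fout, err := os.Create("filled_flattened.pdf")
	if err != nil {
		log.Fatal(err)
	}
	defer fout.Close()
	if err := pdfWriter.Write(fout); err != nil {
		log.Fatal(err)
	}
}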

model/fields.go

Lines changed: 9 additions & 0 deletions
@@ -681,6 +681,15 @@ func (r *PdfReader) newPdfFieldFromIndirectObject(container *core.PdfIndirectObj
 	for _, obj := range kids.Elements() {
 		container, isIndirect := core.GetIndirect(obj)
 		if !isIndirect {
+			stream, ok := core.GetStream(obj)
+			if ok && stream.PdfObjectDictionary != nil {
+				nodeType, ok := core.GetNameVal(stream.Get("Type"))
+				if ok && nodeType == "Metadata" {
+					common.Log.Debug("ERROR: form field Kids array contains invalid Metadata stream. Skipping.")
+					continue
+				}
+			}
+
 			return nil, errors.New("not an indirect object (form field)")
 		}
