[MM-63434] Use forked PDF library with parsing depth limit

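Replace github.com/ledongthuc/pdf with a fork that limits object nesting depth during parsing. Add a regression test that feeds the PDF extractor a document made of 10,000 unterminated object headers and expects an error with no extracted text.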
2026-05-28 04:35:04 -04:00 · 2026-04-04 11:18:31 -07:00 · 2026-04-04 11:18:31 -07:00 · 695e71c531
commit 695e71c531
parent 24e38f2bd7
3 changed files with 20 additions and 0 deletions
--- a/server/go.mod
+++ b/server/go.mod
@ -230,3 +230,6 @@ require (

 // See MM-66167 for more details.
 replace github.com/vmihailenco/msgpack/v5 => github.com/mattermost/msgpack/v5 v5.0.0-20260120151306-2f9c67d7e57f
+
+// See MM-63434 for more details.
+replace github.com/ledongthuc/pdf => github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe
--- a/server/go.sum
+++ b/server/go.sum
@ -310,6 +310,8 @@ github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKl
 github.com/jaytaylor/html2text v0.0.0-20260303211410-1a4bdc82ecec h1:DrV+GDNKHeHyfqEZaoxQoHlWcgTBiaJ8ZUyNyd5vvkY=
 github.com/jaytaylor/html2text v0.0.0-20260303211410-1a4bdc82ecec/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
 github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
+github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe h1:9GAP+hdboArdSUwi82IXaNd+Qq8+cGFQh7xAcwZNN+s=
+github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
 github.com/jhump/protoreflect v1.17.0 h1:qOEr613fac2lOuTgWN4tPAtLL7fUSbuJL5X5XumQh94=
 github.com/jhump/protoreflect v1.17.0/go.mod h1:h9+vUUL38jiBzck8ck+6G/aeMX8Z4QUY/NiJPwPNi+8=
 github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
--- a/server/platform/services/docextractor/pdf_test.go
+++ b/server/platform/services/docextractor/pdf_test.go
@ -28,6 +28,21 @@ func TestPdfFile(t *testing.T) {
 	require.Equal(t, contentText, extractedText)
 }

+func TestPdfDeeplyNestedObjects(t *testing.T) {
+	// Regression test for MM-63434: a PDF built from 10,000 object headers with no matching endobj must produce an error and no text.
+	var buf bytes.Buffer
+	buf.WriteString("%PDF-1.0\n")
+	for range 10_000 {
+		buf.WriteString("0\n0\nobj\n") // a "0 0 obj" header split across lines; no endobj is ever written
+	}
+	buf.WriteString("startxref\n0\n%%EOF\n") // minimal trailer: startxref 0 followed by the EOF marker
+
+	extractor := pdfExtractor{}
+	text, err := extractor.Extract("excessive-nests.pdf", bytes.NewReader(buf.Bytes()), 0)
+	require.Error(t, err) // extraction must fail rather than return a result for this input
+	require.Empty(t, text)
+}
+
 func TestWrongPdfFile(t *testing.T) {
 	extractor := pdfExtractor{}
 	content, err := testutils.ReadTestFile("sample-doc.docx")