From 695e71c5317ddd7aa93aaa9be2ccdc6ac46cee51 Mon Sep 17 00:00:00 2001
From: JG Heithcock
Date: Sat, 4 Apr 2026 11:18:31 -0700
Subject: [PATCH] [MM-63434] Use forked PDF library with parsing depth limit

Replace github.com/ledongthuc/pdf with a fork that limits object nesting depth during parsing. Add test coverage.
---
 server/go.mod                                     |  3 +++
 server/go.sum                                     |  2 ++
 server/platform/services/docextractor/pdf_test.go | 15 +++++++++++++++
 3 files changed, 20 insertions(+)

diff --git a/server/go.mod b/server/go.mod
index 4b1876d1174..d098a21525f 100644
--- a/server/go.mod
+++ b/server/go.mod
@@ -230,3 +230,6 @@ require (
 
 // See MM-66167 for more details.
 replace github.com/vmihailenco/msgpack/v5 => github.com/mattermost/msgpack/v5 v5.0.0-20260120151306-2f9c67d7e57f
+
+// See MM-63434 for more details.
+replace github.com/ledongthuc/pdf => github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe
diff --git a/server/go.sum b/server/go.sum
index 31145b4c75d..9c68e8fa1ff 100644
--- a/server/go.sum
+++ b/server/go.sum
@@ -310,6 +310,8 @@ github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKl
 github.com/jaytaylor/html2text v0.0.0-20260303211410-1a4bdc82ecec h1:DrV+GDNKHeHyfqEZaoxQoHlWcgTBiaJ8ZUyNyd5vvkY=
 github.com/jaytaylor/html2text v0.0.0-20260303211410-1a4bdc82ecec/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
 github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
+github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe h1:9GAP+hdboArdSUwi82IXaNd+Qq8+cGFQh7xAcwZNN+s=
+github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
 github.com/jhump/protoreflect v1.17.0 h1:qOEr613fac2lOuTgWN4tPAtLL7fUSbuJL5X5XumQh94=
 github.com/jhump/protoreflect v1.17.0/go.mod h1:h9+vUUL38jiBzck8ck+6G/aeMX8Z4QUY/NiJPwPNi+8=
 github.com/jmoiron/sqlx v1.4.0 h1:1PLqN7S1UYp5t4SrVVnt4nUVNemrDAtxlulVe+Qgm3o=
diff --git a/server/platform/services/docextractor/pdf_test.go b/server/platform/services/docextractor/pdf_test.go
index ef7498bf497..722cbf64896 100644
--- a/server/platform/services/docextractor/pdf_test.go
+++ b/server/platform/services/docextractor/pdf_test.go
@@ -28,6 +28,21 @@ func TestPdfFile(t *testing.T) {
 	require.Equal(t, contentText, extractedText)
 }
 
+func TestPdfDeeplyNestedObjects(t *testing.T) {
+	// Test for MM-63434
+	var buf bytes.Buffer
+	buf.WriteString("%PDF-1.0\n")
+	for range 10_000 {
+		buf.WriteString("0\n0\nobj\n")
+	}
+	buf.WriteString("startxref\n0\n%%EOF\n")
+
+	extractor := pdfExtractor{}
+	text, err := extractor.Extract("excessive-nests.pdf", bytes.NewReader(buf.Bytes()), 0)
+	require.Error(t, err)
+	require.Empty(t, text)
+}
+
 func TestWrongPdfFile(t *testing.T) {
 	extractor := pdfExtractor{}
 	content, err := testutils.ReadTestFile("sample-doc.docx")