mirror of
https://github.com/mattermost/mattermost.git
synced 2026-05-28 04:35:04 -04:00
* [MM-63434] Use forked PDF library with parsing depth limit (#35947)
* [MM-63434] Use forked PDF library with parsing depth limit
Replace github.com/ledongthuc/pdf with a fork that limits object
nesting depth during parsing. Add test coverage.
* Reverting incorrect merge that lost the change to msgpack
The error was in merge 64bdff88d8
* Remove stale ledongthuc/pdf checksums after replacing it with the fork (go mod tidy)
* Fix TestPdfFile expected text for forked PDF extractor (release-10.11)
The jgheithcock/pdf fork returns a leading newline for sample-doc.pdf; align
the expected text in TestPdfFile with master and the 11.x cherry-picks.
This commit is contained in:
parent
61d68d2d6e
commit
95be6eaf86
3 changed files with 21 additions and 7 deletions
|
|
@ -248,10 +248,6 @@ exclude (
|
|||
github.com/willf/bitset v1.2.0
|
||||
)
|
||||
|
||||
// Prevent from being upgraded because this library has a minimum requirement
|
||||
// of Go 1.24.
|
||||
replace github.com/ledongthuc/pdf => github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06
|
||||
|
||||
// Also prevent tablewriter from being upgraded because the downstream dependency
|
||||
// jaytaylor/html2text does not have a go.mod file which makes it bump to the latest
|
||||
// version always. Tablewriter has made breaking changes to its latest release.
|
||||
|
|
@ -259,3 +255,6 @@ replace github.com/olekukonko/tablewriter => github.com/olekukonko/tablewriter v
|
|||
|
||||
// See MM-66167, MM-68222 for more details.
|
||||
replace github.com/vmihailenco/msgpack/v5 => github.com/mattermost/msgpack/v5 v5.0.0-20260408165622-cadfad56a815
|
||||
|
||||
// See MM-63434 for more details.
|
||||
replace github.com/ledongthuc/pdf => github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe
|
||||
|
|
|
|||
|
|
@ -352,6 +352,8 @@ github.com/jaytaylor/html2text v0.0.0-20180606194806-57d518f124b0/go.mod h1:CVKl
|
|||
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056 h1:iCHtR9CQyktQ5+f3dMVZfwD2KWJUgm7M0gdL9NGr8KA=
|
||||
github.com/jaytaylor/html2text v0.0.0-20230321000545-74c2419ad056/go.mod h1:CVKlgaMiht+LXvHG173ujK6JUhZXKb2u/BQtjPDIvyk=
|
||||
github.com/jellevandenhooff/dkim v0.0.0-20150330215556-f50fe3d243e1/go.mod h1:E0B/fFc00Y+Rasa88328GlI/XbtyysCtTHZS8h7IrBU=
|
||||
github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe h1:9GAP+hdboArdSUwi82IXaNd+Qq8+cGFQh7xAcwZNN+s=
|
||||
github.com/jgheithcock/pdf v0.0.0-20260404175814-28cd6530c1fe/go.mod h1:1fEHWurg7pvf5SG6XNE5Q8UZmOwex51Mkx3SLhrW5B4=
|
||||
github.com/jhump/protoreflect v1.15.1 h1:HUMERORf3I3ZdX05WaQ6MIpd/NJ434hTp5YiKgfCL6c=
|
||||
github.com/jhump/protoreflect v1.15.1/go.mod h1:jD/2GMKKE6OqX8qTjhADU1e6DShO+gavG9e0Q693nKo=
|
||||
github.com/jmespath/go-jmespath v0.4.0 h1:BEgLn5cpjn8UN1mAw4NjwDrS35OdebyEtFe+9YPoQUg=
|
||||
|
|
@ -403,8 +405,6 @@ github.com/lann/builder v0.0.0-20180802200727-47ae307949d0 h1:SOEGU9fKiNWd/HOJuq
|
|||
github.com/lann/builder v0.0.0-20180802200727-47ae307949d0/go.mod h1:dXGbAdH5GtBTC4WfIxhKZfyBF/HBFgRZSWwZ9g/He9o=
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0 h1:P6pPBnrTSX3DEVR4fDembhRWSsG5rVo6hYhAB/ADZrk=
|
||||
github.com/lann/ps v0.0.0-20150810152359-62de8c46ede0/go.mod h1:vmVJ0l/dxyfGW6FmdpVm2joNMFikkuWg0EoCKLGUMNw=
|
||||
github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06 h1:kacRlPN7EN++tVpGUorNGPn/4DnB7/DfTY82AOn6ccU=
|
||||
github.com/ledongthuc/pdf v0.0.0-20240201131950-da5b75280b06/go.mod h1:imJHygn/1yfhB7XSJJKlFZKl/J+dCPAknuiaGOshXAs=
|
||||
github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5 h1:W7p+m/AECTL3s/YR5RpQ4hz5SjNeKzZBl1q36ws12s0=
|
||||
github.com/levigross/exp-html v0.0.0-20120902181939-8df60c69a8f5/go.mod h1:QMe2wuKJ0o7zIVE8AqiT8rd8epmm6WDIZ2wyuBqYPzM=
|
||||
github.com/lib/pq v1.10.9 h1:YXG7RB+JIjhP29X+OtkiDnYaXQwpS4JEWq7dtCCRUEw=
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ func TestPdfEmptyFile(t *testing.T) {
|
|||
|
||||
func TestPdfFile(t *testing.T) {
|
||||
extractor := pdfExtractor{}
|
||||
contentText := "This is a simple document that contains some text."
|
||||
contentText := "\nThis is a simple document that contains some text."
|
||||
content, err := testutils.ReadTestFile("sample-doc.pdf")
|
||||
require.NoError(t, err)
|
||||
extractedText, err := extractor.Extract("sample-doc.pdf", bytes.NewReader(content), 0)
|
||||
|
|
@ -28,6 +28,21 @@ func TestPdfFile(t *testing.T) {
|
|||
require.Equal(t, contentText, extractedText)
|
||||
}
|
||||
|
||||
// TestPdfDeeplyNestedObjects is the regression test for MM-63434. It builds a
// synthetic PDF in memory from 10,000 repeated "0 0 obj" token sequences and
// checks that extraction fails with an error and yields no text.
func TestPdfDeeplyNestedObjects(t *testing.T) {
|
||||
// Assemble the input: a PDF version header, then 10,000 object headers
// (written one token per line) with no body or terminator between them,
// then a startxref/%%EOF trailer.
// NOTE(review): presumably each unterminated header is parsed as nested
// inside the previous one, which is what exercises the parser's depth
// limit — confirm against the forked PDF library.
|
||||
var buf bytes.Buffer
|
||||
buf.WriteString("%PDF-1.0\n")
|
||||
for range 10_000 {
|
||||
buf.WriteString("0\n0\nobj\n")
|
||||
}
|
||||
buf.WriteString("startxref\n0\n%%EOF\n")
|
||||
|
||||
// Run the extractor over the in-memory document; it must report an error
// and return an empty string rather than any extracted text.
extractor := pdfExtractor{}
|
||||
text, err := extractor.Extract("excessive-nests.pdf", bytes.NewReader(buf.Bytes()), 0)
|
||||
require.Error(t, err)
|
||||
require.Empty(t, text)
|
||||
}
|
||||
|
||||
func TestWrongPdfFile(t *testing.T) {
|
||||
extractor := pdfExtractor{}
|
||||
content, err := testutils.ReadTestFile("sample-doc.docx")
|
||||
|
|
|
|||
Loading…
Reference in a new issue