I have a list of objects: List = ['Doc1.xlsx','Doc2.csv','Doc3.pdf']
and a list of their names: List1 = ['Doc1_name.xlsx','Doc2_name.csv','Doc3_name.pdf']
.
I need to attach them to an existing PDF. I tried the following code, which works only if I have one attachment. Now I am trying to iterate over the attachments to attach all of them, but in Final.pdf
only the last object, 'Doc3.pdf', is attached.
# Copy every page of the existing PDF into a fresh writer.
# PdfFileReader's second positional parameter is `strict`, not a file mode,
# so no 'rb' is passed here.
fileReader = PdfFileReader('Existing_pdf.pdf')
fileWriter = PdfFileWriter()
fileWriter.appendPagesFromReader(fileReader)

# Walk both lists in lockstep, starting with the FIRST document
# (range(1, len(List)) silently skipped index 0).
# NOTE(review): addAttachment's second argument is the attachment's data.
# If List holds file paths rather than file contents, read the bytes first,
# e.g. open(path, 'rb').read() -- confirm what List actually contains.
# NOTE: PyPDF2's addAttachment overwrites the catalog's /Names entry on each
# call, so even with this loop only the last attachment ends up in Final.pdf.
for attachment_name, attachment_data in zip(List1, List):
    fileWriter.addAttachment(attachment_name, attachment_data)

with open('Final.pdf', 'wb') as output_pdf:
    fileWriter.write(output_pdf)
Advertisement
Answer
It seems to me that the addAttachment method always replaces the current attachment.
From pdf.py in the PyPDF2 Github:
def addAttachment(self, fname, fdata):
    """
    Embed a single file in the PDF as an attachment.

    :param fname: name written into the file specification and used as
        the key in the ``/EmbeddedFiles`` names array.
    :param fdata: data placed into the embedded-file stream.

    Every call builds a brand-new ``/Names`` dictionary and stores it on
    the document root, so a ``/Names`` entry that was already there
    (including an attachment added by an earlier call) is overwritten.
    """
    # Stream object that carries the attachment's data.
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({
        NameObject("/Type"): NameObject("/EmbeddedFile")
    })

    # /EF dictionary: points the file specification at the stream above.
    efEntry = DictionaryObject()
    efEntry.update({
        NameObject("/F"):file_entry
    })

    # File specification: the file name plus the embedded-file reference.
    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname), # Perhaps also try TextStringObject
        NameObject("/EF"): efEntry
    })

    # Names array holding exactly one (name, filespec) pair.
    embeddedFilesNamesDictionary = DictionaryObject()
    embeddedFilesNamesDictionary.update({
        NameObject("/Names"): ArrayObject([createStringObject(fname), filespec])
    })

    # Wrapper dictionary that will become the root's /Names entry.
    embeddedFilesDictionary = DictionaryObject()
    embeddedFilesDictionary.update({
        NameObject("/EmbeddedFiles"): embeddedFilesNamesDictionary
    })

    # Update the root
    # (sets /Names to the freshly built dictionary rather than merging
    # into an existing one)
    self._root_object.update({
        NameObject("/Names"): embeddedFilesDictionary
    })
where I believe
self._root_object.update({ NameObject("/Names"): embeddedFilesDictionary })
replaces the attachment, instead of adding it.
EDIT:
This script worked for me to attach two .txt files.
It uses the above addAttachment
method, which I have adjusted slightly to enable attaching multiple files.
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import DecodedStreamObject, NameObject, DictionaryObject, createStringObject, ArrayObject


def appendAttachment(myPdfFileWriterObj, fname, fdata):
    """
    Attach a file to the PDF being built, keeping earlier attachments.

    Unlike ``PdfFileWriter.addAttachment``, this does not overwrite the
    catalog's ``/Names`` entry: the new (name, filespec) pair is appended to
    the existing ``/Names -> /EmbeddedFiles -> /Names`` array, and any level
    of that structure that is missing is created first.

    :param myPdfFileWriterObj: the ``PdfFileWriter`` to attach the file to
        (its private ``_root_object`` catalog is modified in place).
    :param fname: name stored in the file specification and in the
        embedded-files names array.
    :param fdata: data placed into the embedded-file stream.
    """
    # The entry for the file
    file_entry = DecodedStreamObject()
    file_entry.setData(fdata)
    file_entry.update({NameObject("/Type"): NameObject("/EmbeddedFile")})

    # The Filespec entry
    efEntry = DictionaryObject()
    efEntry.update({NameObject("/F"): file_entry})

    filespec = DictionaryObject()
    filespec.update({
        NameObject("/Type"): NameObject("/Filespec"),
        NameObject("/F"): createStringObject(fname),
        NameObject("/EF"): efEntry,
    })

    root = myPdfFileWriterObj._root_object

    # Get or create each level separately. Checking only for the root's
    # /Names would raise KeyError when the catalog already has a /Names
    # dictionary that contains no /EmbeddedFiles entry.
    if "/Names" not in root:
        root.update({NameObject("/Names"): DictionaryObject()})
    names_dictionary = root["/Names"]

    if "/EmbeddedFiles" not in names_dictionary:
        names_dictionary.update({NameObject("/EmbeddedFiles"): DictionaryObject()})
    embedded_files = names_dictionary["/EmbeddedFiles"]

    if "/Names" not in embedded_files:
        embedded_files.update({NameObject("/Names"): ArrayObject()})

    # The array is a flat sequence of name/filespec pairs.
    # NOTE(review): pairs are appended in call order; the PDF specification
    # expects name-tree keys in sorted order -- attach files in sorted name
    # order if a strict consumer has to read the result.
    embedded_files["/Names"].extend([createStringObject(fname), filespec])


# PdfFileReader's second positional parameter is `strict`, not a file mode,
# so no 'rb' is passed here.
fr = PdfFileReader('dummy.pdf')
fw = PdfFileWriter()
fw.appendPagesFromReader(fr)

my_attach_files = ['test.txt', 'test2.txt']
for my_test in my_attach_files:
    # Read the raw bytes; the handle is closed before attaching.
    with open(my_test, 'rb') as my_test_attachment:
        my_test_data = my_test_attachment.read()
    appendAttachment(fw, my_test, my_test_data)

with open('dummy_new.pdf', 'wb') as output_pdf:
    fw.write(output_pdf)
Hope this works for you.