Question

I have a PDF form that needs to be filled out a bunch of times (it's a timesheet, to be exact). Since I don't want to do this by hand, I am looking for a way to fill the forms out using a Python script or tools that could be used in a bash script.

是否有这方面的经验?

Answer 1

对于 Python,你需要 fdfgen 库和 pdftk。

@Hugh Bothwell 的评论完全正确,因此我用一个可运行的实现来补充这个答案。

如果你使用的是 Windows,还需要确保 python 和 pdftk 都已加入系统路径(PATH)(除非你愿意使用冗长的完整路径名)。

下面是根据 CSV 数据文件批量自动填写 PDF 表单的代码:

import csv
from fdfgen import forge_fdf
import os
import sys

# Put the current working directory first on the module search path.
sys.path.insert(0, os.getcwd())

# Custom configuration: adjust these values for your own form and data.
filename_prefix = "NVC"      # prefix of every generated PDF file name
csv_file = "NVC.csv"         # input data; its first row holds the field names
pdf_file = "NVC.pdf"         # PDF template passed to pdftk
tmp_file = "tmp.fdf"         # scratch FDF file, rewritten for every form
output_folder = "./output/"  # prepended verbatim to each output file name

def process_csv(file):
    """Read a CSV file and pair every data cell with its column header.

    The first row is taken as the header row. Every following row becomes a
    list of ``(header, value)`` tuples, one tuple per header column.

    :param file: path of the CSV file to read.
    :return: list of rows, each a list of ``(header, value)`` tuples; empty
        when the file contains no data rows.
    :raises IndexError: if a data row has fewer cells than the header row.
    """
    # newline="" is what the csv module asks for; the with-block guarantees
    # the file is closed even if a row turns out to be malformed.
    with open(file, newline="") as handle:
        rows = csv.reader(handle)
        headers = next(rows, [])
        return [
            [(header, row[column]) for column, header in enumerate(headers)]
            for row in rows
        ]

def form_fill(fields):
    """Fill the PDF template with one row of data and write a new PDF.

    The field values are serialised to the temporary FDF file ``tmp_file``,
    pdftk merges that file into ``pdf_file``, and the temporary file is
    removed afterwards. The output name is built from ``output_folder``,
    ``filename_prefix`` and the value of the second field.

    :param fields: list of ``(field_name, value)`` tuples for one form; it
        needs at least two entries because ``fields[1][1]`` names the output.
    :raises FileNotFoundError: if the ``pdftk`` executable cannot be found.
    """
    import subprocess  # local import: only this function needs it

    fdf = forge_fdf("", fields, [], [], [])
    output_file = "{0}{1} {2}.pdf".format(output_folder, filename_prefix, fields[1][1])

    # forge_fdf yields bytes on Python 3, so pick the file mode from the
    # actual type instead of assuming text.
    mode = "wb" if isinstance(fdf, bytes) else "w"
    with open(tmp_file, mode) as fdf_file:
        fdf_file.write(fdf)

    try:
        # An argument list avoids shell quoting problems with file names that
        # contain quotes or other special characters.
        subprocess.run(
            ["pdftk", pdf_file, "fill_form", tmp_file, "output", output_file, "dont_ask"],
            check=False,
        )
    finally:
        # Always clean up the scratch file, even if launching pdftk failed.
        os.remove(tmp_file)

data = process_csv(csv_file)
print("Generating Forms:")
print("-----------------------")
for row in data:
    # Rows whose first column reads "Yes" are skipped.
    if row[0][1] == "Yes":
        continue
    # The second column's value identifies the form in the progress message.
    print("{0} {1} created...".format(filename_prefix, row[1][1]))
    form_fill(row)

说明:弄清楚如何定制这段代码并不需要多高深的学问。开头的变量声明包含了自定义配置。

在 CSV 中,第一行的每一列应填写 PDF 文件中对应表单域的名称。模板中没有对应表单域的列将被忽略。

在 PDF 模板中,只需在希望填入数据的位置创建可编辑的表单域,并确保其名称与 CSV 数据一致。

就这一具体配置而言,只需将此脚本与 NVC.csv、NVC.pdf 以及一个名为 output 的文件夹放在同一目录下。运行它,其余工作会自动完成。

Answer 2

更快捷的版本,不需要pdftk 或 fdfgen, 纯 Python 3.6+:

# -*- coding: utf-8 -*-

from collections import OrderedDict
from PyPDF2 import PdfFileWriter, PdfFileReader


def _getFields(obj, tree=None, retval=None, fileobj=None):
    """
    Extract field data if this PDF contains interactive form fields.

    The *tree* and *retval* parameters are for recursive use.

    :param obj: reader object providing ``trailer`` plus the
        ``_checkKids`` and ``_buildField`` methods used below.
    :param tree: field (sub)tree to inspect; looked up from the document
        catalog's ``/AcroForm`` entry when *retval* is ``None``.
    :param retval: mapping that collects the fields; created on the
        outermost call.
    :param fileobj: A file object (usually a text file) to write
        a report to on all interactive form fields found.
    :return: A dictionary where each key is a field name, and each
        value is a :class:`Field<PyPDF2.generic.Field>` object. By
        default, the mapping name is used for keys.
    :rtype: dict, or ``None`` if form data could not be located.
    """
    fieldAttributes = {"/FT": "Field Type", "/Parent": "Parent", "/T": "Field Name", "/TU": "Alternate Field Name",
                       "/TM": "Mapping Name", "/Ff": "Field Flags", "/V": "Value", "/DV": "Default Value"}
    if retval is None:
        retval = OrderedDict()
        catalog = obj.trailer["/Root"]
        # get the AcroForm tree
        if "/AcroForm" in catalog:
            tree = catalog["/AcroForm"]
        else:
            return None
    if tree is None:
        return retval

    obj._checkKids(tree, retval, fileobj)
    # A tree carrying any field attribute is itself a field.
    if any(attr in tree for attr in fieldAttributes):
        obj._buildField(tree, retval, fileobj, fieldAttributes)

    if "/Fields" in tree:
        for f in tree["/Fields"]:
            field = f.getObject()
            obj._buildField(field, retval, fileobj, fieldAttributes)

    return retval


def get_form_fields(infile):
    """Return the current values of all form fields in a PDF.

    :param infile: path of the PDF file to inspect.
    :return: ``OrderedDict`` mapping each field name to its ``/V`` entry, or
        to ``""`` when the field has none; empty when ``_getFields`` finds no
        form data.
    """
    # Keep the file open while the fields are read, then close it reliably.
    with open(infile, "rb") as stream:
        fields = _getFields(PdfFileReader(stream))
        if fields is None:
            # _getFields returns None when the catalog has no /AcroForm.
            return OrderedDict()
        return OrderedDict((name, field.get("/V", "")) for name, field in fields.items())


def update_form_values(infile, outfile, newvals=None):
    """Copy a PDF to *outfile*, updating the form field values on each page.

    :param infile: path of the source PDF.
    :param outfile: path the updated PDF is written to.
    :param newvals: mapping of field name to new value. When omitted or
        empty, every field is filled with ``#<index> <name>=<current value>``
        so the fields can be identified in the output.

    A failure while updating a page is printed and the page is still added,
    so the output always contains every page of the input.
    """
    with open(infile, "rb") as source:
        pdf = PdfFileReader(source)
        writer = PdfFileWriter()

        values = newvals or None
        for page_number in range(pdf.getNumPages()):
            page = pdf.getPage(page_number)
            try:
                if values is None:
                    # Build the self-describing values once, not per page.
                    values = {k: f"#{i} {k}={v}"
                              for i, (k, v) in enumerate(get_form_fields(infile).items())
                              }
                writer.updatePageFormFieldValues(page, values)
            except Exception as e:
                print(repr(e))
            writer.addPage(page)

        # Write while the source file is still open.
        with open(outfile, "wb") as out:
            writer.write(out)


if __name__ == "__main__":
    from pprint import pprint

    pdf_file_name = "2PagesFormExample.pdf"

    # Show the current field names and values.
    pprint(get_form_fields(pdf_file_name))

    update_form_values(pdf_file_name, "out-" + pdf_file_name)  # enumerate & fill the fields with their own names
    # NOTE(review): the "?" in the second value looks like a character lost in
    # transcription; confirm the intended text.
    update_form_values(pdf_file_name, "out2-" + pdf_file_name,
                       {"my_fieldname_1": "My Value",
                        "my_fieldname_2": "My Another ?alue"})  # update the form fields

Answer 3

替换原文件:

# Fill the form into output.pdf, then swap it in for original.pdf.
status = os.system('pdftk "original.pdf" fill_form "data.fdf" output "output.pdf"')
os.remove("data.fdf")
if status != 0:
    # Leave original.pdf untouched when pdftk reported a failure.
    raise RuntimeError("pdftk failed with exit status {0}".format(status))
# os.replace overwrites the destination directly, so original.pdf is never
# deleted before its replacement is in place.
os.replace("output.pdf", "original.pdf")

Answer 4

我写了一个基于 pdfrw、pdf2image、Pillow 和 PyPDF2 的库,名为 fillpdf(`pip install fillpdf`,以及 Poppler 依赖:`conda install -c conda-forge poppler`)。

基本用法:

from fillpdf import fillpdfs

fillpdfs.get_form_fields("blank.pdf")

# Returns a dictionary of fields.
# Set the returned dictionary's values and save it to a variable.
# For radio boxes ("Off" = not filled, "Yes" = filled)

data_dict = {
    "Text2": "Name",
    "Text4": "LastName",
    "box": "Yes",
}

fillpdfs.write_fillable_pdf("blank.pdf", "new.pdf", data_dict)

# If you want it flattened:
fillpdfs.flatten_pdf("new.pdf", "newflat.pdf")

More info here: https://github.com/t-houssian/fillpdf

如果有些表单域没有填上,可以使用 fitz(`pip install PyMuPDF`)和 PyPDF2(`pip install PyPDF2`),像下面这样按需调整坐标点:


import fitz
from PyPDF2 import PdfFileReader

file_handle = fitz.open("blank.pdf")

# Read the first page's size and the page count, then close the file.
with open("blank.pdf", "rb") as stream:
    pdf = PdfFileReader(stream)
    box = pdf.getPage(0).mediaBox
    w = box.getWidth()
    h = box.getHeight()
    pages = pdf.getNumPages() - 1  # zero-based index of the last page

# For images
image_rectangle = fitz.Rect((w/2)-200, h-255, (w/2)-100, h-118)
last_page = file_handle[pages]
last_page._wrapContents()
last_page.insertImage(image_rectangle, filename="image.png")

# For text
last_page.insertText(fitz.Point((w/2)-247, h-478), "John Smith", fontsize=14, fontname="times-bold")
file_handle.save("newpdf.pdf")

友情链接