PyPDF2 这个以读的方式打开,不写是不是下次写的时候 是从上次打开的页面开始写?

!/usr/bin env python3

author = "stephen"
time = '2018_09_03'
import PyPDF2
from PyPDF2 import utils
import os
import shutil

class remove_water_mark(object):

def __init__(self):
    self.pdf_path = '/home/shenjianlin/fake_pdf/'
    self.remove_path='/home/shenjianlin/fake_remove_path/'
    self.trash_pdf_path='/home/shenjianlin/rubbish_pdf/'
    self.del_path='/home/shenjianlin/pdf_file/del_file/'


def get_pdf(self):
    file_list = [ file for file in os.listdir(self.pdf_path) ]
    return file_list
def read_content(self):
    pdf_output = PyPDF2.PdfFileWriter()
    fileList = self.get_pdf()
    for old_file in fileList:
      new_file=old_file.replace(' ','-').replace('“','').replace('”','').replace('(','').replace(')','').replace(':','-')
      old_file=self.pdf_path+old_file
      if '.pdf' not in new_file:
          new_file=new_file+'.pdf'
      new_file=self.pdf_path+new_file
      os.rename(old_file,new_file)
      if os.path.isfile(new_file) and '.py' not in new_file :
          print('读取的文件为%s'%new_file)
          try:
             pdf = PyPDF2.PdfFileReader(open(new_file, "rb"))
          except Exception as e:
              print(e)
              print('文件有问题正在移动')
              shutil.move(new_file, self.del_path + os.path.basename(new_file))
              continue
          if pdf.isEncrypted:
              try:
                  pdf.decrypt('')
                  print('File decrypted pdf')
              except:
                  command = ("cp " + new_file +
                             " temp.pdf; qpdf --password='' --decrypt temp.pdf " + new_file
                             + "; rm temp.pdf")
                  os.system(command)
                  print('File Decrypted (qpdf)')
                  pdf = PyPDF2.PdfFileReader(open(new_file, "rb"))
          flag=True
          for i in range(0, pdf.getNumPages()):
            if i == 1 and 'FormXob.86cdf15f1994e2f2b7032e461afd4234' not in str(pdf.getPage(i).get('/Resources')):
                    print('没有水印水处理,直接移动文件夹')
                    print('移动的文件夹为%s' % os.path.basename(new_file))
                    shutil.copy(new_file,self.remove_path)
                    shutil.move(new_file, self.trash_pdf_path)
                    flag = False
                    break
            elif  i < 3 and'FormXob.86cdf15f1994e2f2b7032e461afd4234' in str(pdf.getPage(i).get('/Resources')):
                print('有水印需要去除水印')
                Num_page_content = pdf.getPage(i)
                if Num_page_content.get('/Resources'):
                    page_resource = Num_page_content['/Resources']
                    if page_resource.get('/XObject'):
                        xobject = page_resource['/XObject']
                        form = None
                        for item in xobject:
                            if item.startswith('/FormXob'):
                                form = item
                        if form:
                            print('remove water mark in page: {}'.format(i))
                            xobject.pop(form)
                pdf_output.addPage(Num_page_content)
            else:
                pdf_output.addPage(pdf.getPage(i))
          if flag:
              with open(self.remove_path+os.path.basename(new_file), 'wb') as outfile:
                    try:
                     pdf_output.write(outfile)
                    except Exception as e:
                        print(e)
              print('watermark is get over')
              print('此时的文件夹为%s' % new_file)
              shutil.move(new_file, self.trash_pdf_path+os.path.basename(new_file))
              print('已经移动文件夹')
              print('\n')
          else:
              print()
              continue

if name == "__main__":

remove_water_mark().read_content()

题目描述

我这里想在第一页的时候直接转移文件,但是在处理pdf 的时候 把第一个pdf 的第一个页面赋给第二个pdf 的第一个页面去了,页面错乱了

阅读 1.1k
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题
宣传栏