-
Notifications
You must be signed in to change notification settings - Fork 678
Closed
Labels
postpone (postpone to a future version) · upstream bug (bug outside this package)
Description
I am facing a text-overlap issue when applying redactions to a PDF.
I am using Python 3.9.2 and pip 20.2.3.
Image before applying redaction

Image after applying redaction

I am using the code below for redaction:
# imports
import fitz
import re
import sys
import json
from matplotlib import colors
class Redactor:
    """Redact text matches and explicit quadrilateral regions from a PDF.

    Each search term is looked up on every page and every hit is covered
    by a redaction annotation. The quads listed in ``vertexes`` for that
    page are redacted as well. All annotations share one fill color.
    """

    def __init__(self, path, desc, out, vertexes, color):
        """Store the parameters of the redaction job.

        Args:
            path: Path of the PDF to open.
            desc: Comma-separated search terms. Empty terms (from an empty
                string, a doubled comma or a trailing comma) are dropped
                so they are never handed to the text search.
            out: Path the redacted PDF is saved to.
            vertexes: Container indexed by page number. Each entry holds a
                ``"vertices"`` list whose items are sequences of four
                points, each point a mapping with ``"x"`` and ``"y"``.
            color: Color specification passed to
                ``matplotlib.colors.to_rgb``.
        """
        self.path = path
        self.desc = [term for term in desc.split(',') if term]
        self.out = out
        self.vertexes = vertexes
        self.color = color

    @staticmethod
    def _quad_from_corners(corners):
        """Build a ``fitz.Quad`` from four ``{"x": ..., "y": ...}`` points."""
        return fitz.Quad(
            tuple((corners[i]["x"], corners[i]["y"]) for i in range(4))
        )

    def redaction(self):
        """Redact every page of the document and save it to ``self.out``.

        Raises:
            IndexError / KeyError: If ``self.vertexes`` has no entry, or no
                ``"vertices"`` key, for one of the pages.
        """
        # The fill color is the same for every annotation; convert it once.
        fill = colors.to_rgb(self.color)

        doc = fitz.open(self.path)
        try:
            for page in doc:
                if not page._isWrapped:
                    page.wrapContents()

                # The text page is built before any redaction is applied,
                # so every term is searched against the original content.
                text_page = page.getDisplayList().getTextPage()

                pending = 0
                for term in self.desc or []:
                    for quad in text_page.search(term, quads=True) or ():
                        page.addRedactAnnot(quad, fill=fill)
                        pending += 1

                for corners in self.vertexes[page.number]["vertices"]:
                    page.addRedactAnnot(
                        self._quad_from_corners(corners), fill=fill
                    )
                    pending += 1

                # Apply all of this page's redactions in one pass instead
                # of rewriting the page after every term or region.
                if pending:
                    page.apply_redactions()

            doc.save(self.out, deflate=True, clean=True, linear=True)
        finally:
            # Release the document even when redaction or saving fails.
            doc.close()

        print("Successfully redacted")
# Command-line entry point.
# argv: <input pdf> <comma-separated search terms> <output pdf> <fill color>
# stdin: the first line is a JSON object whose "vertex" entry is handed to
# Redactor as the per-page regions.
if __name__ == "__main__":
    argv = sys.argv
    input_pdf, search_terms, output_pdf, fill_color = (
        argv[1], argv[2], argv[3], argv[4]
    )

    # All of stdin is consumed, but only its first line is parsed as JSON.
    first_line = sys.stdin.readlines()[0]
    payload = json.loads(first_line)
    page_regions = payload["vertex"]

    Redactor(
        input_pdf, search_terms, output_pdf, page_regions, fill_color
    ).redaction()
Metadata
Metadata
Assignees
Labels
postpone (postpone to a future version) · upstream bug (bug outside this package)