~fd/comic-ocr

91e8ef4aec38d4c1ea025463de717464bf5a8961 — Ersei Saggi 1 year, 9 months ago f246775
Formatting
1 files changed, 47 insertions(+), 27 deletions(-)

M comicocr.py
M comicocr.py => comicocr.py +47 -27
@@ 7,9 7,10 @@ from imutils.object_detection import non_max_suppression
import numpy as np
from textblob import TextBlob

TESSCONFIG = "-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\\\'\!\?\ \. --oem 1"
TESSCONFIG = "-c tessedit_char_whitelist=0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ\\'\!\?\ \. --oem 1"

class ComicScanner():

class ComicScanner:
    def __init__(self) -> None:
        self.net = cv2.dnn.readNet("frozen_east_text_detection.pb")



@@ 19,23 20,22 @@ class ComicScanner():
        (H, W) = image.shape[:2]
        # set the new width and height and then determine the ratio in change
        # for both the width and height
        (newW, newH) = (W>>5<<5, H>>5<<5)
        (newW, newH) = (W >> 5 << 5, H >> 5 << 5)
        rW = W / float(newW)
        rH = H / float(newH)
        

        img = cv2.resize(image, (newW, newH))
        (H, W) = img.shape[:2]
        # define the two output layer names for the EAST detector model that
        # we are interested -- the first is the output probabilities and the
        # second can be used to derive the bounding box coordinates of text
        layerNames = [
            "feature_fusion/Conv_7/Sigmoid",
            "feature_fusion/concat_3"]
        layerNames = ["feature_fusion/Conv_7/Sigmoid", "feature_fusion/concat_3"]

        # construct a blob from the image and then perform a forward pass of
        # the model to obtain the two output layer sets
        blob = cv2.dnn.blobFromImage(img, 1.0, (W, H),
            (123.68, 116.78, 103.94), swapRB=True, crop=False)
        blob = cv2.dnn.blobFromImage(
            img, 1.0, (W, H), (123.68, 116.78, 103.94), swapRB=True, crop=False
        )
        self.net.setInput(blob)
        (scores, geometry) = self.net.forward(layerNames)
        # grab the number of rows and columns from the scores volume, then


@@ 55,7 55,7 @@ class ComicScanner():
            xData2 = geometry[0, 2, y]
            xData3 = geometry[0, 3, y]
            anglesData = geometry[0, 4, y]
        	# loop over the number of columns
            # loop over the number of columns
            for x in range(0, numCols):
                # if our score does not have sufficient probability, ignore it
                if scoresData[x] < 0.5:


@@ 70,8 70,8 @@ class ComicScanner():
                sin = np.sin(angle)
                # use the geometry volume to derive the width and height of
                # the bounding box
                h = 1.2*(xData0[x] + xData2[x])
                w = 1.2*(xData1[x] + xData3[x])
                h = 1.2 * (xData0[x] + xData2[x])
                w = 1.2 * (xData1[x] + xData3[x])
                # compute both the starting and ending (x, y)-coordinates for
                # the text prediction bounding box
                endX = int(offsetX + (cos * xData1[x]) + (sin * xData2[x]))


@@ 97,30 97,30 @@ class ComicScanner():
            box = (startX, startY, endX, endY)
            matched = False
            for i in range(0, len(clusters)):
                if (rect_overlaps(box, clusters[i])):
                if rect_overlaps(box, clusters[i]):
                    matched = True
                    clusters[i] = (
                        min(box[0], clusters[i][0]),
                        min(box[1], clusters[i][1]),
                        max(box[2], clusters[i][2]),
                        max(box[3], clusters[i][3])
                        max(box[3], clusters[i][3]),
                    )
            if not matched:
                clusters.append(box)

        # Remove duplicate clusters
        for _ in range(0,1):
        for _ in range(0, 1):
            finalClusters = []
            for cluster in clusters:
                matched = False
                for i in range(0, len(finalClusters)):
                    if (rect_overlaps(cluster, finalClusters[i])):
                    if rect_overlaps(cluster, finalClusters[i]):
                        matched = True
                        finalClusters[i] = (
                            min(cluster[0], finalClusters[i][0]),
                            min(cluster[1], finalClusters[i][1]),
                            max(cluster[2], finalClusters[i][2]),
                            max(cluster[3], finalClusters[i][3])
                            max(cluster[3], finalClusters[i][3]),
                        )
                if not matched:
                    finalClusters.append(cluster)


@@ 130,23 130,42 @@ class ComicScanner():
        suppressedClusters = non_max_suppression(np.array(finalClusters))

        for (startX, startY, endX, endY) in suppressedClusters:
            cropStartX = int(max(startX-10, 0))
            cropStartY = int(max(startY-10, 0))            
            cropEndX = int(min(endX+10,W*rW))
            cropEndY = int(min(endY+10,H*rH))
            cropStartX = int(max(startX - 10, 0))
            cropStartY = int(max(startY - 10, 0))
            cropEndX = int(min(endX + 10, W * rW))
            cropEndY = int(min(endY + 10, H * rH))
            cropped = orig[cropStartY:cropEndY, cropStartX:cropEndX]
            text = str(TextBlob("".join([c if ord(c) < 128 else "" for c in pytesseract.image_to_string(cropped, lang="eng", config=TESSCONFIG)]).strip().replace("[", "I").replace("\\", "l").replace("/","i").lower()).correct())
            text = str(
                TextBlob(
                    "".join(
                        [
                            c if ord(c) < 128 else ""
                            for c in pytesseract.image_to_string(
                                cropped, lang="eng", config=TESSCONFIG
                            )
                        ]
                    )
                    .strip()
                    .replace("[", "I")
                    .replace("\\", "l")
                    .replace("/", "i")
                    .lower()
                ).correct()
            )
            yield text

def rect_overlaps(r1,r2):

def rect_overlaps(r1, r2, max_gap=5):
    """Return True when two axis-aligned boxes lie closer than *max_gap*.

    Each box is a 4-item sequence ``(x0, y0, x1, y1)`` giving two opposite
    corners.  Boxes that intersect or touch have a gap of 0 and therefore
    count as overlapping for any positive *max_gap*.

    The gap is computed directly from the box extents.  Building a polygon
    from the corners in the order (x0, y0), (x0, y1), (x1, y0), (x1, y1)
    traces a self-intersecting "bow-tie" rather than the rectangle, which
    drops the top and bottom edges and makes vertically adjacent boxes look
    much farther apart than they are.

    Args:
        r1: First box as ``(x0, y0, x1, y1)``.
        r2: Second box as ``(x0, y0, x1, y1)``.
        max_gap: Exclusive upper bound on the Euclidean distance between the
            two boxes for them to be treated as overlapping.

    Returns:
        True if the shortest distance between the boxes is below *max_gap*.
    """
    # Normalise each axis so the result does not depend on corner order.
    ax0, ax1 = sorted((r1[0], r1[2]))
    ay0, ay1 = sorted((r1[1], r1[3]))
    bx0, bx1 = sorted((r2[0], r2[2]))
    by0, by1 = sorted((r2[1], r2[3]))

    # Per-axis separation; 0 when the projections on that axis overlap.
    dx = max(bx0 - ax1, ax0 - bx1, 0)
    dy = max(by0 - ay1, ay0 - by1, 0)

    return (dx * dx + dy * dy) ** 0.5 < max_gap


def main():
    parser = argparse.ArgumentParser(description='Comic OCR processor')
    parser.add_argument('paths', metavar='path', type=str, nargs='+',
                        help='paths to images to scan')
    parser = argparse.ArgumentParser(description="Comic OCR processor")
    parser.add_argument(
        "paths", metavar="path", type=str, nargs="+", help="paths to images to scan"
    )

    args = parser.parse_args()



@@ 156,8 175,9 @@ def main():
        except:
            print("Path does not exist, or is not a valid image")
            break
        for text in scan_image(img):
        for text in ComicScanner.scan_image(img):
            print(text)


if __name__ == "__main__":
    main()