#!/usr/bin/env python3

""" extracts data from ELZ PDFs using Poppler pdftotext """

import subprocess
import logging


class PDFParsing:
    """Extract text fields from ELZ PDFs by cropping page areas with pdftotext.

    Every field is described by a bounding box with the keys ``xMin``,
    ``yMin``, ``xMax`` and ``yMax``. An optional ``"edit"`` key selects a
    post-processing step; only ``"title"`` (title-case the text) is
    implemented.
    """

    # Crop areas of the "Einsatzausdruck" layout.
    # Get them using 'pdftotext -bbox'
    # y = row
    # x = column: xMax 450 / 590 means full width
    _EINSATZAUSDRUCK_FIELDS = {
        "auftrag": {"xMin": 70, "yMin": 47, "xMax": 120, "yMax": 58},
        "datum": {"xMin": 190, "yMin": 47, "xMax": 239, "yMax": 58},
        "zeit": {"xMin": 190, "yMin": 59, "xMax": 215, "yMax": 70},
        "melder": {
            "xMin": 304,
            "yMin": 47,
            "xMax": 446,
            "yMax": 70,
            "edit": "title",
        },
        "erfasser": {"xMin": 448, "yMin": 59, "xMax": 478, "yMax": 70},
        # big field until "Disponierte Einheiten"
        "bemerkungen": {"xMin": 28, "yMin": 112, "xMax": 590, "yMax": 350},
        "disponierteeinheiten": {
            "xMin": 28,
            "yMin": 366,
            "xMax": 450,
            "yMax": 376,
        },
        "einsatz": {"xMin": 76, "yMin": 690, "xMax": 450, "yMax": 703},
        "sondersignal": {"xMin": 76, "yMin": 707, "xMax": 450, "yMax": 721},
        "ort": {"xMin": 76, "yMin": 732, "xMax": 590, "yMax": 745},
        "hinweis": {"xMin": 76, "yMin": 773, "xMax": 450, "yMax": 787},
    }

    # Crop areas of the "Einsatzprotokoll" layout (same conventions as above).
    _EINSATZPROTOKOLL_FIELDS = {
        "auftrag": {"xMin": 192, "yMin": 132, "xMax": 238, "yMax": 142},
        "angelegt": {"xMin": 192, "yMin": 294, "xMax": 226, "yMax": 304},
        "dispo": {"xMin": 192, "yMin": 312, "xMax": 226, "yMax": 322},
        "ausgerueckt": {"xMin": 192, "yMin": 331, "xMax": 226, "yMax": 341},
        "vorort": {"xMin": 192, "yMin": 348, "xMax": 226, "yMax": 358},
    }

    def __init__(self, pdftotext="/usr/bin/pdftotext"):
        """Set up the logger and remember which pdftotext binary to run.

        Args:
            pdftotext: Path of the pdftotext executable. Defaults to the
                location that was previously hard-coded.
        """
        self.pdftotext = pdftotext
        self.logger = logging.getLogger(__name__)
        self.logger.info("PDF parsing based on pdftotext loaded")

    def _crop_text(self, f_id, file, field, coordinate):
        """Run pdftotext on one crop area of page 1 and return its text.

        Returns the output with trailing whitespace removed, or ``None`` when
        pdftotext exits with a non-zero status.
        """
        # x/y-coordinate of the crop area top left corner
        x = coordinate["xMin"]
        y = coordinate["yMin"]

        # width and height of the crop area in pixels
        w = coordinate["xMax"] - x
        h = coordinate["yMax"] - y

        command = [
            self.pdftotext,
            "-f",
            "1",
            "-l",
            "1",
            "-x",
            str(x),
            "-y",
            str(y),
            "-W",
            str(w),
            "-H",
            str(h),
            file,
            "-",
        ]

        # Pass the list itself so it is only formatted if debug is enabled.
        self.logger.debug(
            "[%s] Computed command for field %s: %s", f_id, field, command
        )

        # stderr is captured separately so diagnostics never end up inside the
        # extracted field. pdftotext writes UTF-8 unless told otherwise, so
        # decode explicitly instead of relying on the process locale.
        result = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            errors="replace",
            check=False,
        )

        diagnostics = result.stderr.strip()

        if result.returncode != 0:
            self.logger.error(
                "[%s] pdftotext failed for field %s (exit code %s): %s",
                f_id,
                field,
                result.returncode,
                diagnostics,
            )
            return None

        if diagnostics:
            self.logger.warning(
                "[%s] pdftotext reported for field %s: %s",
                f_id,
                field,
                diagnostics,
            )

        return result.stdout.rstrip()

    def extract(self, f_id, file, datafields):
        """Extract all fields in *datafields* from the first page of *file*.

        Args:
            f_id: Expected ID; it is compared with the extracted "auftrag"
                field as a sanity check.
            file: Path of the PDF file handed to pdftotext.
            datafields: Mapping of field name to crop-area description.

        Returns:
            A dict mapping field names to extracted text when the "auftrag"
            field equals *f_id*. ``False`` when pdftotext fails, when no
            "auftrag" field was extracted, or when the ID does not match.

        Raises:
            OSError: If the pdftotext binary cannot be executed.
        """
        self.logger.info("[%s] parsing PDF file %s", f_id, file)

        data = {}

        for field, coordinate in datafields.items():
            text = self._crop_text(f_id, file, field, coordinate)
            if text is None:
                return False

            ## TODO: fixup some fields (lowercase, remove unnecessary \n)
            if coordinate.get("edit") == "title":
                text = text.title()

            data[field] = text

        # sanity check to see if we can correlate the f_id; .get() keeps a
        # field set without "auftrag" on the regular failure path.
        auftrag = data.get("auftrag")
        if f_id == auftrag:
            self.logger.debug("[%s] ID matches in PDF", f_id)
            return data

        self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
        return False

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts information from Einsatzausdruck using external pdftotext """

        self.logger.debug("[%s] Parsing PDF: %s", f_id, file)

        return self.extract(f_id, file, self._EINSATZAUSDRUCK_FIELDS)

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts information from Einsatzprotokoll using external pdftotext """

        self.logger.debug("[%s] Parsing PDF: %s", f_id, file)

        return self.extract(f_id, file, self._EINSATZPROTOKOLL_FIELDS)