Merge pull request #9 from tobru/rewrite_pdf_parsing

Rewrite pdf parsing
2019-09-22 22:02:55 +02:00 · 2019-09-22 22:02:55 +02:00 · d9d72ee442
parent b5c7d7b7b1 fd90e8c2e9
commit d9d72ee442
12 changed files with 227 additions and 478 deletions
--- a/6
+++ b/6
@ -1,5 +1,11 @@
 FROM python:3.7
 # Install pdftotext (part of poppler-utils). apt-get is used instead of apt
 # because apt's command line interface is not meant for scripts and warns
 # about it. The package lists are removed in the same layer so they do not
 # end up in the image.
 RUN set -x; \
    apt-get update && \
    apt-get install -y poppler-utils && \
    rm -rf /var/lib/apt/lists/*
 WORKDIR /usr/src/pylokid
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
--- a/dashboard_client.py
+++ b/dashboard_client.py
@ -1,114 +0,0 @@
 #!/usr/bin/env python3
 """ The dashboard client """
 import os
 import logging
 import subprocess
 from dotenv import find_dotenv, load_dotenv
 import paho.mqtt.client as mqtt
 # Configuration
 # Values come from the process environment; load_dotenv() additionally
 # loads a .env file located by find_dotenv() if there is one.
 load_dotenv(find_dotenv())
 MQTT_SERVER = os.getenv("MQTT_SERVER")
 MQTT_USER = os.getenv("MQTT_USER")
 MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
 MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
 # The string "yes" enables the cec-client calls in on_message()
 CEC_ENABLED = os.getenv("CEC_ENABLED", "yes")
 # Directory the received PDFs are written to
 TMP_DIR = os.getenv("TMP_DIR", "/tmp")
 # Initialization
 logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 LOGGER = logging.getLogger('dashboard')
 # Maps an f_id to the PID of the xpdf process started for it
 PIDS = {}
 def on_connect(client, userdata, flags, rc):
    """ paho-mqtt connect callback: log the result code and subscribe.
    client is the MQTT client the callback was registered on, rc is the
    result code that gets logged; userdata and flags are not used.
    """
    LOGGER.info("Connected to MQTT with result code %s", str(rc))
    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    # The configured base topic is used instead of a hard-coded "pylokid"
    # so that a custom MQTT_BASE_TOPIC is honoured.
    client.subscribe(MQTT_BASE_TOPIC + "/#")
 def on_message(client, userdata, msg):
    """ paho-mqtt message callback driving the dashboard display.
    The topic is split on "/" and read as <base>/<type>/<f_id>/<kind>.
    A 'pdf' message of type 'Einsatzausdruck_FW' is written to TMP_DIR and
    shown with xpdf; a message of type 'Einsatzprotokoll' kills the xpdf
    process of that f_id again. If CEC_ENABLED is "yes" the TV is powered
    on when a PDF is displayed and put to standby once no xpdf is tracked
    anymore. client and userdata are not used.
    """
    topic_detail = msg.topic.split("/")
    # Topics with fewer than three levels carry neither a type nor an ID;
    # indexing them would raise IndexError inside the MQTT callback.
    if len(topic_detail) < 3:
        LOGGER.info("[%s] Unknown", msg.topic)
        return
    f_type = topic_detail[1]
    f_id = topic_detail[2]
    # The fourth level is optional, a missing one must not raise either
    f_kind = topic_detail[3] if len(topic_detail) > 3 else None
    if f_type == 'Einsatzausdruck_FW' and f_kind == 'pdf':
        LOGGER.info("[%s] New Einsatzausdruck received", f_id)
        file_name = TMP_DIR + "/dashboard_" + f_id + ".pdf"
        # The context manager closes the file even if writing fails
        with open(file_name, "wb") as pdf_fh:
            pdf_fh.write(msg.payload)
        if f_id in PIDS:
            LOGGER.info(
                "[%s] Einsatzausdruck already being displayed with PID %s",
                f_id,
                str(PIDS[f_id])
            )
        else:
            LOGGER.info("[%s] Displaying Einsatzausdruck with xpdf", f_id)
            process = subprocess.Popen(
                ["/usr/bin/xpdf", "-z", "width", "-fullscreen", file_name],
                env=dict(os.environ, DISPLAY=":0")
            )
            PIDS[f_id] = process.pid
            if CEC_ENABLED == "yes":
                # Check power state of TV
                status = subprocess.run(
                    ["/usr/bin/cec-client", "-s", "-d", "1"],
                    stdout=subprocess.PIPE,
                    input=b'pow 0').stdout
                status_lines = status.splitlines()
                # The state is expected on the second output line; shorter
                # output is treated like "not in standby" instead of raising
                if len(status_lines) > 1 and status_lines[1] == b'power status: standby':
                    LOGGER.info("[%s] CEC power status: standby. Powering TV on", f_id)
                    subprocess.run(
                        ["/usr/bin/cec-client", "-s", "-d", "1"],
                        stdout=subprocess.PIPE,
                        input=b'on 0'
                    )
                else:
                    LOGGER.info("[%s] CEC power status: probably on", f_id)
    elif f_type == 'Einsatzprotokoll':
        LOGGER.info("[%s] New Einsatzprotokoll received", f_id)
        if f_id in PIDS:
            # pop() first so the entry is gone even if the kill fails
            pid = PIDS.pop(f_id)
            LOGGER.info("[%s] Killing xpdf PID %s", f_id, str(pid))
            try:
                # 9 = SIGKILL
                os.kill(pid, 9)
            except ProcessLookupError:
                # xpdf has been closed already, nothing left to kill
                LOGGER.info("[%s] xpdf PID %s is not running anymore", f_id, str(pid))
        else:
            LOGGER.info("[%s] No xpdf PID found", f_id)
        if CEC_ENABLED == "yes":
            # Turn off TV if no xpdf running anymore
            if not PIDS:
                LOGGER.info("[%s] No xpdf running anymore. Powering TV off", f_id)
                subprocess.run(
                    ["/usr/bin/cec-client", "-s", "-d", "1"],
                    stdout=subprocess.PIPE,
                    input=b'standby 0'
                )
    else:
        LOGGER.info("[%s] Unknown", f_type)
 def main():
    """ Connect to the MQTT broker with TLS and process messages forever """
    client = mqtt.Client()
    # credentials and TLS have to be configured before connecting
    client.username_pw_set(MQTT_USER, password=MQTT_PASSWORD)
    client.tls_set()
    client.on_connect = on_connect
    client.on_message = on_message
    client.connect(MQTT_SERVER, 8883, 60)
    # blocks until the process is interrupted
    client.loop_forever()
 # Start the client only when the file is executed as a script. Ctrl-C
 # (KeyboardInterrupt) ends it with a short message instead of a traceback.
 if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        print("Byebye")
--- a/dashboard_client.service
+++ b/dashboard_client.service
@ -1,14 +0,0 @@
 [Unit]
 Description=PyLokid Dashboard Client
 After=network.target
 [Service]
 User=pi
 Restart=always
 Environment="MQTT_SERVER=mybroker.example.com"
 Environment="MQTT_USER=myuser"
 Environment="MQTT_PASSWORD=mypassword"
 ExecStart=/usr/bin/python3 /opt/dashboard_client.py
 [Install]
 WantedBy=multi-user.target
--- a/library/gotify.py
+++ b/library/gotify.py
@ -1,36 +0,0 @@
 #!/usr/bin/env python3
 """ Gotify Functions """
 import logging
 import json
 from urllib.parse import urljoin
 import requests
 class GotifyClient:
    """ Gotify Client """
    def __init__(self, url, token):
        """ Store the Gotify base URL and the token used to post messages """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Gotify URL %s', url)
        self.url = url
        self.token = token
    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over Gotify
        f_type becomes the message text and f_id is put into the title.
        pdf_data and pdf_file are accepted but not used. Connection
        problems and HTTP error responses are logged and not raised.
        """
        request_url = urljoin(self.url, '/message?token=' + self.token)
        try:
            resp = requests.post(request_url, json={
                'title': 'Einsatz ' + f_id,
                'message': f_type,
                'priority': 5
            })
        except requests.exceptions.RequestException as err:
            self.logger.error('[%s] Could not connect to Gotify server: %s', f_id, err)
            # There is no response object to inspect after a failed request
            return
        # Print request result if server returns http error code
        if resp.status_code != requests.codes.ok:
            self.logger.error('[%s] Could not send message to Gotify server: %s', f_id, resp.text)
--- a/library/lodur.py
+++ b/library/lodur.py
@ -71,11 +71,11 @@ class Lodur:
                try:
                    zh_fw_ausg = datetime.strptime(
                        pdf_data['ausgerueckt'],
-                        '%H:%M',
+                        '%H:%M:%S',
                    )
                    zh_am_schad = datetime.strptime(
-                        pdf_data['anort'],
+                        pdf_data['vorort'],
-                        '%H:%M',
+                        '%H:%M:%S',
                    )
                except ValueError as err:
                    self.logger.error('[%s] Date parsing failed: %s', f_id, err)
@ -120,9 +120,9 @@ class Lodur:
                '%H:%M',
            )
            eins_ereig = pdf_data['einsatz']
-            bemerkungen = pdf_data['bemerkungen']
+            bemerkungen = pdf_data['bemerkungen'] + '\n' + pdf_data['disponierteeinheiten']
            wer_ala = pdf_data['melder']
-            adr = pdf_data['strasse'] + ', ' + pdf_data['ort']
+            adr = pdf_data['ort']
        else:
            date = datetime.now()
            time = datetime.now()
@ -229,6 +229,7 @@ class Lodur:
            # Encoding bk causes some troubles - therefore we skip that - but it
            # would be good if it would be encoded as it can / will contain f.e.abs
            # Umlauts
            # AttributeError: 'bytes' object has no attribute 'parent'
            self.logger.info('Form data: %s = %s', key, value)
            if key in ('eins_ereig', 'adr', 'wer_ala'):
                self.browser[key] = value.encode('iso-8859-1')
--- a/library/mqtt.py
+++ b/library/mqtt.py
@ -1,46 +0,0 @@
 #!/usr/bin/env python3
 """ MQTT Functions """
 import logging
 import json
 import paho.mqtt.client as mqtt
 class MQTTClient:
    """ MQTT Client """
    def __init__(self, server, username, password, base_topic):
        """ Connect to the broker on port 8883 with TLS and start the network loop.
        A failing connection is logged and does not raise. base_topic is
        the first level of every topic published by send_message().
        """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Connecting to MQTT broker %s', server)
        try:
            self.mqtt_client = mqtt.Client('pylokid')
            self.mqtt_client.username_pw_set(username, password=password)
            self.mqtt_client.tls_set()
            self.mqtt_client.connect(server, 8883, 60)
            self.mqtt_client.loop_start()
            self.logger.info('MQTT connection successful')
        except Exception as err:
            self.logger.error('MQTT connection failed: %s', str(err))
        self.base_topic = base_topic
    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over MQTT
        For f_type 'Einsatzausdruck_FW' pdf_data is published as JSON on
        <base_topic>/Einsatzausdruck_FW/<f_id>/json and the content of
        pdf_file on .../pdf. For f_type 'Einsatzprotokoll' only the JSON is
        published on <base_topic>/Einsatzprotokoll/<f_id>/json. Any other
        f_type publishes nothing.
        """
        if f_type == 'Einsatzausdruck_FW':
            try:
                topic = "{0}/Einsatzausdruck_FW/{1}/".format(self.base_topic, f_id)
                self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
                self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
                ## Publish the PDF blob
                # The context manager closes the file handle again
                with open(pdf_file, 'rb') as pdf_fh:
                    pdf_binary = pdf_fh.read()
                self.mqtt_client.publish(topic + 'pdf', pdf_binary)
            except (IndexError, OSError) as err:
                # OSError covers a missing or unreadable PDF file
                self.logger.info('[%s] Cannot publish information: %s', f_id, err)
        elif f_type == 'Einsatzprotokoll':
            topic = "{0}/Einsatzprotokoll/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
--- a/library/pdf_extract.py
+++ b/library/pdf_extract.py
@ -1,209 +0,0 @@
 #!/usr/bin/env python3
 """ extracts data from ELZ PDFs """
 import io
 import logging
 from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
 from pdfminer.converter import TextConverter
 from pdfminer.layout import LAParams
 from pdfminer.pdfpage import PDFPage
 class PDFHandling:
    """ PDF handling like parsing """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        # less logging for pdfminer - more is not needed
        for name in (
                'pdfminer.pdfdocument',
                'pdfminer.pdfpage',
                'pdfminer.pdfinterp',
                'pdfminer.psparser',
                'pdfminer.cmapdb',
                'pdfminer.pdfparser',
        ):
            logging.getLogger(name).setLevel(logging.WARNING)
    def concatenate_to_multiline_string(self, data, start, end):
        """ concatenates multiple lines to a single multiline string
        Joins data[start] up to and including data[end], each followed by a
        newline. Returns an empty string if end is smaller than start.
        """
        return ''.join(data[index] + '\n' for index in range(start, end + 1))
    def convert(self, file):
        """ converts the PDF to a multiline string
        file is the path of the PDF. All pages are processed and their text
        is returned as one string.
        """
        pagenums = set()
        manager = PDFResourceManager()
        codec = 'utf-8'
        caching = True
        output = io.StringIO()
        converter = TextConverter(manager, output, codec=codec, laparams=LAParams())
        try:
            interpreter = PDFPageInterpreter(manager, converter)
            # the context manager closes the PDF even if a page fails to parse
            with open(file, 'rb') as infile:
                for page in PDFPage.get_pages(infile, pagenums, caching=caching, check_extractable=True):
                    interpreter.process_page(page)
            # read the text before converter and buffer get closed
            return output.getvalue()
        finally:
            converter.close()
            output.close()
    def extract_einsatzausdruck(self, file, f_id):
        """ extracts as many information from the parsed Einsatzausdruck as possible
        The converted text is split into lines and the fields are read from
        positions relative to well-known words. Returns a dict with the keys
        auftrag, datum, zeit, melder, erfasser, bemerkungen, einsatz,
        sondersignal, ort, strasse and hinweis, or False if a well-known word
        is missing, the layout is unknown or the parsed auftrag is not f_id.
        """
        converted = self.convert(file)
        splited = converted.splitlines()
        self.logger.debug('[%s] Parsed PDF raw:\n %s', f_id, converted)
        self.logger.debug('[%s] Line-splited PDF: %s', f_id, splited)
        # search some well-known words for later positional computation
        try:
            index_einsatzauftragfw = splited.index('Einsatzauftrag Feuerwehr')
            index_erfasser = splited.index('Erfasser')
            index_auftrag = splited.index('Auftrag')
            index_bemerkungen = splited.index('Bemerkungen')
            index_dispo = splited.index('Disponierte Einheiten')
            index_einsatz = splited.index('Einsatz')
            index_hinweis = splited.index('Hinweis')
            index_maps = splited.index('Google Maps')
        except ValueError as err:
            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
            return False
        # the PDF parsing not always produces the same output
        # let's define the already known output
        if index_bemerkungen == 6:
            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
            # get length of bemerkungen field
            # it lives between the line which contains 'Bemerkungen' and
            # the line 'Disponierte Einheiten'
            # NOTE(review): this branch measures up to the line 'Auftrag',
            # not 'Disponierte Einheiten' like the other branches - confirm
            # which one is intended for this layout
            length_bemerkungen = index_auftrag - index_bemerkungen - 1
            erfasser = splited[index_dispo - 2]
            auftrag = splited[index_erfasser + 2]
            datum = splited[index_erfasser + 3]
            zeit = splited[index_erfasser + 4]
            einsatz = splited[index_einsatz - 6]
            sondersignal = splited[index_einsatz - 5]
            ort = splited[index_einsatz - 3]
            strasse = splited[index_einsatz - 2]
            # sometimes there is just a phone number for the field melder but on
            # the second line, so the lines vary for erfasser and melder
            if index_dispo - index_erfasser == 10:
                melder = splited[index_dispo - 4] + ', ' + splited[index_dispo - 3]
            else:
                melder = splited[index_dispo - 4]
        # BMA style
        elif index_bemerkungen == 20:
            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
            length_bemerkungen = index_dispo - index_bemerkungen - 1
            erfasser = splited[index_bemerkungen - 2]
            auftrag = splited[index_einsatzauftragfw + 2]
            datum = splited[index_einsatzauftragfw + 3]
            zeit = splited[index_einsatzauftragfw + 4]
            einsatz = splited[index_einsatz + 6]
            sondersignal = splited[index_einsatz + 7]
            ort = splited[index_einsatz + 9]
            strasse = splited[index_einsatz + 10]
            melder = 'BMA' # There is no melder on a BMA Einsatzausdruck
        elif index_bemerkungen == 21 or index_bemerkungen == 22:
            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
            length_bemerkungen = index_dispo - index_bemerkungen - 1
            erfasser = splited[index_bemerkungen - 2]
            auftrag = splited[index_erfasser + 2]
            datum = splited[index_erfasser + 3]
            zeit = splited[index_erfasser + 4]
            einsatz = splited[index_einsatz - 6]
            sondersignal = splited[index_einsatz - 5]
            ort = splited[index_einsatz - 3]
            strasse = splited[index_einsatz - 2]
            if index_bemerkungen - index_erfasser == 10:
                melder = splited[index_bemerkungen - 4] + ', ' + splited[index_bemerkungen - 3]
            else:
                melder = splited[index_bemerkungen - 4]
        elif index_bemerkungen == 24:
            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
            length_bemerkungen = index_dispo - index_bemerkungen - 1
            erfasser = splited[index_bemerkungen - 2]
            auftrag = splited[index_einsatzauftragfw + 4]
            datum = splited[index_einsatzauftragfw + 9]
            zeit = splited[index_einsatzauftragfw + 10]
            einsatz = splited[index_einsatz - 4]
            sondersignal = splited[index_einsatz - 3]
            ort = ''
            strasse = splited[index_einsatz - 2]
            melder = splited[index_dispo - 8] + ', ' + splited[index_dispo - 7]
        else:
            self.logger.error('[%s] Unknown location of Bemerkungen. Line %s', f_id, index_bemerkungen)
            return False
        # sanity check to see if we can correlate the f_id
        if f_id == auftrag:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
            return False
        # try to find out if there is a hinweis
        # if yes, the difference between the indexes is 4, else it's shorter
        if index_maps - index_hinweis == 4:
            hinweis = splited[index_hinweis+2]
        else:
            hinweis = ''
        data = {
            'auftrag': auftrag,
            'datum': datum,
            'zeit': zeit,
            'melder': melder,
            'erfasser': erfasser,
            'bemerkungen': self.concatenate_to_multiline_string(
                splited,
                index_bemerkungen + 1,
                index_bemerkungen + length_bemerkungen
            ).rstrip(),
            'einsatz': einsatz,
            'sondersignal': sondersignal,
            'ort': ort.title(),
            'strasse': strasse.title(),
            #'objekt': splited[],
            'hinweis': hinweis,
        }
        return data
    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts as many information from the parsed Einsatzprotokoll as possible
        The fields are read from fixed line numbers of the converted text.
        Returns a dict with the keys auftrag, datum, angelegt, disposition,
        ausgerueckt and anort, or False if the text is too short or line 26
        is not f_id.
        """
        splited = self.convert(file).splitlines()
        # the highest line read below is 33; a shorter text cannot be an
        # Einsatzprotokoll and would raise IndexError
        if len(splited) < 34:
            self.logger.error('[%s] PDF file does not look like a Einsatzprotokoll', f_id)
            return False
        # sanity check to see if we can correlate the f_id
        if f_id == splited[26]:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF', f_id)
            return False
        data = {
            'auftrag': splited[26],
            'datum': splited[25],
            'angelegt': splited[28],
            'disposition': splited[30],
            'ausgerueckt': splited[32],
            'anort': splited[33],
        }
        return data
--- a/library/pdftotext.py
+++ b/library/pdftotext.py
@ -0,0 +1,142 @@
 #!/usr/bin/env python3
 """ extracts data from ELZ PDFs using Poppler pdftotext """
 import subprocess
 import logging
 class PDFParsing:
    """ PDF parsing """
    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.logger.info('PDF parsing based on pdftotext loaded')
    def extract(self, f_id, file, datafields):
        """ extracts the given fields from the first page of a PDF
        datafields maps a field name to a dict with the keys xMin, yMin,
        xMax and yMax describing the crop area; an optional 'edit': 'title'
        title-cases the extracted text. pdftotext is called once per field.
        Returns a dict field name -> extracted text, or False if pdftotext
        cannot be run, exits with an error, or the field 'auftrag' is not
        equal to f_id.
        """
        self.logger.info('[%s] parsing PDF file %s', f_id, file)
        data = {}
        for field, coordinate in datafields.items():
            # x-coordinate of the crop area top left corner
            x = coordinate['xMin']
            # y-coordinate of the crop area top left corner
            y = coordinate['yMin']
            # width of crop area in pixels
            w = coordinate['xMax'] - coordinate['xMin']
            # height of crop area in pixels
            h = coordinate['yMax'] - coordinate['yMin']
            self.logger.debug('[%s] Computed command for field %s: %s', f_id, field,
              'pdftotext -f 1 -l 1 -x {} -y {} -W {} -H {}'.format(x,y,w,h)
            )
            try:
                # stderr is captured separately so that warnings and error
                # messages of pdftotext never end up in the field value
                scrapeddata = subprocess.run([
                      '/usr/bin/pdftotext',
                      '-f', '1',
                      '-l', '1',
                      '-x', str(x),
                      '-y', str(y),
                      '-W', str(w),
                      '-H', str(h),
                      file,
                      '-'
                    ],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True)
            except OSError as err:
                # e.g. the pdftotext binary is not installed
                self.logger.error('[%s] Cannot run pdftotext: %s', f_id, err)
                return False
            if scrapeddata.returncode != 0:
                self.logger.error(
                    '[%s] pdftotext failed for field %s with exit code %s: %s',
                    f_id, field, scrapeddata.returncode, scrapeddata.stderr.strip()
                )
                return False
            if scrapeddata.stderr.strip():
                self.logger.warning(
                    '[%s] pdftotext message for field %s: %s',
                    f_id, field, scrapeddata.stderr.strip()
                )
            ## TODO: fixup some fields (lowercase, remove unnecessary \n)
            text = scrapeddata.stdout.rstrip()
            if coordinate.get('edit') == 'title':
                text = text.title()
            data[field] = text
        # sanity check to see if we can correlate the f_id
        # .get() keeps a field map without 'auftrag' from raising KeyError
        auftrag = data.get('auftrag')
        if f_id == auftrag:
            self.logger.debug('[%s] ID matches in PDF', f_id)
            return data
        self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
        return False
    def extract_einsatzausdruck(self, file, f_id):
        """ extracts information from Einsatzausdruck using external pdftotext
        Returns the dict produced by extract() for the fields defined
        below, or False if the extraction fails.
        """
        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)
        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 70, 'yMin': 47, 'xMax': 120, 'yMax': 58,
            },
            'datum': {
                'xMin': 190, 'yMin': 47, 'xMax': 239, 'yMax': 58,
            },
            'zeit': {
                'xMin': 190, 'yMin': 59, 'xMax': 215, 'yMax': 70,
            },
            'melder': {
                'xMin': 304, 'yMin': 47, 'xMax': 446, 'yMax': 70, 'edit': 'title'
            },
            'erfasser': {
                'xMin': 448, 'yMin': 59, 'xMax': 478, 'yMax': 70,
            },
            # big field until "Disponierte Einheiten"
            'bemerkungen': {
                'xMin': 28, 'yMin': 112, 'xMax': 590, 'yMax': 350,
            },
            'disponierteeinheiten': {
                'xMin': 28, 'yMin': 366, 'xMax': 450, 'yMax': 376,
            },
            'einsatz': {
                'xMin': 76, 'yMin': 690, 'xMax': 450, 'yMax': 703,
            },
            'sondersignal': {
                'xMin': 76, 'yMin': 707, 'xMax': 450, 'yMax': 721,
            },
            'ort': {
                'xMin': 76, 'yMin': 732, 'xMax': 590, 'yMax': 745,
            },
            'hinweis': {
                'xMin': 76, 'yMin': 773, 'xMax': 450, 'yMax': 787,
            },
        }
        return self.extract(f_id, file, coordinates)
    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts information from Einsatzprotokoll using external pdftotext
        Returns the dict produced by extract() for the fields defined
        below, or False if the extraction fails.
        """
        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)
        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 192, 'yMin': 132, 'xMax': 238, 'yMax': 142,
            },
            'angelegt': {
                'xMin': 192, 'yMin': 294, 'xMax': 226, 'yMax': 304,
            },
            'dispo': {
                'xMin': 192, 'yMin': 312, 'xMax': 226, 'yMax': 322,
            },
            'ausgerueckt': {
                'xMin': 192, 'yMin': 331, 'xMax': 226, 'yMax': 341,
            },
            'vorort': {
                'xMin': 192, 'yMin': 348, 'xMax': 226, 'yMax': 358,
            },
        }
        return self.extract(f_id, file, coordinates)
--- a/main.py
+++ b/main.py
@ -8,13 +8,12 @@ import time
 import requests
 from dotenv import find_dotenv, load_dotenv
 from pushover import Client
 # local classes
 from library.emailhandling import EmailHandling
 from library.lodur import Lodur
-from library.mqtt import MQTTClient
+from library.pdftotext import PDFParsing
 from library.gotify import GotifyClient
 from library.pdf_extract import PDFHandling
 from library.webdav import WebDav
 # Configuration
@ -29,17 +28,13 @@ WEBDAV_USERNAME = os.getenv("WEBDAV_USERNAME")
 WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
 WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
 TMP_DIR = os.getenv("TMP_DIR", "/tmp")
 MQTT_SERVER = os.getenv("MQTT_SERVER")
 MQTT_USER = os.getenv("MQTT_USER")
 MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
 MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
 LODUR_USER = os.getenv("LODUR_USER")
 LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
 LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
 HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
-GOTIFY_URL = os.getenv("GOTIFY_URL")
+PUSHOVER_API_TOKEN = os.getenv("PUSHOVER_API_TOKEN")
-GOTIFY_TOKEN = os.getenv("GOTIFY_TOKEN")
+PUSHOVER_USER_KEY = os.getenv("PUSHOVER_USER_KEY")
-PYLOKID_VERSION = "1.2.0"
+PYLOKID_VERSION = "2.0.0"
 def main():
    """ main """
@ -77,22 +72,14 @@ def main():
        TMP_DIR,
    )
-    # Initialize MQTT Sessions
+    # Initialize Pushover
-    mqtt_client = MQTTClient(
+    pushover = Client(
-        MQTT_SERVER,
+        user_key=PUSHOVER_USER_KEY,
-        MQTT_USER,
+        api_token=PUSHOVER_API_TOKEN
        MQTT_PASSWORD,
        MQTT_BASE_TOPIC,
    )
    # Initialize Gotify
    gotify_client = GotifyClient(
        GOTIFY_URL,
        GOTIFY_TOKEN,
    )
    # Initialize PDF Parser
-    pdf = PDFHandling()
+    pdf = PDFParsing()
    # Main Loop
    while True:
@ -133,9 +120,25 @@ def main():
                            f_id,
                        )
-                        # publish Einsatz on MQTT and Gotify
+                        # publish Einsatz on Pushover
-                        mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                        logger.info(
-                        gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                            '[%s] Publishing message on Pushover', f_id
                        )
                        pushover.send_message(
                            "Einsatz {} eröffnet: {}\n\n* Ort: {}\n* Melder: {}\n* Hinweis: {}\n* {}\n\n{}\n\n{}".format(
                                f_id,
                                pdf_data['einsatz'],
                                pdf_data['ort'],
                                pdf_data['melder'].replace('\n',' '),
                                pdf_data['hinweis'],
                                pdf_data['sondersignal'],
                                pdf_data['disponierteeinheiten'],
                                pdf_data['bemerkungen'],
                            ),
                            title="Feuerwehr Einsatz",
                            url="https://www.google.com/maps/search/?api=1&query={}".format(pdf_data['ort']),
                            url_title="Ort auf Karte suchen"
                        )
                        # create new Einsatzrapport in Lodur
                        lodur_client.einsatzrapport(
@ -173,9 +176,15 @@ def main():
                        # Update entry in Lodur with parse PDF data
                        lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)
-                        # Einsatz finished - publish on MQTT and Gotify
+                        # Einsatz finished - publish on pushover
-                        mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                        logger.info(
-                        gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                            '[%s] Publishing message on Pushover', f_id
                        )
                        pushover.send_message(
                            "Einsatz {} beendet".format(f_id),
                            title="Feuerwehr Einsatz beendet",
                        )
                    else:
                        logger.error(
                            '[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,7 @@
 aioeasywebdav==2.4.0
-MechanicalSoup==0.9.0.post4
+# MechanicalSoup > 0.11.0 produces "TypeError: expected string or bytes-like
-paho-mqtt==1.3.1
+# object" on file upload
-pdfminer.six==20170720
+MechanicalSoup==0.11.0
-python-dotenv==0.7.1
+python-dotenv==0.10.3
 requests>=2.20.0
 python-pushover==0.4
--- a/test_pdf_parsing.py
+++ b/test_pdf_parsing.py
@ -1,21 +0,0 @@
 import re
 import logging
 from pprint import pprint
 from pathlib import Path
 from library.pdf_extract import PDFHandling
 # Directory that is searched recursively for PDF files
 PATH = '/tmp/pylokid'
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 PDF = PDFHandling()
 # The ID is an "F" followed by eight digits and an underscore
 F_ID_PATTERN = re.compile(r'.*(F[0-9]{8})_.*')
 for path in Path(PATH).glob('**/*.pdf'):
    file = str(path)
    print(file)
    match = F_ID_PATTERN.search(file)
    if match is None:
        # the glob matches every PDF; files without an ID in their name
        # are skipped instead of failing on None.group()
        print('No F-ID found in file name, skipping')
        continue
    f_id = match.group(1)
    print(f_id)
    pprint(PDF.extract_einsatzausdruck(file, f_id))
--- a/test_pdftotext.py
+++ b/test_pdftotext.py
@ -0,0 +1,30 @@
 import re
 import logging
 from pprint import pprint
 from pathlib import Path
 from library.pdftotext import PDFParsing
 # Directory that is searched recursively for Einsatzausdruck PDF files
 PATH = '/home/tobru/Documents/Feuerwehr/Stab/Fourier/Einsatzdepeschen/2019'
 logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 PDF = PDFParsing()
 # The ID is an "F" followed by eight digits and an underscore
 F_ID_PATTERN = re.compile(r'.*(F[0-9]{8})_.*')
 for path in Path(PATH).glob('**/Einsatzausdruck*.pdf'):
    file = str(path)
    print(file)
    match = F_ID_PATTERN.search(file)
    if match is None:
        # files without an ID in their name are skipped instead of
        # failing on None.group()
        print('No F-ID found in file name, skipping')
        continue
    f_id = match.group(1)
    print(f_id)
    pprint(PDF.extract_einsatzausdruck(file, f_id))
 # The string below is not executed; it keeps the same loop for
 # Einsatzprotokoll files around, disabled.
 """
 for path in Path(PATH).glob('**/Einsatzprotokoll*.pdf'):
    file = str(path)
    print(file)
    f_id = re.search('.*(F[0-9]{8})_.*', file).group(1)
    print(f_id)
    pprint(PDF.extract_einsatzprotokoll(file, f_id))
 """