Merge pull request #9 from tobru/rewrite_pdf_parsing

Rewrite pdf parsing
2019-09-22 22:02:55 +02:00 · 2019-09-22 22:02:55 +02:00 · d9d72ee442
parent b5c7d7b7b1 fd90e8c2e9
commit d9d72ee442
12 changed files with 227 additions and 478 deletions
--- a/6
+++ b/6
@ -1,5 +1,11 @@
 FROM python:3.7

+# Install pdftotext
+RUN set -x; \
+    apt update && \
+    apt install -y poppler-utils && \
+    rm -rf /var/lib/apt/lists/*
+
 WORKDIR /usr/src/pylokid
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt
--- a/dashboard_client.py
+++ b/dashboard_client.py
@ -1,114 +0,0 @@
-#!/usr/bin/env python3
-
-""" The dashboard client """
-
-import os
-import logging
-import subprocess
-
-from dotenv import find_dotenv, load_dotenv
-import paho.mqtt.client as mqtt
-
-# Configuration
-load_dotenv(find_dotenv())
-MQTT_SERVER = os.getenv("MQTT_SERVER")
-MQTT_USER = os.getenv("MQTT_USER")
-MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
-MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
-CEC_ENABLED = os.getenv("CEC_ENABLED", "yes")
-TMP_DIR = os.getenv("TMP_DIR", "/tmp")
-
-# Initialization
-logging.basicConfig(
-    level=logging.DEBUG,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-LOGGER = logging.getLogger('dashboard')
-
-PIDS = {}
-
-def on_connect(client, userdata, flags, rc):
-    LOGGER.info("Connected to MQTT with result code %s", str(rc))
-
-    # Subscribing in on_connect() means that if we lose the connection and
-    # reconnect then subscriptions will be renewed.
-    client.subscribe("pylokid/#")
-
-def on_message(client, userdata, msg):
-    topic_detail = msg.topic.split("/")
-    f_id = topic_detail[2]
-    if topic_detail[1] == 'Einsatzausdruck_FW' and topic_detail[3] == 'pdf':
-        LOGGER.info("[%s] New Einsatzausdruck received", f_id)
-        file_name = TMP_DIR + "/dashboard_" + f_id + ".pdf"
-        file = open(file_name, "wb")
-        file.write(msg.payload)
-        file.close()
-
-        if f_id in PIDS:
-            LOGGER.info(
-                "[%s] Einsatzausdruck already being displayed with PID %s",
-                f_id,
-                str(PIDS[f_id])
-            )
-        else:
-            LOGGER.info("[%s] Displaying Einsatzausdruck with xpdf", f_id)
-            # TODO turn on TV with cec-client
-            process = subprocess.Popen(
-                ["/usr/bin/xpdf", "-z", "width", "-fullscreen", file_name],
-                env=dict(os.environ, DISPLAY=":0")
-            )
-            PIDS[f_id] = process.pid
-
-            if CEC_ENABLED == "yes":
-                # Check power state of TV
-                status = subprocess.run(
-                    ["/usr/bin/cec-client", "-s", "-d", "1"],
-                    stdout=subprocess.PIPE,
-                    input=b'pow 0').stdout
-                if status.splitlines()[1] == b'power status: standby':
-                    LOGGER.info("[%s] CEC power status: standby. Powering TV on", f_id)
-                    subprocess.run(
-                        ["/usr/bin/cec-client", "-s", "-d", "1"],
-                        stdout=subprocess.PIPE,
-                        input=b'on 0'
-                    )
-                else:
-                    LOGGER.info("[%s] CEC power status: probably on", f_id)
-    elif topic_detail[1] == 'Einsatzprotokoll':
-        LOGGER.info("[%s] New Einsatzprotokoll received", f_id)
-        if f_id in PIDS:
-            LOGGER.info("[%s] Killing xpdf PID %s", f_id, str(PIDS[f_id]))
-            os.kill(PIDS[f_id], 9)
-            PIDS.pop(f_id)
-        else:
-            LOGGER.info("[%s] No xpdf PID found", f_id)
-
-        if CEC_ENABLED == "yes":
-            # Turn off TV if no xpdf running anymore
-            if not PIDS:
-                LOGGER.info("[%s] No xpdf running anymore. Powering TV off", f_id)
-                subprocess.run(
-                    ["/usr/bin/cec-client", "-s", "-d", "1"],
-                    stdout=subprocess.PIPE,
-                    input=b'standby 0'
-                )
-    else:
-        LOGGER.info("[%s] Unknown", topic_detail[1])
-
-def main():
-    """ main """
-
-    mqtt_client = mqtt.Client()
-    mqtt_client.on_connect = on_connect
-    mqtt_client.on_message = on_message
-
-    mqtt_client.username_pw_set(MQTT_USER, password=MQTT_PASSWORD)
-    mqtt_client.tls_set()
-    mqtt_client.connect(MQTT_SERVER, 8883, 60)
-    mqtt_client.loop_forever()
-
-if __name__ == '__main__':
-    try:
-        main()
-    except KeyboardInterrupt:
-        print("Byebye")
--- a/dashboard_client.service
+++ b/dashboard_client.service
@ -1,14 +0,0 @@
-[Unit]
-Description=PyLokid Dashboard Client
-After=network.target
-
-[Service]
-User=pi
-Restart=always
-Environment="MQTT_SERVER=mybroker.example.com"
-Environment="MQTT_USER=myuser"
-Environment="MQTT_PASSWORD=mypassword"
-ExecStart=/usr/bin/python3 /opt/dashboard_client.py
-
-[Install]
-WantedBy=multi-user.target
--- a/library/gotify.py
+++ b/library/gotify.py
@ -1,36 +0,0 @@
-#!/usr/bin/env python3
-
-""" Gotify Functions """
-
-import logging
-import json
-from urllib.parse import urljoin
-import requests
-
-class GotifyClient:
-    """ Gotify Client """
-
-    def __init__(self, url, token):
-        self.logger = logging.getLogger(__name__)
-        self.logger.info('Gotify URL %s', url)
-
-        self.url = url
-        self.token = token
-
-    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
-        """ Publish a message over Gotify """
-
-        requestURL = urljoin(self.url, '/message?token=' + self.token)
-
-        try:
-            resp = requests.post(requestURL, json={
-                'title': 'Einsatz ' + f_id,
-                'message': f_type,
-                'priority': 5
-            })
-        except requests.exceptions.RequestException as err:
-            self.logger.error('[%s] Could not connect to Gotify server: %e', f_id, err)
-
-        # Print request result if server returns http error code
-        if resp.status_code is not requests.codes.ok:
-            self.logger.error('[%s] Could not send message to Gotify server: %e', f_id, bytes.decode(resp.content))
--- a/library/lodur.py
+++ b/library/lodur.py
@ -71,11 +71,11 @@ class Lodur:
                try:
                    zh_fw_ausg = datetime.strptime(
                        pdf_data['ausgerueckt'],
-                        '%H:%M',
+                        '%H:%M:%S',
                    )
                    zh_am_schad = datetime.strptime(
-                        pdf_data['anort'],
-                        '%H:%M',
+                        pdf_data['vorort'],
+                        '%H:%M:%S',
                    )
                except ValueError as err:
                    self.logger.error('[%s] Date parsing failed: %s', f_id, err)
@ -120,9 +120,9 @@ class Lodur:
                '%H:%M',
            )
            eins_ereig = pdf_data['einsatz']
-            bemerkungen = pdf_data['bemerkungen']
+            bemerkungen = pdf_data['bemerkungen'] + '\n' + pdf_data['disponierteeinheiten']
            wer_ala = pdf_data['melder']
-            adr = pdf_data['strasse'] + ', ' + pdf_data['ort']
+            adr = pdf_data['ort']
        else:
            date = datetime.now()
            time = datetime.now()
@ -229,6 +229,7 @@ class Lodur:
            # Encoding bk causes some troubles - therefore we skip that - but it
            # would be good if it would be encoded as it can / will contain f.e.abs
            # Umlauts
+            # AttributeError: 'bytes' object has no attribute 'parent'
            self.logger.info('Form data: %s = %s', key, value)
            if key in ('eins_ereig', 'adr', 'wer_ala'):
                self.browser[key] = value.encode('iso-8859-1')
--- a/library/mqtt.py
+++ b/library/mqtt.py
@ -1,46 +0,0 @@
-#!/usr/bin/env python3
-
-""" MQTT Functions """
-
-import logging
-import json
-import paho.mqtt.client as mqtt
-
-class MQTTClient:
-    """ MQTT Client """
-
-    def __init__(self, server, username, password, base_topic):
-        self.logger = logging.getLogger(__name__)
-        self.logger.info('Connecting to MQTT broker %s', server)
-
-        try:
-            self.mqtt_client = mqtt.Client('pylokid')
-            self.mqtt_client.username_pw_set(username, password=password)
-            self.mqtt_client.tls_set()
-            self.mqtt_client.connect(server, 8883, 60)
-            self.mqtt_client.loop_start()
-            self.logger.info('MQTT connection successful')
-        except Exception as err:
-            self.logger.error('MQTT connection failed: %s', str(err))
-
-        self.base_topic = base_topic
-
-    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
-        """ Publish a message over MQTT """
-
-        if f_type == 'Einsatzausdruck_FW':
-            try:
-                topic = "{0}/Einsatzausdruck_FW/{1}/".format(self.base_topic, f_id)
-                self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
-                self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
-
-                ## Publish the PDF blob
-                pdf_fh = open(pdf_file, 'rb')
-                pdf_binary = pdf_fh.read()
-                self.mqtt_client.publish(topic + 'pdf', bytes(pdf_binary))
-            except IndexError as err:
-                self.logger.info('[%s] Cannot publish information: %s', f_id, err)
-        elif f_type == 'Einsatzprotokoll':
-            topic = "{0}/Einsatzprotokoll/{1}/".format(self.base_topic, f_id)
-            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
-            self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
--- a/library/pdf_extract.py
+++ b/library/pdf_extract.py
@ -1,209 +0,0 @@
-#!/usr/bin/env python3
-
-""" extracts data from ELZ PDFs """
-
-import io
-import logging
-from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
-from pdfminer.converter import TextConverter
-from pdfminer.layout import LAParams
-from pdfminer.pdfpage import PDFPage
-
-class PDFHandling:
-    """ PDF handling like parsing """
-
-    def __init__(self):
-        self.logger = logging.getLogger(__name__)
-
-        # less logging for pdfminer - more is not needed
-        logger_doc = logging.getLogger('pdfminer.pdfdocument')
-        logger_doc.setLevel(logging.WARNING)
-        logger_page = logging.getLogger('pdfminer.pdfpage')
-        logger_page.setLevel(logging.WARNING)
-        logger_interp = logging.getLogger('pdfminer.pdfinterp')
-        logger_interp.setLevel(logging.WARNING)
-        logger_psparser = logging.getLogger('pdfminer.psparser')
-        logger_psparser.setLevel(logging.WARNING)
-        logger_cmapdb = logging.getLogger('pdfminer.cmapdb')
-        logger_cmapdb.setLevel(logging.WARNING)
-        logger_pdfparser = logging.getLogger('pdfminer.pdfparser')
-        logger_pdfparser.setLevel(logging.WARNING)
-
-    def concatenate_to_multiline_string(self, data, start, end):
-        """ concatenates multiple lines to a single multiline string """
-
-        res = ''
-        counter = start
-        while counter <= end:
-            res += data[counter] + '\n'
-            counter += 1
-        return res
-
-    def convert(self, file):
-        """ converts the PDF to a multiline string """
-
-        pagenums = set()
-        manager = PDFResourceManager()
-        codec = 'utf-8'
-        caching = True
-
-        output = io.StringIO()
-        converter = TextConverter(manager, output, codec=codec, laparams=LAParams())
-
-        interpreter = PDFPageInterpreter(manager, converter)
-        infile = open(file, 'rb')
-
-        for page in PDFPage.get_pages(infile, pagenums, caching=caching, check_extractable=True):
-            interpreter.process_page(page)
-
-        converted_pdf = output.getvalue()
-
-        infile.close()
-        converter.close()
-        output.close()
-        return converted_pdf
-
-    def extract_einsatzausdruck(self, file, f_id):
-        """ extracts as many information from the parsed Einsatzausdruck as possible """
-
-        converted = self.convert(file)
-        splited = converted.splitlines()
-
-        self.logger.debug('[%s] Parsed PDF raw:\n %s', f_id, converted)
-        self.logger.debug('[%s] Line-splited PDF: %s', f_id, splited)
-
-        # search some well-known words for later positional computation
-        try:
-            index_einsatzauftragfw = splited.index('Einsatzauftrag Feuerwehr')
-            index_erfasser = splited.index('Erfasser')
-            index_auftrag = splited.index('Auftrag')
-            index_bemerkungen = splited.index('Bemerkungen')
-            index_dispo = splited.index('Disponierte Einheiten')
-            index_einsatz = splited.index('Einsatz')
-            index_hinweis = splited.index('Hinweis')
-            index_maps = splited.index('Google Maps')
-        except ValueError as err:
-            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
-            return False
-
-        # the PDF parsing not always produces the same output
-        # let's define the already known output
-        if index_bemerkungen == 6:
-            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
-            # get length of bemerkungen field
-            # it lives between the line which contains 'Bemerkungen' and
-            # the line 'Disponierte Einheiten'
-            length_bemerkungen = index_auftrag - index_bemerkungen - 1
-            erfasser = splited[index_dispo - 2]
-            auftrag = splited[index_erfasser + 2]
-            datum = splited[index_erfasser + 3]
-            zeit = splited[index_erfasser + 4]
-            einsatz = splited[index_einsatz - 6]
-            sondersignal = splited[index_einsatz - 5]
-            ort = splited[index_einsatz - 3]
-            strasse = splited[index_einsatz - 2]
-            # sometimes there is just a phone number for the field melder but on
-            # the second line, so the lines vary for erfasser and melder
-            if index_dispo - index_erfasser == 10:
-                melder = splited[index_dispo - 4] + ', ' + splited[index_dispo - 3]
-            else:
-                melder = splited[index_dispo - 4]
-        # BMA style
-        elif index_bemerkungen == 20:
-            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
-            length_bemerkungen = index_dispo - index_bemerkungen - 1
-            erfasser = splited[index_bemerkungen - 2]
-            auftrag = splited[index_einsatzauftragfw + 2]
-            datum = splited[index_einsatzauftragfw + 3]
-            zeit = splited[index_einsatzauftragfw + 4]
-            einsatz = splited[index_einsatz + 6]
-            sondersignal = splited[index_einsatz + 7]
-            ort = splited[index_einsatz + 9]
-            strasse = splited[index_einsatz + 10]
-            melder = 'BMA' # There is no melder on a BMA Einsatzausdruck
-        elif index_bemerkungen == 21 or index_bemerkungen == 22:
-            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
-            length_bemerkungen = index_dispo - index_bemerkungen - 1
-            erfasser = splited[index_bemerkungen - 2]
-            auftrag = splited[index_erfasser + 2]
-            datum = splited[index_erfasser + 3]
-            zeit = splited[index_erfasser + 4]
-            einsatz = splited[index_einsatz - 6]
-            sondersignal = splited[index_einsatz - 5]
-            ort = splited[index_einsatz - 3]
-            strasse = splited[index_einsatz - 2]
-            if index_bemerkungen - index_erfasser == 10:
-                melder = splited[index_bemerkungen - 4] + ', ' + splited[index_bemerkungen - 3]
-            else:
-                melder = splited[index_bemerkungen - 4]
-        elif index_bemerkungen == 24:
-            self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
-            length_bemerkungen = index_dispo - index_bemerkungen - 1
-            erfasser = splited[index_bemerkungen - 2]
-            auftrag = splited[index_einsatzauftragfw + 4]
-            datum = splited[index_einsatzauftragfw + 9]
-            zeit = splited[index_einsatzauftragfw + 10]
-            einsatz = splited[index_einsatz - 4]
-            sondersignal = splited[index_einsatz - 3]
-            ort = ''
-            strasse = splited[index_einsatz - 2]
-            melder = splited[index_dispo - 8] + ', ' + splited[index_dispo - 7]
-        else:
-            self.logger.error('[%s] Unknown location of Bemerkungen. Line %s', f_id, index_bemerkungen)
-            return False
-
-        # sanity check to see if we can correlate the f_id
-        if f_id == auftrag:
-            self.logger.info('[%s] ID matches in PDF', f_id)
-        else:
-            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
-            return False
-
-        # try to find out if there is a hinweis
-        # if yes, the difference between the indexes is 4, else it's shorter
-        if index_maps - index_hinweis == 4:
-            hinweis = splited[index_hinweis+2]
-        else:
-            hinweis = ''
-
-        data = {
-            'auftrag': auftrag,
-            'datum': datum,
-            'zeit': zeit,
-            'melder': melder,
-            'erfasser': erfasser,
-            'bemerkungen': self.concatenate_to_multiline_string(
-                splited,
-                index_bemerkungen + 1,
-                index_bemerkungen + length_bemerkungen
-            ).rstrip(),
-            'einsatz': einsatz,
-            'sondersignal': sondersignal,
-            'ort': ort.title(),
-            'strasse': strasse.title(),
-            #'objekt': splited[],
-            'hinweis': hinweis,
-        }
-        return data
-
-    def extract_einsatzprotokoll(self, file, f_id):
-        """ extracts as many information from the parsed Einsatzprotokoll as possible """
-
-        splited = self.convert(file).splitlines()
-
-        # sanity check to see if we can correlate the f_id
-        if f_id == splited[26]:
-            self.logger.info('[%s] ID matches in PDF', f_id)
-        else:
-            self.logger.error('[%s] ID does not match in PDF', f_id)
-            return False
-
-        data = {
-            'auftrag': splited[26],
-            'datum': splited[25],
-            'angelegt': splited[28],
-            'disposition': splited[30],
-            'ausgerueckt': splited[32],
-            'anort': splited[33],
-        }
-        return data
--- a/library/pdftotext.py
+++ b/library/pdftotext.py
@ -0,0 +1,142 @@
+#!/usr/bin/env python3
+
+""" extracts data from ELZ PDFs using Poppler pdftotext """
+
+import subprocess
+import logging
+
+class PDFParsing:
+    """ PDF parsing """
+
+    def __init__(self):
+        self.logger = logging.getLogger(__name__)
+        self.logger.info('PDF parsing based on pdftotext loaded')
+
+    def extract(self, f_id, file, datafields):
+
+        self.logger.info('[%s] parsing PDF file %s', f_id, file)
+
+        data = {}
+
+        for field, coordinate in datafields.items():
+
+            # x-coordinate of the crop area top left corner
+            x = coordinate['xMin']
+
+            # y-coordinate of the crop area top left corner
+            y = coordinate['yMin']
+
+            # width of crop area in pixels
+            w = coordinate['xMax'] - coordinate['xMin']
+
+            # height of crop area in pixels
+            h = coordinate['yMax'] - coordinate['yMin']
+
+            self.logger.debug('[%s] Computed command for field %s: %s', f_id, field,
+              'pdftotext -f 1 -l 1 -x {} -y {} -W {} -H {}'.format(x,y,w,h)
+            )
+
+            scrapeddata = subprocess.Popen([
+                  '/usr/bin/pdftotext',
+                  '-f', '1',
+                  '-l', '1',
+                  '-x', str(x),
+                  '-y', str(y),
+                  '-W', str(w),
+                  '-H', str(h),
+                  file,
+                  '-'
+                ],
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True)
+            stdout, _ = scrapeddata.communicate()
+
+            # TODO: post-process more fields (lowercase, strip unnecessary \n); only the 'title' edit is handled so far
+            if 'edit' in coordinate and coordinate['edit'] == 'title':
+                data[field] = stdout.rstrip().title()
+            else:
+                data[field] = stdout.rstrip()
+
+        # sanity check to see if we can correlate the f_id
+        if f_id == data['auftrag']:
+            self.logger.debug('[%s] ID matches in PDF', f_id)
+            return data
+        else:
+            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, data['auftrag'])
+            return False
+
+    def extract_einsatzausdruck(self, file, f_id):
+        """ extracts information from Einsatzausdruck using external pdftotext """
+
+        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)
+
+        # Field coordinates below were obtained using 'pdftotext -bbox'
+        # y = row (vertical position)
+        # x = column (horizontal position): xMax 450 / 590 means full width
+        coordinates = {
+            'auftrag': {
+                'xMin': 70, 'yMin': 47, 'xMax': 120,'yMax': 58,
+            },
+            'datum': {
+                'xMin': 190, 'yMin': 47, 'xMax': 239, 'yMax': 58,
+            },
+            'zeit': {
+                'xMin': 190, 'yMin': 59, 'xMax': 215, 'yMax': 70,
+            },
+            'melder': {
+                'xMin': 304, 'yMin': 47, 'xMax': 446, 'yMax': 70, 'edit': 'title'
+            },
+            'erfasser':{
+                'xMin': 448, 'yMin': 59, 'xMax': 478, 'yMax': 70,
+            },
+            # big field until "Disponierte Einheiten"
+            'bemerkungen': {
+                'xMin': 28, 'yMin': 112, 'xMax': 590, 'yMax': 350,
+            },
+            'disponierteeinheiten': {
+                'xMin': 28, 'yMin': 366, 'xMax': 450, 'yMax': 376,
+            },
+            'einsatz': {
+                'xMin': 76, 'yMin': 690, 'xMax': 450, 'yMax': 703,
+            },
+            'sondersignal': {
+                'xMin': 76, 'yMin': 707, 'xMax': 450, 'yMax': 721,
+            },
+            'ort': {
+                'xMin': 76, 'yMin': 732, 'xMax': 590, 'yMax': 745,
+            },
+            'hinweis': {
+                'xMin': 76, 'yMin': 773, 'xMax': 450, 'yMax': 787,
+            },
+        }
+
+        return self.extract(f_id, file, coordinates)
+
+    def extract_einsatzprotokoll(self, file, f_id):
+        """ extracts information from Einsatzprotokoll using external pdftotext """
+
+        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)
+
+        # Field coordinates below were obtained using 'pdftotext -bbox'
+        # y = row (vertical position)
+        # x = column (horizontal position): xMax 450 / 590 means full width
+        coordinates = {
+            'auftrag': {
+                'xMin': 192, 'yMin': 132, 'xMax': 238,'yMax': 142,
+            },
+            'angelegt': {
+                'xMin': 192, 'yMin': 294, 'xMax': 226, 'yMax': 304,
+            },
+            'dispo': {
+                'xMin': 192, 'yMin': 312, 'xMax': 226, 'yMax': 322,
+            },
+            'ausgerueckt': {
+                'xMin': 192, 'yMin': 331, 'xMax': 226, 'yMax': 341,
+            },
+            'vorort':{
+                'xMin': 192, 'yMin': 348, 'xMax': 226, 'yMax': 358,
+            },
+        }
+
+        return self.extract(f_id, file, coordinates)
--- a/main.py
+++ b/main.py
@ -8,13 +8,12 @@ import time

 import requests
 from dotenv import find_dotenv, load_dotenv
+from pushover import Client

 # local classes
 from library.emailhandling import EmailHandling
 from library.lodur import Lodur
-from library.mqtt import MQTTClient
-from library.gotify import GotifyClient
-from library.pdf_extract import PDFHandling
+from library.pdftotext import PDFParsing
 from library.webdav import WebDav

 # Configuration
@ -29,17 +28,13 @@ WEBDAV_USERNAME = os.getenv("WEBDAV_USERNAME")
 WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
 WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
 TMP_DIR = os.getenv("TMP_DIR", "/tmp")
-MQTT_SERVER = os.getenv("MQTT_SERVER")
-MQTT_USER = os.getenv("MQTT_USER")
-MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
-MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
 LODUR_USER = os.getenv("LODUR_USER")
 LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
 LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
 HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
-GOTIFY_URL = os.getenv("GOTIFY_URL")
-GOTIFY_TOKEN = os.getenv("GOTIFY_TOKEN")
-PYLOKID_VERSION = "1.2.0"
+PUSHOVER_API_TOKEN = os.getenv("PUSHOVER_API_TOKEN")
+PUSHOVER_USER_KEY = os.getenv("PUSHOVER_USER_KEY")
+PYLOKID_VERSION = "2.0.0"

 def main():
    """ main """
@ -77,22 +72,14 @@ def main():
        TMP_DIR,
    )

-    # Initialize MQTT Sessions
-    mqtt_client = MQTTClient(
-        MQTT_SERVER,
-        MQTT_USER,
-        MQTT_PASSWORD,
-        MQTT_BASE_TOPIC,
-    )
-
-    # Initialize Gotify
-    gotify_client = GotifyClient(
-        GOTIFY_URL,
-        GOTIFY_TOKEN,
+    # Initialize Pushover
+    pushover = Client(
+        user_key=PUSHOVER_USER_KEY,
+        api_token=PUSHOVER_API_TOKEN
    )

    # Initialize PDF Parser
-    pdf = PDFHandling()
+    pdf = PDFParsing()

    # Main Loop
    while True:
@ -133,9 +120,25 @@ def main():
                            f_id,
                        )

-                        # publish Einsatz on MQTT and Gotify
-                        mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
-                        gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                        # publish Einsatz on Pushover
+                        logger.info(
+                            '[%s] Publishing message on Pushover', f_id
+                        )
+                        pushover.send_message(
+                            "Einsatz {} eröffnet: {}\n\n* Ort: {}\n* Melder: {}\n* Hinweis: {}\n* {}\n\n{}\n\n{}".format(
+                                f_id,
+                                pdf_data['einsatz'],
+                                pdf_data['ort'],
+                                pdf_data['melder'].replace('\n',' '),
+                                pdf_data['hinweis'],
+                                pdf_data['sondersignal'],
+                                pdf_data['disponierteeinheiten'],
+                                pdf_data['bemerkungen'],
+                            ),
+                            title="Feuerwehr Einsatz",
+                            url="https://www.google.com/maps/search/?api=1&query={}".format(pdf_data['ort']),
+                            url_title="Ort auf Karte suchen"
+                        )

                        # create new Einsatzrapport in Lodur
                        lodur_client.einsatzrapport(
@ -173,9 +176,15 @@ def main():
                        # Update entry in Lodur with parse PDF data
                        lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)

-                        # Einsatz finished - publish on MQTT and Gotify
-                        mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
-                        gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
+                        # Einsatz finished - publish on Pushover
+                        logger.info(
+                            '[%s] Publishing message on Pushover', f_id
+                        )
+                        pushover.send_message(
+                            "Einsatz {} beendet".format(f_id),
+                            title="Feuerwehr Einsatz beendet",
+                        )
+
                    else:
                        logger.error(
                            '[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',
--- a/requirements.txt
+++ b/requirements.txt
@ -1,6 +1,7 @@
 aioeasywebdav==2.4.0
-MechanicalSoup==0.9.0.post4
-paho-mqtt==1.3.1
-pdfminer.six==20170720
-python-dotenv==0.7.1
+# MechanicalSoup > 0.11.0 produces "TypeError: expected string or bytes-like
+# object" on file upload
+MechanicalSoup==0.11.0
+python-dotenv==0.10.3
 requests>=2.20.0
+python-pushover==0.4
--- a/test_pdf_parsing.py
+++ b/test_pdf_parsing.py
@ -1,21 +0,0 @@
-import re
-import logging
-from pprint import pprint
-from pathlib import Path
-from library.pdf_extract import PDFHandling
-
-PATH = '/tmp/pylokid'
-
-logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-
-PDF = PDFHandling()
-
-for path in Path(PATH).glob('**/*.pdf'):
-    file = str(path)
-    print(file)
-    f_id = re.search('.*(F[0-9]{8})_.*', file).group(1)
-    print(f_id)
-    pprint(PDF.extract_einsatzausdruck(file, f_id))
--- a/test_pdftotext.py
+++ b/test_pdftotext.py
@ -0,0 +1,30 @@
+import re
+import logging
+from pprint import pprint
+from pathlib import Path
+from library.pdftotext import PDFParsing
+
+PATH = '/home/tobru/Documents/Feuerwehr/Stab/Fourier/Einsatzdepeschen/2019'
+
+logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+PDF = PDFParsing()
+
+for path in Path(PATH).glob('**/Einsatzausdruck*.pdf'):
+    file = str(path)
+    print(file)
+    f_id = re.search('.*(F[0-9]{8})_.*', file).group(1)
+    print(f_id)
+    pprint(PDF.extract_einsatzausdruck(file, f_id))
+
+"""
+for path in Path(PATH).glob('**/Einsatzprotokoll*.pdf'):
+    file = str(path)
+    print(file)
+    f_id = re.search('.*(F[0-9]{8})_.*', file).group(1)
+    print(f_id)
+    pprint(PDF.extract_einsatzprotokoll(file, f_id))
+"""