Merge pull request #9 from tobru/rewrite_pdf_parsing
Rewrite pdf parsing
This commit is contained in:
commit
d9d72ee442
|
@ -1,5 +1,11 @@
|
||||||
FROM python:3.7
|
FROM python:3.7
|
||||||
|
|
||||||
|
# Install pdftotext
|
||||||
|
RUN set -x; \
|
||||||
|
apt update && \
|
||||||
|
apt install -y poppler-utils && \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
WORKDIR /usr/src/pylokid
|
WORKDIR /usr/src/pylokid
|
||||||
COPY requirements.txt ./
|
COPY requirements.txt ./
|
||||||
RUN pip install --no-cache-dir -r requirements.txt
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
|
@ -1,114 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
""" The dashboard client """
|
|
||||||
|
|
||||||
import os
|
|
||||||
import logging
|
|
||||||
import subprocess
|
|
||||||
|
|
||||||
from dotenv import find_dotenv, load_dotenv
|
|
||||||
import paho.mqtt.client as mqtt
|
|
||||||
|
|
||||||
# Configuration
|
|
||||||
load_dotenv(find_dotenv())
|
|
||||||
MQTT_SERVER = os.getenv("MQTT_SERVER")
|
|
||||||
MQTT_USER = os.getenv("MQTT_USER")
|
|
||||||
MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
|
|
||||||
MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
|
|
||||||
CEC_ENABLED = os.getenv("CEC_ENABLED", "yes")
|
|
||||||
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
|
|
||||||
|
|
||||||
# Initialization
|
|
||||||
logging.basicConfig(
|
|
||||||
level=logging.DEBUG,
|
|
||||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
||||||
)
|
|
||||||
LOGGER = logging.getLogger('dashboard')
|
|
||||||
|
|
||||||
PIDS = {}
|
|
||||||
|
|
||||||
def on_connect(client, userdata, flags, rc):
    """ Subscribe to all topics below the configured base topic once connected

    Registered as the paho-mqtt ``on_connect`` callback; ``userdata`` and
    ``flags`` are not used. ``rc`` is only logged.
    """
    LOGGER.info("Connected to MQTT with result code %s", str(rc))

    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    # Use the configured base topic (defaults to "pylokid") instead of a
    # hard-coded literal so MQTT_BASE_TOPIC is actually honoured.
    client.subscribe(MQTT_BASE_TOPIC + "/#")
|
|
||||||
|
|
||||||
def _cec_command(command):
    """ Send one command to cec-client and return its raw stdout (b'' on failure) """
    try:
        return subprocess.run(
            ["/usr/bin/cec-client", "-s", "-d", "1"],
            stdout=subprocess.PIPE,
            input=command
        ).stdout
    except OSError as err:
        # a missing/broken cec-client must not take down the MQTT callback
        LOGGER.error("Could not run cec-client: %s", err)
        return b''


def _show_einsatzausdruck(f_id, payload):
    """ Store the received PDF, display it with xpdf and wake the TV if needed """
    LOGGER.info("[%s] New Einsatzausdruck received", f_id)
    file_name = TMP_DIR + "/dashboard_" + f_id + ".pdf"
    with open(file_name, "wb") as pdf_file:
        pdf_file.write(payload)

    if f_id in PIDS:
        LOGGER.info(
            "[%s] Einsatzausdruck already being displayed with PID %s",
            f_id,
            str(PIDS[f_id])
        )
    else:
        LOGGER.info("[%s] Displaying Einsatzausdruck with xpdf", f_id)
        process = subprocess.Popen(
            ["/usr/bin/xpdf", "-z", "width", "-fullscreen", file_name],
            env=dict(os.environ, DISPLAY=":0")
        )
        PIDS[f_id] = process.pid

    # NOTE(review): indentation was lost in the reviewed copy; the CEC check
    # is assumed to run for every received Einsatzausdruck - confirm.
    if CEC_ENABLED == "yes":
        # Check power state of TV; the state is expected on the second line
        status = _cec_command(b'pow 0').splitlines()
        if len(status) > 1 and status[1] == b'power status: standby':
            LOGGER.info("[%s] CEC power status: standby. Powering TV on", f_id)
            _cec_command(b'on 0')
        else:
            LOGGER.info("[%s] CEC power status: probably on", f_id)


def _close_einsatzausdruck(f_id):
    """ Kill the xpdf showing this Einsatz and turn the TV off if none is left """
    LOGGER.info("[%s] New Einsatzprotokoll received", f_id)
    if f_id in PIDS:
        # forget the PID first so a failing kill cannot leave a stale entry
        pid = PIDS.pop(f_id)
        LOGGER.info("[%s] Killing xpdf PID %s", f_id, str(pid))
        try:
            os.kill(pid, 9)
        except ProcessLookupError:
            # viewer was already closed by hand
            LOGGER.info("[%s] xpdf PID %s is already gone", f_id, str(pid))
    else:
        LOGGER.info("[%s] No xpdf PID found", f_id)

    # Turn off TV if no xpdf running anymore
    if CEC_ENABLED == "yes" and not PIDS:
        LOGGER.info("[%s] No xpdf running anymore. Powering TV off", f_id)
        _cec_command(b'standby 0')


def on_message(client, userdata, msg):
    """ Dispatch an incoming MQTT message based on its topic

    The topic is split on "/"; segment 1 is read as the message type,
    segment 2 as the Einsatz ID and segment 3 (if present) as the payload
    kind. Registered as the paho-mqtt ``on_message`` callback; ``client``
    and ``userdata`` are not used.
    """
    topic_detail = msg.topic.split("/")
    if len(topic_detail) < 3:
        # too short to carry a type and an ID - nothing we can do with it
        LOGGER.info("Ignoring message on unexpected topic %s", msg.topic)
        return

    f_type = topic_detail[1]
    f_id = topic_detail[2]
    kind = topic_detail[3] if len(topic_detail) > 3 else ''

    if f_type == 'Einsatzausdruck_FW' and kind == 'pdf':
        _show_einsatzausdruck(f_id, msg.payload)
    elif f_type == 'Einsatzprotokoll':
        _close_einsatzausdruck(f_id)
    else:
        LOGGER.info("[%s] Unknown", f_type)
|
|
||||||
|
|
||||||
def main():
    """ Set up the MQTT client, connect via TLS and process messages forever """
    client = mqtt.Client()

    # callbacks have to be in place before the connection is established
    client.on_message = on_message
    client.on_connect = on_connect

    client.username_pw_set(MQTT_USER, password=MQTT_PASSWORD)
    client.tls_set()
    client.connect(MQTT_SERVER, 8883, 60)

    # blocks until the client is disconnected
    client.loop_forever()
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
try:
|
|
||||||
main()
|
|
||||||
except KeyboardInterrupt:
|
|
||||||
print("Byebye")
|
|
|
@ -1,14 +0,0 @@
|
||||||
[Unit]
|
|
||||||
Description=PyLokid Dashboard Client
|
|
||||||
After=network.target
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
User=pi
|
|
||||||
Restart=always
|
|
||||||
Environment="MQTT_SERVER=mybroker.example.com"
|
|
||||||
Environment="MQTT_USER=myuser"
|
|
||||||
Environment="MQTT_PASSWORD=mypassword"
|
|
||||||
ExecStart=/usr/bin/python3 /opt/dashboard_client.py
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
|
@ -1,36 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
""" Gotify Functions """
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
from urllib.parse import urljoin
|
|
||||||
import requests
|
|
||||||
|
|
||||||
class GotifyClient:
    """ Gotify Client """

    def __init__(self, url, token):
        """ remembers the Gotify base URL and the application token """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Gotify URL %s', url)

        self.url = url
        self.token = token

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over Gotify

        Posts a message titled 'Einsatz <f_id>' with ``f_type`` as text.
        ``pdf_data`` and ``pdf_file`` are accepted but not used here.
        Failures are logged and never raised; nothing is returned.
        """

        request_url = urljoin(self.url, '/message')

        try:
            resp = requests.post(
                request_url,
                # let requests encode the token instead of concatenating it
                params={'token': self.token},
                json={
                    'title': 'Einsatz ' + f_id,
                    'message': f_type,
                    'priority': 5
                },
                # without a timeout an unreachable server blocks the caller
                timeout=10,
            )
        except requests.exceptions.RequestException as err:
            self.logger.error('[%s] Could not connect to Gotify server: %s', f_id, err)
            # there is no response to inspect
            return

        # Print request result if server returns http error code
        if resp.status_code != requests.codes.ok:
            self.logger.error(
                '[%s] Could not send message to Gotify server: %s', f_id, resp.text
            )
|
|
|
@ -71,11 +71,11 @@ class Lodur:
|
||||||
try:
|
try:
|
||||||
zh_fw_ausg = datetime.strptime(
|
zh_fw_ausg = datetime.strptime(
|
||||||
pdf_data['ausgerueckt'],
|
pdf_data['ausgerueckt'],
|
||||||
'%H:%M',
|
'%H:%M:%S',
|
||||||
)
|
)
|
||||||
zh_am_schad = datetime.strptime(
|
zh_am_schad = datetime.strptime(
|
||||||
pdf_data['anort'],
|
pdf_data['vorort'],
|
||||||
'%H:%M',
|
'%H:%M:%S',
|
||||||
)
|
)
|
||||||
except ValueError as err:
|
except ValueError as err:
|
||||||
self.logger.error('[%s] Date parsing failed: %s', f_id, err)
|
self.logger.error('[%s] Date parsing failed: %s', f_id, err)
|
||||||
|
@ -120,9 +120,9 @@ class Lodur:
|
||||||
'%H:%M',
|
'%H:%M',
|
||||||
)
|
)
|
||||||
eins_ereig = pdf_data['einsatz']
|
eins_ereig = pdf_data['einsatz']
|
||||||
bemerkungen = pdf_data['bemerkungen']
|
bemerkungen = pdf_data['bemerkungen'] + '\n' + pdf_data['disponierteeinheiten']
|
||||||
wer_ala = pdf_data['melder']
|
wer_ala = pdf_data['melder']
|
||||||
adr = pdf_data['strasse'] + ', ' + pdf_data['ort']
|
adr = pdf_data['ort']
|
||||||
else:
|
else:
|
||||||
date = datetime.now()
|
date = datetime.now()
|
||||||
time = datetime.now()
|
time = datetime.now()
|
||||||
|
@ -229,6 +229,7 @@ class Lodur:
|
||||||
# Encoding bk causes some troubles - therefore we skip that - but it
|
# Encoding bk causes some troubles - therefore we skip that - but it
|
||||||
# would be good if it would be encoded as it can / will contain f.e.abs
|
# would be good if it would be encoded as it can / will contain f.e.abs
|
||||||
# Umlauts
|
# Umlauts
|
||||||
|
# AttributeError: 'bytes' object has no attribute 'parent'
|
||||||
self.logger.info('Form data: %s = %s', key, value)
|
self.logger.info('Form data: %s = %s', key, value)
|
||||||
if key in ('eins_ereig', 'adr', 'wer_ala'):
|
if key in ('eins_ereig', 'adr', 'wer_ala'):
|
||||||
self.browser[key] = value.encode('iso-8859-1')
|
self.browser[key] = value.encode('iso-8859-1')
|
||||||
|
|
|
@ -1,46 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
""" MQTT Functions """
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import json
|
|
||||||
import paho.mqtt.client as mqtt
|
|
||||||
|
|
||||||
class MQTTClient:
    """ MQTT Client """

    def __init__(self, server, username, password, base_topic):
        """ connects to the broker on port 8883 using TLS and starts the network loop

        Connection problems are logged, not raised.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Connecting to MQTT broker %s', server)

        self.base_topic = base_topic
        # stays None if the client object itself cannot be created
        self.mqtt_client = None

        try:
            self.mqtt_client = mqtt.Client('pylokid')
            self.mqtt_client.username_pw_set(username, password=password)
            self.mqtt_client.tls_set()
            self.mqtt_client.connect(server, 8883, 60)
            self.mqtt_client.loop_start()
            self.logger.info('MQTT connection successful')
        except Exception as err:
            self.logger.error('MQTT connection failed: %s', str(err))

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over MQTT

        'Einsatzausdruck_FW' publishes ``pdf_data`` as JSON and the content
        of ``pdf_file`` as binary blob, 'Einsatzprotokoll' publishes the JSON
        only. Other types are ignored. Failures are logged, not raised.
        """

        if self.mqtt_client is None:
            self.logger.error('[%s] Cannot publish information: no MQTT client', f_id)
            return

        if f_type == 'Einsatzausdruck_FW':
            topic = "{0}/Einsatzausdruck_FW/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            try:
                self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))

                ## Publish the PDF blob
                with open(pdf_file, 'rb') as pdf_fh:
                    self.mqtt_client.publish(topic + 'pdf', pdf_fh.read())
            except (IndexError, OSError, TypeError) as err:
                # OSError: PDF missing/unreadable, TypeError: no pdf_file given
                # or pdf_data not JSON serializable
                self.logger.error('[%s] Cannot publish information: %s', f_id, err)
        elif f_type == 'Einsatzprotokoll':
            topic = "{0}/Einsatzprotokoll/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
|
|
|
@ -1,209 +0,0 @@
|
||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
""" extracts data from ELZ PDFs """
|
|
||||||
|
|
||||||
import io
|
|
||||||
import logging
|
|
||||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
|
||||||
from pdfminer.converter import TextConverter
|
|
||||||
from pdfminer.layout import LAParams
|
|
||||||
from pdfminer.pdfpage import PDFPage
|
|
||||||
|
|
||||||
class PDFHandling:
    """ PDF handling like parsing """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

        # less logging for pdfminer - more is not needed
        for module in ('pdfdocument', 'pdfpage', 'pdfinterp',
                       'psparser', 'cmapdb', 'pdfparser'):
            logging.getLogger('pdfminer.' + module).setLevel(logging.WARNING)

    def concatenate_to_multiline_string(self, data, start, end):
        """ concatenates the lines data[start]..data[end] (inclusive), each followed by a newline """

        return ''.join(data[index] + '\n' for index in range(start, end + 1))

    def convert(self, file):
        """ converts the PDF to a multiline string """

        manager = PDFResourceManager()
        output = io.StringIO()
        converter = TextConverter(manager, output, codec='utf-8', laparams=LAParams())
        interpreter = PDFPageInterpreter(manager, converter)

        try:
            # the with block closes the PDF even if pdfminer raises
            with open(file, 'rb') as infile:
                # an empty page number set is passed, as before
                for page in PDFPage.get_pages(infile, set(), caching=True, check_extractable=True):
                    interpreter.process_page(page)
            return output.getvalue()
        finally:
            converter.close()
            output.close()

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts as many information from the parsed Einsatzausdruck as possible

        Returns a dict with the extracted fields, or False if the PDF does
        not look like an Einsatzausdruck, has an unknown layout or carries a
        different ID than f_id.
        """

        converted = self.convert(file)
        splited = converted.splitlines()

        self.logger.debug('[%s] Parsed PDF raw:\n %s', f_id, converted)
        self.logger.debug('[%s] Line-splited PDF: %s', f_id, splited)

        # search some well-known words for later positional computation
        try:
            index_einsatzauftragfw = splited.index('Einsatzauftrag Feuerwehr')
            index_erfasser = splited.index('Erfasser')
            index_auftrag = splited.index('Auftrag')
            index_bemerkungen = splited.index('Bemerkungen')
            index_dispo = splited.index('Disponierte Einheiten')
            index_einsatz = splited.index('Einsatz')
            index_hinweis = splited.index('Hinweis')
            index_maps = splited.index('Google Maps')
        except ValueError as err:
            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
            return False

        # the PDF parsing not always produces the same output
        # let's define the already known output
        if index_bemerkungen not in (6, 20, 21, 22, 24):
            self.logger.error('[%s] Unknown location of Bemerkungen. Line %s', f_id, index_bemerkungen)
            return False

        self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)

        try:
            if index_bemerkungen == 6:
                # length of the bemerkungen field; in this layout the code
                # takes the line 'Auftrag' as the end marker
                length_bemerkungen = index_auftrag - index_bemerkungen - 1
                erfasser = splited[index_dispo - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                # sometimes there is just a phone number for the field melder but on
                # the second line, so the lines vary for erfasser and melder
                if index_dispo - index_erfasser == 10:
                    melder = splited[index_dispo - 4] + ', ' + splited[index_dispo - 3]
                else:
                    melder = splited[index_dispo - 4]
            elif index_bemerkungen == 20:
                # BMA style
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 2]
                datum = splited[index_einsatzauftragfw + 3]
                zeit = splited[index_einsatzauftragfw + 4]
                einsatz = splited[index_einsatz + 6]
                sondersignal = splited[index_einsatz + 7]
                ort = splited[index_einsatz + 9]
                strasse = splited[index_einsatz + 10]
                melder = 'BMA' # There is no melder on a BMA Einsatzausdruck
            elif index_bemerkungen in (21, 22):
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                if index_bemerkungen - index_erfasser == 10:
                    melder = splited[index_bemerkungen - 4] + ', ' + splited[index_bemerkungen - 3]
                else:
                    melder = splited[index_bemerkungen - 4]
            else:
                # index_bemerkungen == 24
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 4]
                datum = splited[index_einsatzauftragfw + 9]
                zeit = splited[index_einsatzauftragfw + 10]
                einsatz = splited[index_einsatz - 4]
                sondersignal = splited[index_einsatz - 3]
                ort = ''
                strasse = splited[index_einsatz - 2]
                melder = splited[index_dispo - 8] + ', ' + splited[index_dispo - 7]
        except IndexError as err:
            # a positional lookup pointed past the end of the document
            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id != auftrag:
            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
            return False
        self.logger.info('[%s] ID matches in PDF', f_id)

        # try to find out if there is a hinweis
        # if yes, the difference between the indexes is 4, else it's shorter
        if index_maps - index_hinweis == 4:
            hinweis = splited[index_hinweis+2]
        else:
            hinweis = ''

        return {
            'auftrag': auftrag,
            'datum': datum,
            'zeit': zeit,
            'melder': melder,
            'erfasser': erfasser,
            'bemerkungen': self.concatenate_to_multiline_string(
                splited,
                index_bemerkungen + 1,
                index_bemerkungen + length_bemerkungen
            ).rstrip(),
            'einsatz': einsatz,
            'sondersignal': sondersignal,
            'ort': ort.title(),
            'strasse': strasse.title(),
            'hinweis': hinweis,
        }

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts as many information from the parsed Einsatzprotokoll as possible

        Returns a dict with the extracted fields, or False if the PDF is too
        short or carries a different ID than f_id.
        """

        splited = self.convert(file).splitlines()

        # the highest line index read below is 33
        if len(splited) < 34:
            self.logger.error('[%s] ID does not match in PDF', f_id)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id != splited[26]:
            self.logger.error('[%s] ID does not match in PDF', f_id)
            return False
        self.logger.info('[%s] ID matches in PDF', f_id)

        return {
            'auftrag': splited[26],
            'datum': splited[25],
            'angelegt': splited[28],
            'disposition': splited[30],
            'ausgerueckt': splited[32],
            'anort': splited[33],
        }
|
|
|
@ -0,0 +1,142 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
""" extracts data from ELZ PDFs using Poppler pdftotext """
|
||||||
|
|
||||||
|
import subprocess
|
||||||
|
import logging
|
||||||
|
|
||||||
|
class PDFParsing:
    """ PDF parsing """

    # location of the Poppler binary used for the extraction
    PDFTOTEXT = '/usr/bin/pdftotext'

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.logger.info('PDF parsing based on pdftotext loaded')

    def _crop_text(self, f_id, file, field, coordinate):
        """ returns the text pdftotext finds in one crop area of page 1, '' on failure """

        # x-coordinate of the crop area top left corner
        x = coordinate['xMin']

        # y-coordinate of the crop area top left corner
        y = coordinate['yMin']

        # width of crop area in pixels
        w = coordinate['xMax'] - coordinate['xMin']

        # height of crop area in pixels
        h = coordinate['yMax'] - coordinate['yMin']

        self.logger.debug('[%s] Computed command for field %s: %s', f_id, field,
            'pdftotext -f 1 -l 1 -x {} -y {} -W {} -H {}'.format(x,y,w,h)
        )

        try:
            result = subprocess.run([
                self.PDFTOTEXT,
                '-f', '1',
                '-l', '1',
                '-x', str(x),
                '-y', str(y),
                '-W', str(w),
                '-H', str(h),
                file,
                '-'
                ],
                stdout=subprocess.PIPE,
                # keep diagnostics apart so they never end up as field content
                stderr=subprocess.PIPE,
                # pdftotext writes UTF-8 unless -enc is given
                encoding='utf-8',
                errors='replace')
        except OSError as err:
            self.logger.error('[%s] Could not run pdftotext for field %s: %s', f_id, field, err)
            return ''

        if result.returncode != 0:
            self.logger.error(
                '[%s] pdftotext failed for field %s with code %s: %s',
                f_id, field, result.returncode, result.stderr.strip()
            )
            return ''

        if result.stderr.strip():
            self.logger.warning(
                '[%s] pdftotext message for field %s: %s', f_id, field, result.stderr.strip()
            )

        return result.stdout

    def extract(self, f_id, file, datafields):
        """ extracts every field described in datafields from the first page of the PDF

        datafields maps a field name to a dict with the keys xMin, yMin, xMax
        and yMax describing the crop area. An optional key 'edit' with the
        value 'title' title-cases the extracted text.

        Returns the dict of extracted texts if the field 'auftrag' equals
        f_id, else False.
        """

        self.logger.info('[%s] parsing PDF file %s', f_id, file)

        data = {}

        for field, coordinate in datafields.items():
            text = self._crop_text(f_id, file, field, coordinate).rstrip()

            ## TODO: fixup some fields (lowercase, remove unnecessary \n)
            if coordinate.get('edit') == 'title':
                text = text.title()
            data[field] = text

        # sanity check to see if we can correlate the f_id
        auftrag = data.get('auftrag')
        if f_id != auftrag:
            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
            return False

        self.logger.debug('[%s] ID matches in PDF', f_id)
        return data

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts information from Einsatzausdruck using external pdftotext """

        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 70, 'yMin': 47, 'xMax': 120,'yMax': 58,
            },
            'datum': {
                'xMin': 190, 'yMin': 47, 'xMax': 239, 'yMax': 58,
            },
            'zeit': {
                'xMin': 190, 'yMin': 59, 'xMax': 215, 'yMax': 70,
            },
            'melder': {
                'xMin': 304, 'yMin': 47, 'xMax': 446, 'yMax': 70, 'edit': 'title'
            },
            'erfasser':{
                'xMin': 448, 'yMin': 59, 'xMax': 478, 'yMax': 70,
            },
            # big field until "Disponierte Einheiten"
            'bemerkungen': {
                'xMin': 28, 'yMin': 112, 'xMax': 590, 'yMax': 350,
            },
            'disponierteeinheiten': {
                'xMin': 28, 'yMin': 366, 'xMax': 450, 'yMax': 376,
            },
            'einsatz': {
                'xMin': 76, 'yMin': 690, 'xMax': 450, 'yMax': 703,
            },
            'sondersignal': {
                'xMin': 76, 'yMin': 707, 'xMax': 450, 'yMax': 721,
            },
            'ort': {
                'xMin': 76, 'yMin': 732, 'xMax': 590, 'yMax': 745,
            },
            'hinweis': {
                'xMin': 76, 'yMin': 773, 'xMax': 450, 'yMax': 787,
            },
        }

        return self.extract(f_id, file, coordinates)

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts information from Einsatzprotokoll using external pdftotext """

        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 192, 'yMin': 132, 'xMax': 238,'yMax': 142,
            },
            'angelegt': {
                'xMin': 192, 'yMin': 294, 'xMax': 226, 'yMax': 304,
            },
            'dispo': {
                'xMin': 192, 'yMin': 312, 'xMax': 226, 'yMax': 322,
            },
            'ausgerueckt': {
                'xMin': 192, 'yMin': 331, 'xMax': 226, 'yMax': 341,
            },
            'vorort':{
                'xMin': 192, 'yMin': 348, 'xMax': 226, 'yMax': 358,
            },
        }

        return self.extract(f_id, file, coordinates)
|
67
main.py
67
main.py
|
@ -8,13 +8,12 @@ import time
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
from dotenv import find_dotenv, load_dotenv
|
from dotenv import find_dotenv, load_dotenv
|
||||||
|
from pushover import Client
|
||||||
|
|
||||||
# local classes
|
# local classes
|
||||||
from library.emailhandling import EmailHandling
|
from library.emailhandling import EmailHandling
|
||||||
from library.lodur import Lodur
|
from library.lodur import Lodur
|
||||||
from library.mqtt import MQTTClient
|
from library.pdftotext import PDFParsing
|
||||||
from library.gotify import GotifyClient
|
|
||||||
from library.pdf_extract import PDFHandling
|
|
||||||
from library.webdav import WebDav
|
from library.webdav import WebDav
|
||||||
|
|
||||||
# Configuration
|
# Configuration
|
||||||
|
@ -29,17 +28,13 @@ WEBDAV_USERNAME = os.getenv("WEBDAV_USERNAME")
|
||||||
WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
|
WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
|
||||||
WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
|
WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
|
||||||
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
|
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
|
||||||
MQTT_SERVER = os.getenv("MQTT_SERVER")
|
|
||||||
MQTT_USER = os.getenv("MQTT_USER")
|
|
||||||
MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
|
|
||||||
MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
|
|
||||||
LODUR_USER = os.getenv("LODUR_USER")
|
LODUR_USER = os.getenv("LODUR_USER")
|
||||||
LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
|
LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
|
||||||
LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
|
LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
|
||||||
HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
|
HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
|
||||||
GOTIFY_URL = os.getenv("GOTIFY_URL")
|
PUSHOVER_API_TOKEN = os.getenv("PUSHOVER_API_TOKEN")
|
||||||
GOTIFY_TOKEN = os.getenv("GOTIFY_TOKEN")
|
PUSHOVER_USER_KEY = os.getenv("PUSHOVER_USER_KEY")
|
||||||
PYLOKID_VERSION = "1.2.0"
|
PYLOKID_VERSION = "2.0.0"
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
""" main """
|
""" main """
|
||||||
|
@ -77,22 +72,14 @@ def main():
|
||||||
TMP_DIR,
|
TMP_DIR,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize MQTT Sessions
|
# Initialize Pushover
|
||||||
mqtt_client = MQTTClient(
|
pushover = Client(
|
||||||
MQTT_SERVER,
|
user_key=PUSHOVER_USER_KEY,
|
||||||
MQTT_USER,
|
api_token=PUSHOVER_API_TOKEN
|
||||||
MQTT_PASSWORD,
|
|
||||||
MQTT_BASE_TOPIC,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Initialize Gotify
|
|
||||||
gotify_client = GotifyClient(
|
|
||||||
GOTIFY_URL,
|
|
||||||
GOTIFY_TOKEN,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize PDF Parser
|
# Initialize PDF Parser
|
||||||
pdf = PDFHandling()
|
pdf = PDFParsing()
|
||||||
|
|
||||||
# Main Loop
|
# Main Loop
|
||||||
while True:
|
while True:
|
||||||
|
@ -133,9 +120,25 @@ def main():
|
||||||
f_id,
|
f_id,
|
||||||
)
|
)
|
||||||
|
|
||||||
# publish Einsatz on MQTT and Gotify
|
# publish Einsatz on Pushover
|
||||||
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
logger.info(
|
||||||
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
'[%s] Publishing message on Pushover', f_id
|
||||||
|
)
|
||||||
|
pushover.send_message(
|
||||||
|
"Einsatz {} eröffnet: {}\n\n* Ort: {}\n* Melder: {}\n* Hinweis: {}\n* {}\n\n{}\n\n{}".format(
|
||||||
|
f_id,
|
||||||
|
pdf_data['einsatz'],
|
||||||
|
pdf_data['ort'],
|
||||||
|
pdf_data['melder'].replace('\n',' '),
|
||||||
|
pdf_data['hinweis'],
|
||||||
|
pdf_data['sondersignal'],
|
||||||
|
pdf_data['disponierteeinheiten'],
|
||||||
|
pdf_data['bemerkungen'],
|
||||||
|
),
|
||||||
|
title="Feuerwehr Einsatz",
|
||||||
|
url="https://www.google.com/maps/search/?api=1&query={}".format(pdf_data['ort']),
|
||||||
|
url_title="Ort auf Karte suchen"
|
||||||
|
)
|
||||||
|
|
||||||
# create new Einsatzrapport in Lodur
|
# create new Einsatzrapport in Lodur
|
||||||
lodur_client.einsatzrapport(
|
lodur_client.einsatzrapport(
|
||||||
|
@ -173,9 +176,15 @@ def main():
|
||||||
# Update entry in Lodur with parse PDF data
|
# Update entry in Lodur with parse PDF data
|
||||||
lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)
|
lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)
|
||||||
|
|
||||||
# Einsatz finished - publish on MQTT and Gotify
|
# Einsatz finished - publish on pushover
|
||||||
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
logger.info(
|
||||||
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
'[%s] Publishing message on Pushover', f_id
|
||||||
|
)
|
||||||
|
pushover.send_message(
|
||||||
|
"Einsatz {} beendet".format(f_id),
|
||||||
|
title="Feuerwehr Einsatz beendet",
|
||||||
|
)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
logger.error(
|
logger.error(
|
||||||
'[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',
|
'[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
aioeasywebdav==2.4.0
|
aioeasywebdav==2.4.0
|
||||||
MechanicalSoup==0.9.0.post4
|
# MechanicalSoup > 0.11.0 produces "TypeError: expected string or bytes-like
|
||||||
paho-mqtt==1.3.1
|
# object" on file upload
|
||||||
pdfminer.six==20170720
|
MechanicalSoup==0.11.0
|
||||||
python-dotenv==0.7.1
|
python-dotenv==0.10.3
|
||||||
requests>=2.20.0
|
requests>=2.20.0
|
||||||
|
python-pushover==0.4
|
|
@ -1,21 +0,0 @@
|
||||||
""" Runs the pdfminer based parser over all PDFs below PATH and prints the result """

import re
import logging
from pprint import pprint
from pathlib import Path
from library.pdf_extract import PDFHandling

PATH = '/tmp/pylokid'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFHandling()

for path in Path(PATH).glob('**/*.pdf'):
    file = str(path)
    print(file)
    match = re.search(r'.*(F[0-9]{8})_.*', file)
    if match is None:
        # not every PDF below PATH has to carry an Einsatz ID in its name
        print('no Einsatz ID found in file name - skipping')
        continue
    f_id = match.group(1)
    print(f_id)
    pprint(PDF.extract_einsatzausdruck(file, f_id))
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
""" Runs the pdftotext based parser over a directory tree of PDFs and prints the result

The directory defaults to PATH and can be overridden by the first command
line argument.
"""

import re
import sys
import logging
from pprint import pprint
from pathlib import Path
from library.pdftotext import PDFParsing

PATH = '/home/tobru/Documents/Feuerwehr/Stab/Fourier/Einsatzdepeschen/2019'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFParsing()


def parse_all(root, pattern, extractor):
    """ prints file name, Einsatz ID and extractor result for every file matching pattern below root """
    for path in Path(root).glob(pattern):
        file = str(path)
        print(file)
        match = re.search(r'.*(F[0-9]{8})_.*', file)
        if match is None:
            # file name carries no Einsatz ID, nothing to correlate against
            print('no Einsatz ID found in file name - skipping')
            continue
        f_id = match.group(1)
        print(f_id)
        pprint(extractor(file, f_id))


ROOT = sys.argv[1] if len(sys.argv) > 1 else PATH

parse_all(ROOT, '**/Einsatzausdruck*.pdf', PDF.extract_einsatzausdruck)

# Disabled for now, enable to check the Einsatzprotokoll parsing as well:
# parse_all(ROOT, '**/Einsatzprotokoll*.pdf', PDF.extract_einsatzprotokoll)
|
Loading…
Reference in New Issue