Merge pull request #9 from tobru/rewrite_pdf_parsing
Rewrite pdf parsing
This commit is contained in:
commit
d9d72ee442
|
@ -1,5 +1,11 @@
|
|||
FROM python:3.7
|
||||
|
||||
# Install pdftotext (shipped in the poppler-utils package).
# apt-get is used instead of apt: apt warns that its CLI is not stable for
# use in scripts. "set -ex" aborts the build on the first failing command.
RUN set -ex; \
    apt-get update; \
    apt-get install -y poppler-utils; \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /usr/src/pylokid
|
||||
COPY requirements.txt ./
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
|
|
@ -1,114 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
""" The dashboard client """
|
||||
|
||||
import os
|
||||
import logging
|
||||
import subprocess
|
||||
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
import paho.mqtt.client as mqtt
|
||||
|
||||
# Configuration: load_dotenv() processes the file located by find_dotenv(),
# afterwards every setting is read from the process environment.
load_dotenv(find_dotenv())
MQTT_SERVER = os.environ.get("MQTT_SERVER")
MQTT_USER = os.environ.get("MQTT_USER")
MQTT_PASSWORD = os.environ.get("MQTT_PASSWORD")
MQTT_BASE_TOPIC = os.environ.get("MQTT_BASE_TOPIC", "pylokid")
CEC_ENABLED = os.environ.get("CEC_ENABLED", "yes")
TMP_DIR = os.environ.get("TMP_DIR", "/tmp")

# Initialization
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
LOGGER = logging.getLogger('dashboard')

# Einsatz ID -> PID of the xpdf process which currently displays it
PIDS = {}
|
||||
|
||||
def on_connect(client, userdata, flags, rc):
    """ Subscribe to all topics below the base topic once connected

    client: the MQTT client which got connected, used to subscribe
    userdata, flags: unused
    rc: connection result code, only logged
    """
    LOGGER.info("Connected to MQTT with result code %s", str(rc))

    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    # The configured MQTT_BASE_TOPIC is used instead of a hard-coded
    # "pylokid" so that the setting is actually honoured. on_message() reads
    # the type and ID from fixed topic levels, so the base topic is expected
    # to be a single level.
    client.subscribe(MQTT_BASE_TOPIC + "/#")
|
||||
|
||||
def on_message(client, userdata, msg):
    """ Show or hide an Einsatzausdruck depending on the received message

    The topic is split on "/" and read as <base>/<type>/<f_id>/<detail>:

    * type 'Einsatzausdruck_FW' with detail 'pdf': the payload is written to
      TMP_DIR and displayed fullscreen with xpdf (once per f_id). With CEC
      enabled the TV is powered on if it reports standby.
    * type 'Einsatzprotokoll': the xpdf process tracked for this f_id is
      killed. With CEC enabled the TV is sent to standby once no xpdf is
      tracked anymore.
    * everything else is logged as unknown.

    client, userdata: unused
    msg: received message, its topic and payload are read
    """
    topic_detail = msg.topic.split("/")

    # Topics which are too short to carry a type and an ID are reported
    # instead of raising IndexError inside the MQTT callback.
    if len(topic_detail) < 3:
        LOGGER.info("[%s] Unknown", msg.topic)
        return

    f_type = topic_detail[1]
    f_id = topic_detail[2]
    detail = topic_detail[3] if len(topic_detail) > 3 else None

    if f_type == 'Einsatzausdruck_FW' and detail == 'pdf':
        LOGGER.info("[%s] New Einsatzausdruck received", f_id)
        file_name = os.path.join(TMP_DIR, "dashboard_" + f_id + ".pdf")
        # the context manager closes the file even if writing fails
        with open(file_name, "wb") as pdf_fh:
            pdf_fh.write(msg.payload)

        if f_id in PIDS:
            LOGGER.info(
                "[%s] Einsatzausdruck already being displayed with PID %s",
                f_id,
                str(PIDS[f_id])
            )
        else:
            LOGGER.info("[%s] Displaying Einsatzausdruck with xpdf", f_id)
            process = subprocess.Popen(
                ["/usr/bin/xpdf", "-z", "width", "-fullscreen", file_name],
                env=dict(os.environ, DISPLAY=":0")
            )
            PIDS[f_id] = process.pid

        if CEC_ENABLED == "yes":
            # Check power state of TV
            status = subprocess.run(
                ["/usr/bin/cec-client", "-s", "-d", "1"],
                stdout=subprocess.PIPE,
                input=b'pow 0').stdout
            status_lines = status.splitlines()
            # the state is expected on the second output line; shorter output
            # is treated like "not standby" instead of raising IndexError
            if len(status_lines) > 1 and status_lines[1] == b'power status: standby':
                LOGGER.info("[%s] CEC power status: standby. Powering TV on", f_id)
                subprocess.run(
                    ["/usr/bin/cec-client", "-s", "-d", "1"],
                    stdout=subprocess.PIPE,
                    input=b'on 0'
                )
            else:
                LOGGER.info("[%s] CEC power status: probably on", f_id)
    elif f_type == 'Einsatzprotokoll':
        LOGGER.info("[%s] New Einsatzprotokoll received", f_id)
        if f_id in PIDS:
            pid = PIDS.pop(f_id)
            LOGGER.info("[%s] Killing xpdf PID %s", f_id, str(pid))
            try:
                # signal 9 (SIGKILL)
                os.kill(pid, 9)
            except ProcessLookupError:
                # xpdf was closed by hand or crashed - nothing left to kill
                LOGGER.info("[%s] xpdf PID %s is not running anymore", f_id, str(pid))
        else:
            LOGGER.info("[%s] No xpdf PID found", f_id)

        if CEC_ENABLED == "yes":
            # Turn off TV if no xpdf running anymore
            if not PIDS:
                LOGGER.info("[%s] No xpdf running anymore. Powering TV off", f_id)
                subprocess.run(
                    ["/usr/bin/cec-client", "-s", "-d", "1"],
                    stdout=subprocess.PIPE,
                    input=b'standby 0'
                )
    else:
        LOGGER.info("[%s] Unknown", f_type)
|
||||
|
||||
def main():
    """ Connect to the MQTT broker with TLS and process messages forever """
    client = mqtt.Client()

    # credentials and TLS have to be configured before connecting
    client.username_pw_set(MQTT_USER, password=MQTT_PASSWORD)
    client.tls_set()

    client.on_message = on_message
    client.on_connect = on_connect

    client.connect(MQTT_SERVER, 8883, 60)
    client.loop_forever()


if __name__ == '__main__':
    # Ctrl-C ends the client with a friendly message instead of a traceback
    try:
        main()
    except KeyboardInterrupt:
        print("Byebye")
|
|
@ -1,14 +0,0 @@
|
|||
[Unit]
|
||||
Description=PyLokid Dashboard Client
|
||||
After=network.target
|
||||
|
||||
[Service]
|
||||
User=pi
|
||||
Restart=always
|
||||
Environment="MQTT_SERVER=mybroker.example.com"
|
||||
Environment="MQTT_USER=myuser"
|
||||
Environment="MQTT_PASSWORD=mypassword"
|
||||
ExecStart=/usr/bin/python3 /opt/dashboard_client.py
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
|
@ -1,36 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
""" Gotify Functions """
|
||||
|
||||
import logging
|
||||
import json
|
||||
from urllib.parse import urljoin
|
||||
import requests
|
||||
|
||||
class GotifyClient:
    """ Gotify Client """

    def __init__(self, url, token):
        """ Remember the Gotify base URL and the application token """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Gotify URL %s', url)

        self.url = url
        self.token = token

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over Gotify

        f_type: used as message text
        f_id: used in the message title and as log prefix
        pdf_data, pdf_file: unused, accepted to match the other clients

        Failures are logged, nothing is raised or returned.
        """

        request_url = urljoin(self.url, '/message')

        try:
            resp = requests.post(
                request_url,
                # passed as parameter so that requests URL-encodes the token
                params={'token': self.token},
                json={
                    'title': 'Einsatz ' + f_id,
                    'message': f_type,
                    'priority': 5
                },
                # without a timeout an unresponsive server blocks the caller forever
                timeout=10,
            )
        except requests.exceptions.RequestException as err:
            self.logger.error('[%s] Could not connect to Gotify server: %s', f_id, err)
            # there is no response to inspect
            return

        # Print request result if server returns http error code
        if resp.status_code != requests.codes.ok:
            self.logger.error('[%s] Could not send message to Gotify server: %s', f_id, resp.text)
|
|
@ -71,11 +71,11 @@ class Lodur:
|
|||
try:
|
||||
zh_fw_ausg = datetime.strptime(
|
||||
pdf_data['ausgerueckt'],
|
||||
'%H:%M',
|
||||
'%H:%M:%S',
|
||||
)
|
||||
zh_am_schad = datetime.strptime(
|
||||
pdf_data['anort'],
|
||||
'%H:%M',
|
||||
pdf_data['vorort'],
|
||||
'%H:%M:%S',
|
||||
)
|
||||
except ValueError as err:
|
||||
self.logger.error('[%s] Date parsing failed: %s', f_id, err)
|
||||
|
@ -120,9 +120,9 @@ class Lodur:
|
|||
'%H:%M',
|
||||
)
|
||||
eins_ereig = pdf_data['einsatz']
|
||||
bemerkungen = pdf_data['bemerkungen']
|
||||
bemerkungen = pdf_data['bemerkungen'] + '\n' + pdf_data['disponierteeinheiten']
|
||||
wer_ala = pdf_data['melder']
|
||||
adr = pdf_data['strasse'] + ', ' + pdf_data['ort']
|
||||
adr = pdf_data['ort']
|
||||
else:
|
||||
date = datetime.now()
|
||||
time = datetime.now()
|
||||
|
@ -229,6 +229,7 @@ class Lodur:
|
|||
# Encoding bk causes some troubles - therefore we skip that - but it
|
||||
# would be good if it would be encoded as it can / will contain e.g.
|
||||
# Umlauts
|
||||
# AttributeError: 'bytes' object has no attribute 'parent'
|
||||
self.logger.info('Form data: %s = %s', key, value)
|
||||
if key in ('eins_ereig', 'adr', 'wer_ala'):
|
||||
self.browser[key] = value.encode('iso-8859-1')
|
||||
|
|
|
@ -1,46 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
""" MQTT Functions """
|
||||
|
||||
import logging
|
||||
import json
|
||||
import paho.mqtt.client as mqtt
|
||||
|
||||
class MQTTClient:
    """ MQTT Client """

    def __init__(self, server, username, password, base_topic):
        """ Connect to the MQTT broker with TLS on port 8883

        A failing connection is logged and not raised.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Connecting to MQTT broker %s', server)

        self.base_topic = base_topic
        # always defined, so that send_message() can detect a failed setup
        # instead of running into an AttributeError
        self.mqtt_client = None

        try:
            self.mqtt_client = mqtt.Client('pylokid')
            self.mqtt_client.username_pw_set(username, password=password)
            self.mqtt_client.tls_set()
            self.mqtt_client.connect(server, 8883, 60)
            self.mqtt_client.loop_start()
            self.logger.info('MQTT connection successful')
        except Exception as err:
            self.logger.error('MQTT connection failed: %s', str(err))

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over MQTT

        f_type: 'Einsatzausdruck_FW' publishes pdf_data as JSON and the
            content of pdf_file, 'Einsatzprotokoll' publishes pdf_data as
            JSON, any other value publishes nothing
        f_id: used in the topic and as log prefix
        pdf_data: JSON serializable data
        pdf_file: path to the PDF file, may be None
        """

        if self.mqtt_client is None:
            self.logger.error('[%s] Cannot publish information: no MQTT client available', f_id)
            return

        if f_type == 'Einsatzausdruck_FW':
            try:
                topic = "{0}/Einsatzausdruck_FW/{1}/".format(self.base_topic, f_id)
                self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
                self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))

                ## Publish the PDF blob
                if pdf_file is None:
                    self.logger.info('[%s] Cannot publish information: no PDF file given', f_id)
                else:
                    # the context manager closes the file handle again
                    with open(pdf_file, 'rb') as pdf_fh:
                        pdf_binary = pdf_fh.read()
                    self.mqtt_client.publish(topic + 'pdf', pdf_binary)
            except (IndexError, OSError) as err:
                # OSError covers a missing or unreadable PDF file
                self.logger.info('[%s] Cannot publish information: %s', f_id, err)
        elif f_type == 'Einsatzprotokoll':
            topic = "{0}/Einsatzprotokoll/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))
|
|
@ -1,209 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
""" extracts data from ELZ PDFs """
|
||||
|
||||
import io
|
||||
import logging
|
||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||
from pdfminer.converter import TextConverter
|
||||
from pdfminer.layout import LAParams
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
|
||||
class PDFHandling:
    """ PDF handling like parsing """

    def __init__(self):
        self.logger = logging.getLogger(__name__)

        # less logging for pdfminer - more is not needed
        for name in (
                'pdfminer.pdfdocument',
                'pdfminer.pdfpage',
                'pdfminer.pdfinterp',
                'pdfminer.psparser',
                'pdfminer.cmapdb',
                'pdfminer.pdfparser',
        ):
            logging.getLogger(name).setLevel(logging.WARNING)

    def concatenate_to_multiline_string(self, data, start, end):
        """ concatenates multiple lines to a single multiline string

        data: list of strings
        start, end: first and last index (both inclusive) to concatenate

        Every line is terminated with a newline. Returns an empty string if
        start is bigger than end.
        """

        return ''.join(data[index] + '\n' for index in range(start, end + 1))

    def convert(self, file):
        """ converts the PDF to a multiline string

        file: path of the PDF file
        """

        manager = PDFResourceManager()
        output = io.StringIO()
        converter = TextConverter(manager, output, codec='utf-8', laparams=LAParams())
        interpreter = PDFPageInterpreter(manager, converter)

        try:
            # the context manager closes the PDF even if pdfminer fails
            with open(file, 'rb') as infile:
                # NOTE(review): the empty page number set presumably selects
                # all pages - confirm against the pdfminer documentation
                for page in PDFPage.get_pages(infile, set(), caching=True, check_extractable=True):
                    interpreter.process_page(page)

            return output.getvalue()
        finally:
            converter.close()
            output.close()

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts as many information from the parsed Einsatzausdruck as possible

        file: path of the PDF file
        f_id: expected Einsatz ID, compared with the one found in the PDF

        Returns a dict with the keys auftrag, datum, zeit, melder, erfasser,
        bemerkungen, einsatz, sondersignal, ort, strasse and hinweis.
        Returns False if the PDF does not look like a known Einsatzausdruck
        or if the ID does not match.
        """

        converted = self.convert(file)
        splited = converted.splitlines()

        self.logger.debug('[%s] Parsed PDF raw:\n %s', f_id, converted)
        self.logger.debug('[%s] Line-splited PDF: %s', f_id, splited)

        # search some well-known words for later positional computation
        try:
            index_einsatzauftragfw = splited.index('Einsatzauftrag Feuerwehr')
            index_erfasser = splited.index('Erfasser')
            index_auftrag = splited.index('Auftrag')
            index_bemerkungen = splited.index('Bemerkungen')
            index_dispo = splited.index('Disponierte Einheiten')
            index_einsatz = splited.index('Einsatz')
            index_hinweis = splited.index('Hinweis')
            index_maps = splited.index('Google Maps')
        except ValueError as err:
            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
            return False

        # the PDF parsing not always produces the same output
        # let's define the already known output
        # All lookups are relative to the words found above. A position
        # behind the last line means the layout is unknown, which is reported
        # like every other unknown layout instead of raising IndexError.
        try:
            if index_bemerkungen == 6:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                # get length of bemerkungen field
                # NOTE(review): in this layout the field is taken to end
                # before the line 'Auftrag', all other layouts use
                # 'Disponierte Einheiten' - confirm this is intended
                length_bemerkungen = index_auftrag - index_bemerkungen - 1
                erfasser = splited[index_dispo - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                # sometimes there is just a phone number for the field melder but on
                # the second line, so the lines vary for erfasser and melder
                if index_dispo - index_erfasser == 10:
                    melder = splited[index_dispo - 4] + ', ' + splited[index_dispo - 3]
                else:
                    melder = splited[index_dispo - 4]
            # BMA style
            elif index_bemerkungen == 20:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 2]
                datum = splited[index_einsatzauftragfw + 3]
                zeit = splited[index_einsatzauftragfw + 4]
                einsatz = splited[index_einsatz + 6]
                sondersignal = splited[index_einsatz + 7]
                ort = splited[index_einsatz + 9]
                strasse = splited[index_einsatz + 10]
                melder = 'BMA' # There is no melder on a BMA Einsatzausdruck
            elif index_bemerkungen in (21, 22):
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                if index_bemerkungen - index_erfasser == 10:
                    melder = splited[index_bemerkungen - 4] + ', ' + splited[index_bemerkungen - 3]
                else:
                    melder = splited[index_bemerkungen - 4]
            elif index_bemerkungen == 24:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 4]
                datum = splited[index_einsatzauftragfw + 9]
                zeit = splited[index_einsatzauftragfw + 10]
                einsatz = splited[index_einsatz - 4]
                sondersignal = splited[index_einsatz - 3]
                ort = ''
                strasse = splited[index_einsatz - 2]
                melder = splited[index_dispo - 8] + ', ' + splited[index_dispo - 7]
            else:
                self.logger.error('[%s] Unknown location of Bemerkungen. Line %s', f_id, index_bemerkungen)
                return False
        except IndexError as err:
            self.logger.error('[%s] PDF file has an unexpected layout: %s', f_id, err)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id == auftrag:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
            return False

        # try to find out if there is a hinweis
        # if yes, the difference between the indexes is 4, else it's shorter
        if index_maps - index_hinweis == 4:
            hinweis = splited[index_hinweis + 2]
        else:
            hinweis = ''

        bemerkungen = self.concatenate_to_multiline_string(
            splited,
            index_bemerkungen + 1,
            index_bemerkungen + length_bemerkungen
        ).rstrip()

        # the field 'objekt' is not extracted
        data = {
            'auftrag': auftrag,
            'datum': datum,
            'zeit': zeit,
            'melder': melder,
            'erfasser': erfasser,
            'bemerkungen': bemerkungen,
            'einsatz': einsatz,
            'sondersignal': sondersignal,
            'ort': ort.title(),
            'strasse': strasse.title(),
            'hinweis': hinweis,
        }
        return data

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts as many information from the parsed Einsatzprotokoll as possible

        file: path of the PDF file
        f_id: expected Einsatz ID, compared with the one found in the PDF

        Returns a dict with the keys auftrag, datum, angelegt, disposition,
        ausgerueckt and anort. Returns False if the PDF is too short or if
        the ID does not match.
        """

        splited = self.convert(file).splitlines()

        # all fields are read from fixed line numbers, the highest one is 33
        if len(splited) < 34:
            self.logger.error('[%s] PDF file does not look like a Einsatzprotokoll', f_id)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id == splited[26]:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF', f_id)
            return False

        data = {
            'auftrag': splited[26],
            'datum': splited[25],
            'angelegt': splited[28],
            'disposition': splited[30],
            'ausgerueckt': splited[32],
            'anort': splited[33],
        }
        return data
|
|
@ -0,0 +1,142 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
""" extracts data from ELZ PDFs using Poppler pdftotext """
|
||||
|
||||
import subprocess
|
||||
import logging
|
||||
|
||||
class PDFParsing:
    """ PDF parsing """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.logger.info('PDF parsing based on pdftotext loaded')

    def _crop_text(self, f_id, file, field, coordinate):
        """ returns the text pdftotext extracts from one crop area of page 1

        Returns an empty string if pdftotext exits with an error.
        """

        # x- and y-coordinate of the crop area top left corner
        x = coordinate['xMin']
        y = coordinate['yMin']

        # width and height of crop area in pixels
        w = coordinate['xMax'] - coordinate['xMin']
        h = coordinate['yMax'] - coordinate['yMin']

        self.logger.debug(
            '[%s] Computed command for field %s: pdftotext -f 1 -l 1 -x %s -y %s -W %s -H %s',
            f_id, field, x, y, w, h,
        )

        # stderr is captured separately, otherwise error messages and
        # warnings of pdftotext would end up in the extracted field
        result = subprocess.run(
            [
                '/usr/bin/pdftotext',
                '-f', '1',
                '-l', '1',
                '-x', str(x),
                '-y', str(y),
                '-W', str(w),
                '-H', str(h),
                file,
                '-'
            ],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )

        if result.returncode != 0:
            self.logger.error(
                '[%s] pdftotext failed for field %s: %s', f_id, field, result.stderr.strip()
            )
            return ''

        return result.stdout.rstrip()

    def extract(self, f_id, file, datafields):
        """ extracts all given fields from the first page of the PDF

        f_id: expected Einsatz ID, compared with the extracted field 'auftrag'
        file: path of the PDF file
        datafields: dict which maps a field name to its crop area, a dict
            with the keys xMin, yMin, xMax and yMax. The optional key 'edit'
            with the value 'title' converts the text to title case.

        Returns a dict which maps every field name to the extracted text or
        False if the field 'auftrag' is missing or does not match f_id.
        """

        self.logger.info('[%s] parsing PDF file %s', f_id, file)

        data = {}

        for field, coordinate in datafields.items():
            text = self._crop_text(f_id, file, field, coordinate)

            ## TODO: fixup some fields (lowercase, remove unnecessary \n)
            if coordinate.get('edit') == 'title':
                data[field] = text.title()
            else:
                data[field] = text

        # sanity check to see if we can correlate the f_id
        # .get() reports field sets without 'auftrag' as mismatch instead of
        # raising KeyError
        auftrag = data.get('auftrag')
        if auftrag is not None and f_id == auftrag:
            self.logger.debug('[%s] ID matches in PDF', f_id)
            return data

        self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
        return False

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts information from Einsatzausdruck using external pdftotext """

        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 70, 'yMin': 47, 'xMax': 120, 'yMax': 58,
            },
            'datum': {
                'xMin': 190, 'yMin': 47, 'xMax': 239, 'yMax': 58,
            },
            'zeit': {
                'xMin': 190, 'yMin': 59, 'xMax': 215, 'yMax': 70,
            },
            'melder': {
                'xMin': 304, 'yMin': 47, 'xMax': 446, 'yMax': 70, 'edit': 'title'
            },
            'erfasser': {
                'xMin': 448, 'yMin': 59, 'xMax': 478, 'yMax': 70,
            },
            # big field until "Disponierte Einheiten"
            'bemerkungen': {
                'xMin': 28, 'yMin': 112, 'xMax': 590, 'yMax': 350,
            },
            'disponierteeinheiten': {
                'xMin': 28, 'yMin': 366, 'xMax': 450, 'yMax': 376,
            },
            'einsatz': {
                'xMin': 76, 'yMin': 690, 'xMax': 450, 'yMax': 703,
            },
            'sondersignal': {
                'xMin': 76, 'yMin': 707, 'xMax': 450, 'yMax': 721,
            },
            'ort': {
                'xMin': 76, 'yMin': 732, 'xMax': 590, 'yMax': 745,
            },
            'hinweis': {
                'xMin': 76, 'yMin': 773, 'xMax': 450, 'yMax': 787,
            },
        }

        return self.extract(f_id, file, coordinates)

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts information from Einsatzprotokoll using external pdftotext """

        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 192, 'yMin': 132, 'xMax': 238, 'yMax': 142,
            },
            'angelegt': {
                'xMin': 192, 'yMin': 294, 'xMax': 226, 'yMax': 304,
            },
            'dispo': {
                'xMin': 192, 'yMin': 312, 'xMax': 226, 'yMax': 322,
            },
            'ausgerueckt': {
                'xMin': 192, 'yMin': 331, 'xMax': 226, 'yMax': 341,
            },
            'vorort': {
                'xMin': 192, 'yMin': 348, 'xMax': 226, 'yMax': 358,
            },
        }

        return self.extract(f_id, file, coordinates)
|
67
main.py
67
main.py
|
@ -8,13 +8,12 @@ import time
|
|||
|
||||
import requests
|
||||
from dotenv import find_dotenv, load_dotenv
|
||||
from pushover import Client
|
||||
|
||||
# local classes
|
||||
from library.emailhandling import EmailHandling
|
||||
from library.lodur import Lodur
|
||||
from library.mqtt import MQTTClient
|
||||
from library.gotify import GotifyClient
|
||||
from library.pdf_extract import PDFHandling
|
||||
from library.pdftotext import PDFParsing
|
||||
from library.webdav import WebDav
|
||||
|
||||
# Configuration
|
||||
|
@ -29,17 +28,13 @@ WEBDAV_USERNAME = os.getenv("WEBDAV_USERNAME")
|
|||
WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
|
||||
WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
|
||||
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
|
||||
MQTT_SERVER = os.getenv("MQTT_SERVER")
|
||||
MQTT_USER = os.getenv("MQTT_USER")
|
||||
MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
|
||||
MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
|
||||
LODUR_USER = os.getenv("LODUR_USER")
|
||||
LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
|
||||
LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
|
||||
HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
|
||||
GOTIFY_URL = os.getenv("GOTIFY_URL")
|
||||
GOTIFY_TOKEN = os.getenv("GOTIFY_TOKEN")
|
||||
PYLOKID_VERSION = "1.2.0"
|
||||
PUSHOVER_API_TOKEN = os.getenv("PUSHOVER_API_TOKEN")
|
||||
PUSHOVER_USER_KEY = os.getenv("PUSHOVER_USER_KEY")
|
||||
PYLOKID_VERSION = "2.0.0"
|
||||
|
||||
def main():
|
||||
""" main """
|
||||
|
@ -77,22 +72,14 @@ def main():
|
|||
TMP_DIR,
|
||||
)
|
||||
|
||||
# Initialize MQTT Sessions
|
||||
mqtt_client = MQTTClient(
|
||||
MQTT_SERVER,
|
||||
MQTT_USER,
|
||||
MQTT_PASSWORD,
|
||||
MQTT_BASE_TOPIC,
|
||||
)
|
||||
|
||||
# Initialize Gotify
|
||||
gotify_client = GotifyClient(
|
||||
GOTIFY_URL,
|
||||
GOTIFY_TOKEN,
|
||||
# Initialize Pushover
|
||||
pushover = Client(
|
||||
user_key=PUSHOVER_USER_KEY,
|
||||
api_token=PUSHOVER_API_TOKEN
|
||||
)
|
||||
|
||||
# Initialize PDF Parser
|
||||
pdf = PDFHandling()
|
||||
pdf = PDFParsing()
|
||||
|
||||
# Main Loop
|
||||
while True:
|
||||
|
@ -133,9 +120,25 @@ def main():
|
|||
f_id,
|
||||
)
|
||||
|
||||
# publish Einsatz on MQTT and Gotify
|
||||
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
||||
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
||||
# publish Einsatz on Pushover
|
||||
logger.info(
|
||||
'[%s] Publishing message on Pushover', f_id
|
||||
)
|
||||
pushover.send_message(
|
||||
"Einsatz {} eröffnet: {}\n\n* Ort: {}\n* Melder: {}\n* Hinweis: {}\n* {}\n\n{}\n\n{}".format(
|
||||
f_id,
|
||||
pdf_data['einsatz'],
|
||||
pdf_data['ort'],
|
||||
pdf_data['melder'].replace('\n',' '),
|
||||
pdf_data['hinweis'],
|
||||
pdf_data['sondersignal'],
|
||||
pdf_data['disponierteeinheiten'],
|
||||
pdf_data['bemerkungen'],
|
||||
),
|
||||
title="Feuerwehr Einsatz",
|
||||
url="https://www.google.com/maps/search/?api=1&query={}".format(pdf_data['ort']),
|
||||
url_title="Ort auf Karte suchen"
|
||||
)
|
||||
|
||||
# create new Einsatzrapport in Lodur
|
||||
lodur_client.einsatzrapport(
|
||||
|
@ -173,9 +176,15 @@ def main():
|
|||
# Update entry in Lodur with parse PDF data
|
||||
lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)
|
||||
|
||||
# Einsatz finished - publish on MQTT and Gotify
|
||||
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
||||
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
|
||||
# Einsatz finished - publish on pushover
|
||||
logger.info(
|
||||
'[%s] Publishing message on Pushover', f_id
|
||||
)
|
||||
pushover.send_message(
|
||||
"Einsatz {} beendet".format(f_id),
|
||||
title="Feuerwehr Einsatz beendet",
|
||||
)
|
||||
|
||||
else:
|
||||
logger.error(
|
||||
'[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
aioeasywebdav==2.4.0
|
||||
MechanicalSoup==0.9.0.post4
|
||||
paho-mqtt==1.3.1
|
||||
pdfminer.six==20170720
|
||||
python-dotenv==0.7.1
|
||||
# MechanicalSoup > 0.11.0 produces "TypeError: expected string or bytes-like
|
||||
# object" on file upload
|
||||
MechanicalSoup==0.11.0
|
||||
python-dotenv==0.10.3
|
||||
requests>=2.20.0
|
||||
python-pushover==0.4
|
|
@ -1,21 +0,0 @@
|
|||
import re
|
||||
import logging
|
||||
from pprint import pprint
|
||||
from pathlib import Path
|
||||
from library.pdf_extract import PDFHandling
|
||||
|
||||
PATH = '/tmp/pylokid'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFHandling()

# the Einsatz ID is an "F" followed by eight digits and an underscore
F_ID_PATTERN = re.compile(r'.*(F[0-9]{8})_.*')

# parse every PDF below PATH and print the extracted data
for path in Path(PATH).glob('**/*.pdf'):
    file = str(path)
    print(file)
    match = F_ID_PATTERN.search(file)
    if match is None:
        # skip the file instead of failing with AttributeError on None
        print('No Einsatz ID found in file name - skipping')
        continue
    f_id = match.group(1)
    print(f_id)
    pprint(PDF.extract_einsatzausdruck(file, f_id))
|
|
@ -0,0 +1,30 @@
|
|||
import re
|
||||
import logging
|
||||
from pprint import pprint
|
||||
from pathlib import Path
|
||||
from library.pdftotext import PDFParsing
|
||||
|
||||
PATH = '/home/tobru/Documents/Feuerwehr/Stab/Fourier/Einsatzdepeschen/2019'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFParsing()

# the Einsatz ID is an "F" followed by eight digits and an underscore
F_ID_PATTERN = re.compile(r'.*(F[0-9]{8})_.*')


def parse_all(glob_pattern, extract):
    """ prints the result of extract(file, f_id) for every matching PDF below PATH """
    for path in Path(PATH).glob(glob_pattern):
        file = str(path)
        print(file)
        match = F_ID_PATTERN.search(file)
        if match is None:
            # skip the file instead of failing with AttributeError on None
            print('No Einsatz ID found in file name - skipping')
            continue
        f_id = match.group(1)
        print(f_id)
        pprint(extract(file, f_id))


parse_all('**/Einsatzausdruck*.pdf', PDF.extract_einsatzausdruck)

# Parsing of Einsatzprotokoll PDFs is disabled, uncomment to enable it:
# parse_all('**/Einsatzprotokoll*.pdf', PDF.extract_einsatzprotokoll)
|
Loading…
Reference in New Issue