Merge pull request #9 from tobru/rewrite_pdf_parsing

Rewrite pdf parsing
This commit is contained in:
Tobias Brunner 2019-09-22 22:02:55 +02:00 committed by GitHub
commit d9d72ee442
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 227 additions and 478 deletions

View File

@ -1,5 +1,11 @@
FROM python:3.7
# Install pdftotext
RUN set -x; \
apt update && \
apt install -y poppler-utils && \
rm -rf /var/lib/apt/lists/*
WORKDIR /usr/src/pylokid
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

View File

@ -1,114 +0,0 @@
#!/usr/bin/env python3
""" The dashboard client """
import os
import logging
import subprocess
from dotenv import find_dotenv, load_dotenv
import paho.mqtt.client as mqtt
# Configuration
# All settings come from the process environment. load_dotenv(find_dotenv())
# is python-dotenv's way of loading a .env file (if one is found) before the
# lookups below.
load_dotenv(find_dotenv())
# MQTT broker host and credentials; no defaults, so these are None when unset.
MQTT_SERVER = os.getenv("MQTT_SERVER")
MQTT_USER = os.getenv("MQTT_USER")
MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
# Base topic of the pylokid messages; defaults to "pylokid".
MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
# TV power control through cec-client is active only when this is the
# literal string "yes" (the default).
CEC_ENABLED = os.getenv("CEC_ENABLED", "yes")
# Directory in which received PDFs are written before being displayed.
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
# Initialization
# Log everything from DEBUG upwards with timestamp, logger name and level.
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
LOGGER = logging.getLogger('dashboard')
# Maps an Einsatz id (f_id) to the PID of the xpdf process displaying it.
PIDS = {}
def on_connect(client, userdata, flags, rc):
    """Subscribe to all pylokid topics once the broker connection is up.

    Registered as the MQTT client's ``on_connect`` callback.

    Args:
        client: The MQTT client instance that connected.
        userdata: Unused.
        flags: Unused.
        rc: Connection result code reported by the client library.
    """
    LOGGER.info("Connected to MQTT with result code %s", rc)
    # Subscribing in on_connect() means that if we lose the connection and
    # reconnect then subscriptions will be renewed.
    # Honour the configured base topic (defaults to "pylokid") instead of a
    # hard-coded literal so that MQTT_BASE_TOPIC actually takes effect.
    # NOTE(review): on_message parses topics by position and therefore
    # expects the base topic to be a single level (no "/").
    client.subscribe(MQTT_BASE_TOPIC + "/#")
def on_message(client, userdata, msg):
    """Handle one MQTT message and drive the dashboard display.

    Topics are parsed by position: ``<base>/<type>/<f_id>[/<kind>]``.

    * ``Einsatzausdruck_FW`` with kind ``pdf``: the payload is written to
      ``TMP_DIR`` and shown fullscreen with xpdf on ``DISPLAY=:0`` (once per
      f_id). With CEC enabled, the TV is powered on if it reports standby.
    * ``Einsatzprotokoll``: the xpdf process of that f_id is killed. With CEC
      enabled, the TV is sent to standby once no xpdf is tracked anymore.
    * Anything else is only logged.

    Args:
        client: The MQTT client instance (unused).
        userdata: Unused.
        msg: Received message; ``msg.topic`` and ``msg.payload`` are read.
    """
    topic_detail = msg.topic.split("/")
    if len(topic_detail) < 3:
        # Without a type and an id there is nothing we could act on;
        # previously this raised IndexError inside the callback.
        LOGGER.info("Ignoring message on unexpected topic %s", msg.topic)
        return

    f_type = topic_detail[1]
    f_id = topic_detail[2]
    payload_kind = topic_detail[3] if len(topic_detail) > 3 else None

    if f_type == 'Einsatzausdruck_FW' and payload_kind == 'pdf':
        LOGGER.info("[%s] New Einsatzausdruck received", f_id)

        file_name = TMP_DIR + "/dashboard_" + f_id + ".pdf"
        # Context manager guarantees the handle is closed even if the
        # write fails.
        with open(file_name, "wb") as pdf_file:
            pdf_file.write(msg.payload)

        if f_id in PIDS:
            LOGGER.info(
                "[%s] Einsatzausdruck already being displayed with PID %s",
                f_id,
                str(PIDS[f_id])
            )
        else:
            LOGGER.info("[%s] Displaying Einsatzausdruck with xpdf", f_id)
            process = subprocess.Popen(
                ["/usr/bin/xpdf", "-z", "width", "-fullscreen", file_name],
                env=dict(os.environ, DISPLAY=":0")
            )
            PIDS[f_id] = process.pid

        if CEC_ENABLED == "yes":
            # Check power state of TV; the status is expected on the second
            # output line, but do not crash if cec-client printed less.
            status_lines = _cec_send(b'pow 0').splitlines()
            if len(status_lines) > 1 and status_lines[1] == b'power status: standby':
                LOGGER.info("[%s] CEC power status: standby. Powering TV on", f_id)
                _cec_send(b'on 0')
            else:
                LOGGER.info("[%s] CEC power status: probably on", f_id)

    elif f_type == 'Einsatzprotokoll':
        LOGGER.info("[%s] New Einsatzprotokoll received", f_id)

        if f_id in PIDS:
            # Drop the bookkeeping entry first so a failing kill cannot
            # leave a stale PID behind.
            pid = PIDS.pop(f_id)
            LOGGER.info("[%s] Killing xpdf PID %s", f_id, str(pid))
            try:
                os.kill(pid, 9)
            except ProcessLookupError:
                LOGGER.info("[%s] xpdf PID %s was already gone", f_id, str(pid))
        else:
            LOGGER.info("[%s] No xpdf PID found", f_id)

        if CEC_ENABLED == "yes":
            # Turn off TV if no xpdf running anymore
            if not PIDS:
                LOGGER.info("[%s] No xpdf running anymore. Powering TV off", f_id)
                _cec_send(b'standby 0')

    else:
        LOGGER.info("[%s] Unknown", f_type)


def _cec_send(command):
    """Feed one command to cec-client on stdin and return its stdout bytes."""
    return subprocess.run(
        ["/usr/bin/cec-client", "-s", "-d", "1"],
        stdout=subprocess.PIPE,
        input=command
    ).stdout
def main():
    """Connect to the MQTT broker over TLS and process messages forever."""
    client = mqtt.Client()

    # Credentials and transport security
    client.username_pw_set(MQTT_USER, password=MQTT_PASSWORD)
    client.tls_set()

    # Callbacks must be in place before the connection is established
    client.on_connect = on_connect
    client.on_message = on_message

    client.connect(MQTT_SERVER, 8883, 60)
    client.loop_forever()


if __name__ == '__main__':
    # Ctrl-C is the regular way to stop the client; exit without a traceback.
    try:
        main()
    except KeyboardInterrupt:
        print("Byebye")

View File

@ -1,14 +0,0 @@
[Unit]
Description=PyLokid Dashboard Client
After=network.target
[Service]
User=pi
Restart=always
Environment="MQTT_SERVER=mybroker.example.com"
Environment="MQTT_USER=myuser"
Environment="MQTT_PASSWORD=mypassword"
ExecStart=/usr/bin/python3 /opt/dashboard_client.py
[Install]
WantedBy=multi-user.target

View File

@ -1,36 +0,0 @@
#!/usr/bin/env python3
""" Gotify Functions """
import logging
import json
from urllib.parse import urljoin
import requests
class GotifyClient:
    """ Gotify Client: pushes short notifications to a Gotify server """

    def __init__(self, url, token):
        """Store the server URL and token used for every request.

        Args:
            url: Base URL of the Gotify server.
            token: Token appended to the request URL as ``token`` parameter.
        """
        self.logger = logging.getLogger(__name__)
        self.logger.info('Gotify URL %s', url)
        self.url = url
        self.token = token

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over Gotify

        Failures are logged, never raised.

        Args:
            f_type: Used as the message text.
            f_id: Einsatz id; used in the title and as log prefix.
            pdf_data: Unused, accepted so callers can pass the same
                arguments to every notification client.
            pdf_file: Unused, see ``pdf_data``.
        """
        request_url = urljoin(self.url, '/message?token=' + self.token)
        try:
            resp = requests.post(request_url, json={
                'title': 'Einsatz ' + f_id,
                'message': f_type,
                'priority': 5
            })
        except requests.exceptions.RequestException as err:
            # '%s' instead of '%e': '%e' is a float format and makes the
            # logging call itself fail for exceptions and strings.
            self.logger.error('[%s] Could not connect to Gotify server: %s', f_id, err)
            # No response object exists in this case; stop here instead of
            # running into an unbound 'resp' below.
            return

        # Print request result if server returns http error code.
        # Compare by value: 'is not' tests object identity, which is not a
        # reliable way to compare integers.
        if resp.status_code != requests.codes.ok:
            self.logger.error(
                '[%s] Could not send message to Gotify server: %s',
                f_id,
                resp.content.decode('utf-8', errors='replace'),
            )

View File

@ -71,11 +71,11 @@ class Lodur:
try:
zh_fw_ausg = datetime.strptime(
pdf_data['ausgerueckt'],
'%H:%M',
'%H:%M:%S',
)
zh_am_schad = datetime.strptime(
pdf_data['anort'],
'%H:%M',
pdf_data['vorort'],
'%H:%M:%S',
)
except ValueError as err:
self.logger.error('[%s] Date parsing failed: %s', f_id, err)
@ -120,9 +120,9 @@ class Lodur:
'%H:%M',
)
eins_ereig = pdf_data['einsatz']
bemerkungen = pdf_data['bemerkungen']
bemerkungen = pdf_data['bemerkungen'] + '\n' + pdf_data['disponierteeinheiten']
wer_ala = pdf_data['melder']
adr = pdf_data['strasse'] + ', ' + pdf_data['ort']
adr = pdf_data['ort']
else:
date = datetime.now()
time = datetime.now()
@ -229,6 +229,7 @@ class Lodur:
# Encoding bk causes some trouble, so we skip it. Ideally it would be
# encoded as well, since it can (and will) contain e.g. umlauts.
# AttributeError: 'bytes' object has no attribute 'parent'
self.logger.info('Form data: %s = %s', key, value)
if key in ('eins_ereig', 'adr', 'wer_ala'):
self.browser[key] = value.encode('iso-8859-1')

View File

@ -1,46 +0,0 @@
#!/usr/bin/env python3
""" MQTT Functions """
import logging
import json
import paho.mqtt.client as mqtt
class MQTTClient:
    """ MQTT Client: publishes Einsatz information below a base topic """

    def __init__(self, server, username, password, base_topic):
        """Connect to the broker over TLS (port 8883) and start the loop.

        A failed connection is logged, not raised; ``self.mqtt_client`` is
        then ``None`` and ``send_message`` only logs an error.

        Args:
            server: Broker host name.
            username: MQTT user name.
            password: MQTT password.
            base_topic: First topic level of every published message.
        """
        self.logger = logging.getLogger(__name__)
        self.base_topic = base_topic
        # Only set once the connection is fully established, so that
        # send_message never sees a missing or half-initialised client.
        self.mqtt_client = None
        self.logger.info('Connecting to MQTT broker %s', server)

        try:
            client = mqtt.Client('pylokid')
            client.username_pw_set(username, password=password)
            client.tls_set()
            client.connect(server, 8883, 60)
            client.loop_start()
        except Exception as err:
            self.logger.error('MQTT connection failed: %s', str(err))
        else:
            self.mqtt_client = client
            self.logger.info('MQTT connection successful')

    def send_message(self, f_type, f_id, pdf_data=None, pdf_file=None):
        """ Publish a message over MQTT

        ``Einsatzausdruck_FW`` publishes ``pdf_data`` as JSON on ``.../json``
        and the raw PDF on ``.../pdf``. ``Einsatzprotokoll`` publishes only
        the JSON. Any other ``f_type`` is ignored.

        Args:
            f_type: Message type, see above.
            f_id: Einsatz id; becomes part of the topic.
            pdf_data: JSON-serialisable data extracted from the PDF.
            pdf_file: Path of the PDF file (``Einsatzausdruck_FW`` only).
        """
        if self.mqtt_client is None:
            self.logger.error('[%s] Cannot publish information: no MQTT connection', f_id)
            return

        if f_type == 'Einsatzausdruck_FW':
            topic = "{0}/Einsatzausdruck_FW/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            try:
                self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))

                ## Publish the PDF blob
                # 'with' closes the file handle, which was leaked before.
                with open(pdf_file, 'rb') as pdf_fh:
                    self.mqtt_client.publish(topic + 'pdf', pdf_fh.read())
            except (IndexError, OSError, TypeError, ValueError) as err:
                # OSError: PDF unreadable; TypeError/ValueError: missing file
                # name or data that cannot be serialised or published.
                self.logger.info('[%s] Cannot publish information: %s', f_id, err)
        elif f_type == 'Einsatzprotokoll':
            topic = "{0}/Einsatzprotokoll/{1}/".format(self.base_topic, f_id)
            self.logger.info('[%s] Publishing information on MQTT topic %s', f_id, topic)
            self.mqtt_client.publish(topic + 'json', json.dumps(pdf_data))

View File

@ -1,209 +0,0 @@
#!/usr/bin/env python3
""" extracts data from ELZ PDFs """
import io
import logging
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
class PDFHandling:
    """ PDF handling like parsing

    Converts ELZ PDFs to text with pdfminer and picks the fields out of the
    resulting lines by their position relative to well-known keywords.
    """

    # pdfminer loggers that are limited to WARNING and above
    _QUIET_LOGGERS = (
        'pdfminer.pdfdocument',
        'pdfminer.pdfpage',
        'pdfminer.pdfinterp',
        'pdfminer.psparser',
        'pdfminer.cmapdb',
        'pdfminer.pdfparser',
    )

    def __init__(self):
        self.logger = logging.getLogger(__name__)

        # less logging for pdfminer - more is not needed
        for name in self._QUIET_LOGGERS:
            logging.getLogger(name).setLevel(logging.WARNING)

    def concatenate_to_multiline_string(self, data, start, end):
        """ concatenates multiple lines to a single multiline string

        Args:
            data: Sequence of strings.
            start: Index of the first line to include.
            end: Index of the last line to include (inclusive).

        Returns:
            The selected lines, each followed by a newline. Empty string
            when ``end`` is smaller than ``start``.
        """
        return ''.join(data[index] + '\n' for index in range(start, end + 1))

    def convert(self, file):
        """ converts the PDF to a multiline string

        Args:
            file: Path of the PDF file.

        Returns:
            The text of all pages as produced by pdfminer's TextConverter.
        """
        pagenums = set()
        manager = PDFResourceManager()
        output = io.StringIO()
        converter = TextConverter(manager, output, codec='utf-8', laparams=LAParams())

        # try/finally and 'with' make sure the file, the converter and the
        # buffer are released even when a page fails to process.
        try:
            interpreter = PDFPageInterpreter(manager, converter)
            with open(file, 'rb') as infile:
                pages = PDFPage.get_pages(
                    infile, pagenums, caching=True, check_extractable=True
                )
                for page in pages:
                    interpreter.process_page(page)
            return output.getvalue()
        finally:
            converter.close()
            output.close()

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts as many information from the parsed Einsatzausdruck as possible

        Args:
            file: Path of the PDF file.
            f_id: Expected Einsatz id; must equal the id found in the PDF.

        Returns:
            A dict with the extracted fields, or ``False`` when the PDF does
            not look like a known Einsatzausdruck layout or the id differs.
        """
        converted = self.convert(file)
        splited = converted.splitlines()

        self.logger.debug('[%s] Parsed PDF raw:\n %s', f_id, converted)
        self.logger.debug('[%s] Line-splited PDF: %s', f_id, splited)

        # search some well-known words for later positional computation
        try:
            index_einsatzauftragfw = splited.index('Einsatzauftrag Feuerwehr')
            index_erfasser = splited.index('Erfasser')
            index_auftrag = splited.index('Auftrag')
            index_bemerkungen = splited.index('Bemerkungen')
            index_dispo = splited.index('Disponierte Einheiten')
            index_einsatz = splited.index('Einsatz')
            index_hinweis = splited.index('Hinweis')
            index_maps = splited.index('Google Maps')
        except ValueError as err:
            self.logger.error('[%s] PDF file does not look like a Einsatzausdruck: %s', f_id, err)
            return False

        # the PDF parsing not always produces the same output
        # let's define the already known output
        try:
            if index_bemerkungen == 6:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                # get length of bemerkungen field
                # NOTE(review): the original comment names the line
                # 'Disponierte Einheiten' as the end of the field, but this
                # layout uses the 'Auftrag' line as end marker - confirm.
                length_bemerkungen = index_auftrag - index_bemerkungen - 1
                erfasser = splited[index_dispo - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                # sometimes there is just a phone number for the field melder but on
                # the second line, so the lines vary for erfasser and melder
                if index_dispo - index_erfasser == 10:
                    melder = splited[index_dispo - 4] + ', ' + splited[index_dispo - 3]
                else:
                    melder = splited[index_dispo - 4]
            # BMA style
            elif index_bemerkungen == 20:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 2]
                datum = splited[index_einsatzauftragfw + 3]
                zeit = splited[index_einsatzauftragfw + 4]
                einsatz = splited[index_einsatz + 6]
                sondersignal = splited[index_einsatz + 7]
                ort = splited[index_einsatz + 9]
                strasse = splited[index_einsatz + 10]
                melder = 'BMA'  # There is no melder on a BMA Einsatzausdruck
            elif index_bemerkungen == 21 or index_bemerkungen == 22:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_erfasser + 2]
                datum = splited[index_erfasser + 3]
                zeit = splited[index_erfasser + 4]
                einsatz = splited[index_einsatz - 6]
                sondersignal = splited[index_einsatz - 5]
                ort = splited[index_einsatz - 3]
                strasse = splited[index_einsatz - 2]
                if index_bemerkungen - index_erfasser == 10:
                    melder = splited[index_bemerkungen - 4] + ', ' + splited[index_bemerkungen - 3]
                else:
                    melder = splited[index_bemerkungen - 4]
            elif index_bemerkungen == 24:
                self.logger.info('[%s] Found Bemerkungen on line %s', f_id, index_bemerkungen)
                length_bemerkungen = index_dispo - index_bemerkungen - 1
                erfasser = splited[index_bemerkungen - 2]
                auftrag = splited[index_einsatzauftragfw + 4]
                datum = splited[index_einsatzauftragfw + 9]
                zeit = splited[index_einsatzauftragfw + 10]
                einsatz = splited[index_einsatz - 4]
                sondersignal = splited[index_einsatz - 3]
                ort = ''
                strasse = splited[index_einsatz - 2]
                melder = splited[index_dispo - 8] + ', ' + splited[index_dispo - 7]
            else:
                self.logger.error('[%s] Unknown location of Bemerkungen. Line %s', f_id, index_bemerkungen)
                return False
        except IndexError as err:
            # A keyword sits so close to the end of the text that an expected
            # neighbouring line is missing: treat it as an unknown layout.
            self.logger.error('[%s] PDF has fewer lines than the layout requires: %s', f_id, err)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id == auftrag:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
            return False

        # try to find out if there is a hinweis
        # if yes, the difference between the indexes is 4, else it's shorter
        if index_maps - index_hinweis == 4:
            hinweis = splited[index_hinweis + 2]
        else:
            hinweis = ''

        data = {
            'auftrag': auftrag,
            'datum': datum,
            'zeit': zeit,
            'melder': melder,
            'erfasser': erfasser,
            'bemerkungen': self.concatenate_to_multiline_string(
                splited,
                index_bemerkungen + 1,
                index_bemerkungen + length_bemerkungen
            ).rstrip(),
            'einsatz': einsatz,
            'sondersignal': sondersignal,
            'ort': ort.title(),
            'strasse': strasse.title(),
            #'objekt': splited[],
            'hinweis': hinweis,
        }
        return data

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts as many information from the parsed Einsatzprotokoll as possible

        The fields are read from fixed line numbers of the converted text.

        Args:
            file: Path of the PDF file.
            f_id: Expected Einsatz id; must equal line 26 of the text.

        Returns:
            A dict with the extracted fields, or ``False`` when the text is
            too short or the id differs.
        """
        splited = self.convert(file).splitlines()

        # The highest line index read below is 33; shorter texts cannot be
        # an Einsatzprotokoll and used to raise IndexError.
        if len(splited) < 34:
            self.logger.error('[%s] PDF file does not look like an Einsatzprotokoll', f_id)
            return False

        # sanity check to see if we can correlate the f_id
        if f_id == splited[26]:
            self.logger.info('[%s] ID matches in PDF', f_id)
        else:
            self.logger.error('[%s] ID does not match in PDF', f_id)
            return False

        data = {
            'auftrag': splited[26],
            'datum': splited[25],
            'angelegt': splited[28],
            'disposition': splited[30],
            'ausgerueckt': splited[32],
            'anort': splited[33],
        }
        return data

142
library/pdftotext.py Normal file
View File

@ -0,0 +1,142 @@
#!/usr/bin/env python3
""" extracts data from ELZ PDFs using Poppler pdftotext """
import subprocess
import logging
class PDFParsing:
    """ PDF parsing

    Reads individual fields from the first page of a PDF by calling
    Poppler's ``pdftotext`` once per field with a crop area.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__)
        self.logger.info('PDF parsing based on pdftotext loaded')

    @staticmethod
    def _pdftotext_command(file, x, y, w, h):
        """ builds the pdftotext argument list for one crop area on page 1 """
        return [
            '/usr/bin/pdftotext',
            '-f', '1',
            '-l', '1',
            '-x', str(x),
            '-y', str(y),
            '-W', str(w),
            '-H', str(h),
            file,
            '-'
        ]

    def extract(self, f_id, file, datafields):
        """ extracts every field described in datafields from the PDF

        Args:
            f_id: Expected Einsatz id; must equal the extracted 'auftrag'.
            file: Path of the PDF file.
            datafields: Mapping of field name to a dict with the keys
                'xMin', 'yMin', 'xMax', 'yMax' and optionally
                'edit': 'title' to title-case the extracted text.

        Returns:
            A dict of field name to extracted text, or ``False`` when
            pdftotext cannot be started or the id does not match.
        """
        self.logger.info('[%s] parsing PDF file %s', f_id, file)

        data = {}
        for field, coordinate in datafields.items():
            # x-coordinate of the crop area top left corner
            x = coordinate['xMin']
            # y-coordinate of the crop area top left corner
            y = coordinate['yMin']
            # width of crop area in pixels
            w = coordinate['xMax'] - coordinate['xMin']
            # height of crop area in pixels
            h = coordinate['yMax'] - coordinate['yMin']

            self.logger.debug('[%s] Computed command for field %s: %s', f_id, field,
                'pdftotext -f 1 -l 1 -x {} -y {} -W {} -H {}'.format(x,y,w,h)
            )

            try:
                # stderr is captured separately so that warnings and error
                # messages of pdftotext never end up inside a field value.
                result = subprocess.run(
                    self._pdftotext_command(file, x, y, w, h),
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True)
            except OSError as err:
                # e.g. pdftotext is not installed
                self.logger.error('[%s] Could not run pdftotext: %s', f_id, err)
                return False

            if result.returncode != 0:
                self.logger.error(
                    '[%s] pdftotext failed for field %s (exit code %s): %s',
                    f_id, field, result.returncode, result.stderr.rstrip()
                )
                text = ''
            else:
                if result.stderr:
                    self.logger.debug(
                        '[%s] pdftotext messages for field %s: %s',
                        f_id, field, result.stderr.rstrip()
                    )
                text = result.stdout.rstrip()

            ## TODO: fixup some fields (lowercase, remove unnecessary \n)
            if coordinate.get('edit') == 'title':
                data[field] = text.title()
            else:
                data[field] = text

        # sanity check to see if we can correlate the f_id
        # .get(): a field map without 'auftrag' is a mismatch, not a crash
        auftrag = data.get('auftrag')
        if f_id == auftrag:
            self.logger.debug('[%s] ID matches in PDF', f_id)
            return data

        self.logger.error('[%s] ID does not match in PDF: "%s"', f_id, auftrag)
        return False

    def extract_einsatzausdruck(self, file, f_id):
        """ extracts information from Einsatzausdruck using external pdftotext

        Returns the dict produced by ``extract`` or ``False`` on mismatch.
        """
        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 70, 'yMin': 47, 'xMax': 120, 'yMax': 58,
            },
            'datum': {
                'xMin': 190, 'yMin': 47, 'xMax': 239, 'yMax': 58,
            },
            'zeit': {
                'xMin': 190, 'yMin': 59, 'xMax': 215, 'yMax': 70,
            },
            'melder': {
                'xMin': 304, 'yMin': 47, 'xMax': 446, 'yMax': 70, 'edit': 'title'
            },
            'erfasser': {
                'xMin': 448, 'yMin': 59, 'xMax': 478, 'yMax': 70,
            },
            # big field until "Disponierte Einheiten"
            'bemerkungen': {
                'xMin': 28, 'yMin': 112, 'xMax': 590, 'yMax': 350,
            },
            'disponierteeinheiten': {
                'xMin': 28, 'yMin': 366, 'xMax': 450, 'yMax': 376,
            },
            'einsatz': {
                'xMin': 76, 'yMin': 690, 'xMax': 450, 'yMax': 703,
            },
            'sondersignal': {
                'xMin': 76, 'yMin': 707, 'xMax': 450, 'yMax': 721,
            },
            'ort': {
                'xMin': 76, 'yMin': 732, 'xMax': 590, 'yMax': 745,
            },
            'hinweis': {
                'xMin': 76, 'yMin': 773, 'xMax': 450, 'yMax': 787,
            },
        }

        return self.extract(f_id, file, coordinates)

    def extract_einsatzprotokoll(self, file, f_id):
        """ extracts information from Einsatzprotokoll using external pdftotext

        Returns the dict produced by ``extract`` or ``False`` on mismatch.
        """
        self.logger.debug('[%s] Parsing PDF: %s', f_id, file)

        # Get them using 'pdftotext -bbox'
        # y = row
        # x = column: xMax 450 / 590 means full width
        coordinates = {
            'auftrag': {
                'xMin': 192, 'yMin': 132, 'xMax': 238, 'yMax': 142,
            },
            'angelegt': {
                'xMin': 192, 'yMin': 294, 'xMax': 226, 'yMax': 304,
            },
            'dispo': {
                'xMin': 192, 'yMin': 312, 'xMax': 226, 'yMax': 322,
            },
            'ausgerueckt': {
                'xMin': 192, 'yMin': 331, 'xMax': 226, 'yMax': 341,
            },
            'vorort': {
                'xMin': 192, 'yMin': 348, 'xMax': 226, 'yMax': 358,
            },
        }

        return self.extract(f_id, file, coordinates)

67
main.py
View File

@ -8,13 +8,12 @@ import time
import requests
from dotenv import find_dotenv, load_dotenv
from pushover import Client
# local classes
from library.emailhandling import EmailHandling
from library.lodur import Lodur
from library.mqtt import MQTTClient
from library.gotify import GotifyClient
from library.pdf_extract import PDFHandling
from library.pdftotext import PDFParsing
from library.webdav import WebDav
# Configuration
@ -29,17 +28,13 @@ WEBDAV_USERNAME = os.getenv("WEBDAV_USERNAME")
WEBDAV_PASSWORD = os.getenv("WEBDAV_PASSWORD")
WEBDAV_BASEDIR = os.getenv("WEBDAV_BASEDIR")
TMP_DIR = os.getenv("TMP_DIR", "/tmp")
MQTT_SERVER = os.getenv("MQTT_SERVER")
MQTT_USER = os.getenv("MQTT_USER")
MQTT_PASSWORD = os.getenv("MQTT_PASSWORD")
MQTT_BASE_TOPIC = os.getenv("MQTT_BASE_TOPIC", "pylokid")
LODUR_USER = os.getenv("LODUR_USER")
LODUR_PASSWORD = os.getenv("LODUR_PASSWORD")
LODUR_BASE_URL = os.getenv("LODUR_BASE_URL")
HEARTBEAT_URL = os.getenv("HEARTBEAT_URL")
GOTIFY_URL = os.getenv("GOTIFY_URL")
GOTIFY_TOKEN = os.getenv("GOTIFY_TOKEN")
PYLOKID_VERSION = "1.2.0"
PUSHOVER_API_TOKEN = os.getenv("PUSHOVER_API_TOKEN")
PUSHOVER_USER_KEY = os.getenv("PUSHOVER_USER_KEY")
PYLOKID_VERSION = "2.0.0"
def main():
""" main """
@ -77,22 +72,14 @@ def main():
TMP_DIR,
)
# Initialize MQTT Sessions
mqtt_client = MQTTClient(
MQTT_SERVER,
MQTT_USER,
MQTT_PASSWORD,
MQTT_BASE_TOPIC,
)
# Initialize Gotify
gotify_client = GotifyClient(
GOTIFY_URL,
GOTIFY_TOKEN,
# Initialize Pushover
pushover = Client(
user_key=PUSHOVER_USER_KEY,
api_token=PUSHOVER_API_TOKEN
)
# Initialize PDF Parser
pdf = PDFHandling()
pdf = PDFParsing()
# Main Loop
while True:
@ -133,9 +120,25 @@ def main():
f_id,
)
# publish Einsatz on MQTT and Gotify
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
# publish Einsatz on Pushover
logger.info(
'[%s] Publishing message on Pushover', f_id
)
pushover.send_message(
"Einsatz {} eröffnet: {}\n\n* Ort: {}\n* Melder: {}\n* Hinweis: {}\n* {}\n\n{}\n\n{}".format(
f_id,
pdf_data['einsatz'],
pdf_data['ort'],
pdf_data['melder'].replace('\n',' '),
pdf_data['hinweis'],
pdf_data['sondersignal'],
pdf_data['disponierteeinheiten'],
pdf_data['bemerkungen'],
),
title="Feuerwehr Einsatz",
url="https://www.google.com/maps/search/?api=1&query={}".format(pdf_data['ort']),
url_title="Ort auf Karte suchen"
)
# create new Einsatzrapport in Lodur
lodur_client.einsatzrapport(
@ -173,9 +176,15 @@ def main():
# Update entry in Lodur with parse PDF data
lodur_client.einsatzprotokoll(f_id, pdf_data, webdav_client)
# Einsatz finished - publish on MQTT and Gotify
mqtt_client.send_message(f_type, f_id, pdf_data, pdf_file)
gotify_client.send_message(f_type, f_id, pdf_data, pdf_file)
# Einsatz finished - publish on pushover
logger.info(
'[%s] Publishing message on Pushover', f_id
)
pushover.send_message(
"Einsatz {} beendet".format(f_id),
title="Feuerwehr Einsatz beendet",
)
else:
logger.error(
'[%s] Cannot process Einsatzprotokoll as there is no Lodur ID',

View File

@ -1,6 +1,7 @@
aioeasywebdav==2.4.0
MechanicalSoup==0.9.0.post4
paho-mqtt==1.3.1
pdfminer.six==20170720
python-dotenv==0.7.1
# MechanicalSoup > 0.11.0 produces "TypeError: expected string or bytes-like
# object" on file upload
MechanicalSoup==0.11.0
python-dotenv==0.10.3
requests>=2.20.0
python-pushover==0.4

View File

@ -1,21 +0,0 @@
import re
import logging
from pprint import pprint
from pathlib import Path
from library.pdf_extract import PDFHandling
# Manual test driver: runs the Einsatzausdruck extraction on every PDF found
# below PATH and prints the result.
PATH = '/tmp/pylokid'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFHandling()

# The Einsatz id (F followed by 8 digits and an underscore) is taken from
# the file path; compiled once instead of on every iteration.
F_ID_PATTERN = re.compile('.*(F[0-9]{8})_.*')

for path in Path(PATH).glob('**/*.pdf'):
    file = str(path)
    print(file)
    match = F_ID_PATTERN.search(file)
    if match is None:
        # The glob matches every PDF; files without an id in their path
        # used to abort the whole run with AttributeError.
        print('no f_id found in path, skipping')
        continue
    f_id = match.group(1)
    print(f_id)
    pprint(PDF.extract_einsatzausdruck(file, f_id))

30
test_pdftotext.py Normal file
View File

@ -0,0 +1,30 @@
import re
import logging
from pprint import pprint
from pathlib import Path
from library.pdftotext import PDFParsing
# Manual test driver: runs the pdftotext based extraction on the PDFs found
# below PATH and prints the result.
PATH = '/home/tobru/Documents/Feuerwehr/Stab/Fourier/Einsatzdepeschen/2019'

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)

PDF = PDFParsing()

# The Einsatz id (F followed by 8 digits and an underscore) is taken from
# the file path; compiled once instead of on every iteration.
F_ID_PATTERN = re.compile('.*(F[0-9]{8})_.*')


def parse_all(glob_pattern, extractor):
    """ prints the extraction result for every file matching glob_pattern """
    for path in Path(PATH).glob(glob_pattern):
        file = str(path)
        print(file)
        match = F_ID_PATTERN.search(file)
        if match is None:
            # Without an id in the path the result cannot be correlated;
            # this used to abort the whole run with AttributeError.
            print('no f_id found in path, skipping')
            continue
        f_id = match.group(1)
        print(f_id)
        pprint(extractor(file, f_id))


parse_all('**/Einsatzausdruck*.pdf', PDF.extract_einsatzausdruck)

# The Einsatzprotokoll run is currently disabled; uncomment to enable it.
# parse_all('**/Einsatzprotokoll*.pdf', PDF.extract_einsatzprotokoll)