initial work on pdf data extract
This commit is contained in:
parent
d3885c47d9
commit
9ff811f1ba
|
@ -4,7 +4,7 @@ import re
|
|||
from datetime import datetime
|
||||
import requests
|
||||
|
||||
def create_einsatzrapport(username, password, base_url, f_id):
|
||||
def create_einsatzrapport(username, password, base_url, f_id, pdf_data):
|
||||
|
||||
session = requests.session()
|
||||
login_data = {
|
||||
|
@ -40,7 +40,7 @@ def create_einsatzrapport(username, password, base_url, f_id):
|
|||
'ztb_h': (None, '12'), # 05. Zeit bis
|
||||
'ztb_m': (None, '12'), # 05. Zeit bis
|
||||
'e_ort_1': (None, '306'), # 06. Einsatzort: Urdorf 306, Birmensdorf 298
|
||||
'eins_ereig': (None, f_id), # 07. Ereignis
|
||||
'eins_ereig': (None, pdf_data['einsatz']), # 07. Ereignis # TODO utf-8?
|
||||
'adr': (None, 'TBD'), # 08. Adresse
|
||||
#'zh_alarmierung_h': (None, 'UNKNOWN'), # 12. Alarmierung
|
||||
#'zh_alarmierung_m': (None, 'UNKNOWN'), # 12. Alarmierung
|
||||
|
|
5
main.py
5
main.py
|
@ -16,6 +16,7 @@ import aioeasywebdav
|
|||
from dotenv import load_dotenv, find_dotenv
|
||||
import paho.mqtt.client as mqtt
|
||||
from lodur_connect import create_einsatzrapport, upload_alarmdepesche
|
||||
import pdf_extract
|
||||
|
||||
_EMAIL_SUBJECTS = '(OR SUBJECT "Einsatzausdruck_FW" SUBJECT "Einsatzprotokoll" UNSEEN)'
|
||||
_INTERVAL = 10
|
||||
|
@ -236,6 +237,9 @@ def main():
|
|||
# this is real - publish Einsatz on MQTT
|
||||
mqtt_client.publish('pylokid/' + f_type, f_id)
|
||||
|
||||
# get as much information from the PDF as possible
|
||||
pdf_data = pdf_extract.get_einsatzausdruck(os.path.join(tmp_dir, file_name))
|
||||
|
||||
# create new Einsatzrapport in Lodur
|
||||
logger.info('Creating Einsatzrapport in Lodur for ' + f_id)
|
||||
lodur_id = create_einsatzrapport(
|
||||
|
@ -243,6 +247,7 @@ def main():
|
|||
lodur_password,
|
||||
lodur_base_url,
|
||||
f_id,
|
||||
pdf_data,
|
||||
)
|
||||
logger.info('Sent data to Lodur. Assigned Lodur ID: ' + lodur_id)
|
||||
# store lodur id in webdav
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import io
|
||||
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
||||
from pdfminer.converter import HTMLConverter,TextConverter,XMLConverter
|
||||
from pdfminer.layout import LAParams
|
||||
from pdfminer.pdfpage import PDFPage
|
||||
|
||||
def concatenate_to_multiline_string(data, start, end):
    """Join ``data[start]`` through ``data[end]`` into one multi-line string.

    Args:
        data: Indexable sequence of strings (e.g. the lines of a parsed PDF).
        start: Index of the first element to include.
        end: Index of the last element to include (inclusive).

    Returns:
        The selected elements in order, each one followed by a newline.
        An empty string is returned when ``start`` is greater than ``end``.

    Raises:
        IndexError: If any index in ``start..end`` lies outside ``data``.
    """
    # Index explicitly instead of slicing: a slice would silently truncate
    # when the range runs past the end of ``data``, hiding a layout mismatch.
    return ''.join(data[index] + '\n' for index in range(start, end + 1))
|
||||
|
||||
def convert(file):
    """Extract the text content of a PDF file.

    Args:
        file: Path of the PDF file to read.

    Returns:
        The text of the processed pages as a single string, as written by
        pdfminer's ``TextConverter``.
    """
    manager = PDFResourceManager()

    # Context managers / ``finally`` guarantee that the input file, the
    # converter and the output buffer are released even if pdfminer raises
    # while processing a page.
    with io.StringIO() as output:
        converter = TextConverter(
            manager, output, codec='utf-8', laparams=LAParams()
        )
        try:
            interpreter = PDFPageInterpreter(manager, converter)
            with open(file, 'rb') as infile:
                # An empty page-number set is passed, i.e. no specific pages
                # are requested (presumably meaning "all pages" - confirm
                # against the pdfminer documentation).
                pages = PDFPage.get_pages(
                    infile, set(), caching=True, check_extractable=True
                )
                for page in pages:
                    interpreter.process_page(page)
            # Must be read before the StringIO is closed by the ``with``.
            return output.getvalue()
        finally:
            converter.close()
|
||||
|
||||
def get_einsatzausdruck(file):
    """Extract as much information as possible from an Einsatzausdruck PDF.

    The PDF is converted to text and the fields are read from fixed line
    positions of that text.

    Args:
        file: Path of the Einsatzausdruck PDF.

    Returns:
        A dict with the keys 'auftrag', 'datum', 'zeit', 'melder',
        'erfasser', 'bemerkungen', 'einsatz', 'ort', 'strasse' and 'hinweis'.

    Raises:
        ValueError: If no line equals 'Disponierte Einheiten'.
        IndexError: If the text has fewer lines than the layout requires.
    """
    lines = convert(file).splitlines()

    # The "second part" (below the map) does not always start at the same
    # index; it depends on the length of the Bemerkungen. The reference
    # layout has 'Disponierte Einheiten' at index 29, so the deviation from
    # that position is applied as a simple offset to all second-part fields.
    # TODO: make it better
    shift = 29 - lines.index('Disponierte Einheiten')

    def second_part(position):
        """Return the line at ``position`` of the reference layout."""
        return lines[position - shift]

    # NOTE(review): 'bemerkungen' always reads lines 23-28 and is not
    # offset-corrected - confirm this is intended for longer Bemerkungen.
    # The 'objekt' field is not extracted yet (line position unknown).
    return {
        'auftrag': lines[14],
        'datum': lines[15],
        'zeit': lines[16],
        'melder': concatenate_to_multiline_string(lines, 18, 19),
        'erfasser': lines[20],
        'bemerkungen': concatenate_to_multiline_string(lines, 23, 28),
        'einsatz': second_part(34),
        'ort': second_part(37),
        'strasse': second_part(38),
        'hinweis': second_part(50),
    }
|
||||
|
||||
def get_einsatzprotokoll(file):
    """Extract as much information as possible from an Einsatzprotokoll PDF.

    The PDF is converted to text and each field is read from a fixed line
    position of that text.

    Args:
        file: Path of the Einsatzprotokoll PDF.

    Returns:
        A dict with the keys 'auftrag', 'datum', 'angelegt', 'disposition',
        'ausgerueckt' and 'anort'.

    Raises:
        IndexError: If the text has fewer lines than the layout requires.
    """
    lines = convert(file).splitlines()

    # Field name -> line index within the extracted text.
    positions = (
        ('auftrag', 26),
        ('datum', 25),
        ('angelegt', 28),
        ('disposition', 30),
        ('ausgerueckt', 32),
        ('anort', 33),
    )
    return {field: lines[index] for field, index in positions}
|
Loading…
Reference in New Issue