add ansible role journal-postfix (a log parser for Postfix) with playbook and doc

parent 713372c850
commit e5a8025064

14 changed files with 3570 additions and 0 deletions
17  journal-postfix/files/journal-postfix.service  Normal file
|  | @@ -0,0 +1,17 @@ | |||
| # this file is part of ansible role journal-postfix | ||||
| 
 | ||||
| [Unit] | ||||
| Description=Extract postfix message delivery information from systemd journal messages\ | ||||
| and store them in a PostgreSQL database. Configuration is in /etc/journal-postfix/main.yml | ||||
| After=multi-user.target | ||||
| 
 | ||||
| [Service] | ||||
| Type=simple | ||||
| ExecStart=/srv/journal-postfix/run.py | ||||
| User=journal-postfix | ||||
| WorkingDirectory=/srv/journal-postfix/ | ||||
| Restart=on-failure | ||||
| RestartPreventExitStatus=97 | ||||
| 
 | ||||
| [Install] | ||||
| WantedBy=multi-user.target | ||||
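Once the role has installed this unit (the file name implies the unit name journal-postfix) and created the journal-postfix user, the service is handled with the usual systemd commands, e.g.:

    systemctl daemon-reload
    systemctl enable --now journal-postfix
    journalctl -u journal-postfix --follow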
							
								
								
									
85  journal-postfix/files/srv/README.md  Normal file
|  | @@ -0,0 +1,85 @@ | |||
| Parse Postfix entries in the systemd journal and collect delivery information. | ||||
| 
 | ||||
| The information on mail deliveries is written to tables in a PostgreSQL | ||||
| database. The database can then be queried by a UI showing delivery status | ||||
| to end users. The UI is not part of this package. | ||||
| 
 | ||||
| This software is tailor-made for Debian buster with systemd as the init system. | ||||
| It is meant to run on the same system on which Postfix is running, | ||||
| or on a system receiving the log stream of a Postfix instance in its | ||||
| systemd journal. | ||||
| 
 | ||||
| Prerequisites / Postfix configuration: | ||||
| 
 | ||||
|   - Access to a PostgreSQL database. | ||||
|   - Postfix: Only virtual mailboxes are supported. | ||||
|   - Postfix: You can use short or long queue_ids (see | ||||
|     http://www.postfix.org/postconf.5.html#enable_long_queue_ids), | ||||
|     but since the uniqueness of short queue_ids is very limited, | ||||
|     usage of long queue_ids is *strongly recommended*. | ||||
| 
 | ||||
| Installation: | ||||
| 
 | ||||
|   - apt install python3-psycopg2 python3-systemd python3-yaml | ||||
|   - Edit /etc/journal-postfix/main.yml | ||||
|   - Output is written to the journal (unit journal-postfix). READ IT! | ||||
| 
 | ||||
| Side effects (database): | ||||
| 
 | ||||
|   - The configured database user will create the tables | ||||
|     - delivery_from | ||||
|     - delivery_to | ||||
|     - noqueue | ||||
|     in the configured database, if they do not yet exist. | ||||
|     These tables will be filled with results from parsing the journal. | ||||
|     Table noqueue contains deliveries rejected by smtpd before they | ||||
|     got a queue_id. Deliveries with queue_id are in tables delivery_from | ||||
|     and delivery_to, which are separate, because an email can have only | ||||
|     one sender, but more than one recipient. Entries in both tables are | ||||
|     related through the queue_id and the approximate date; note that | ||||
|     short queue_ids are not unique for a delivery transaction, so | ||||
|     consider changing your Postfix configuration to long queue_ids. | ||||
|   - Log output is written to journald, unit journal-postfix. | ||||
| 
 | ||||
| Configuration: | ||||
| 
 | ||||
|   - Edit the config file in YAML format located at | ||||
|     /etc/journal-postfix/main.yml | ||||
| 
 | ||||
| Limitations: | ||||
| 
 | ||||
|   - The log output of Postfix may contain messages not primarily relevant | ||||
|     for delivery, namely messages of levels panic, fatal, error, warning. | ||||
|     They are discarded. | ||||
|   - The postfix server must be configured to use virtual mailboxes; | ||||
|     deliveries to local mailboxes are ignored. | ||||
|   - Parsing is specific to a Postfix version and only version 3.4.5 | ||||
|     (the version in Debian buster) is supported; it is intended to support | ||||
|     Postfix versions in future stable Debian releases. | ||||
|   - This script does not support concurrency; we assume that there is only | ||||
|     one process writing to the database tables. Thus clustered postfix | ||||
|     setups are not supported. | ||||
| 
 | ||||
| Options: | ||||
| 
 | ||||
|   - If you use Dovecot as the LMTP daemon, you will also get dovecot_ids | ||||
|     upon successful delivery. | ||||
|   - If you have configured Postfix to store VERP-ids of outgoing mails | ||||
|     in table 'mail_from' in the same database, then bounce emails can | ||||
|     be associated with original emails. The VERP-ids must have a certain | ||||
|     format. | ||||
|   - The subject of emails will be extracted from log messages starting | ||||
|     with "info: header Subject:". To produce these messages, enable | ||||
|     header_checks in Postfix's main.cf: | ||||
|         header_checks = regexp:/etc/postfix/header_checks | ||||
|     and put this line into /etc/postfix/header_checks: | ||||
|         /^Subject:/ INFO | ||||
|   - You can also import log messages from a log file in syslog format: | ||||
|     Run this script directly from command line with options --file | ||||
|     (the path to the file to be parsed) and --year (the year of the | ||||
|     first message in this log file). | ||||
|     Note: For the name of the month to be recognized correctly, the | ||||
|     script must be run under the same locale as the one in which the | ||||
|     logfile was written. | ||||
|     Attention: When running from the command line, log output will | ||||
|     not be sent to unit journal-postfix; use this command instead: | ||||
|     journalctl --follow SYSLOG_IDENTIFIER=python3 | ||||
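For orientation, a minimal /etc/journal-postfix/main.yml might look like this. It is a sketch assembled from the parameters read by settings.py, run.py and storage.py; host, credentials and numeric values are placeholders, and all parameters except startup, postgresql and postfix fall back to the defaults in settings.py:

    startup: yes
    postgresql:
        hostname: 127.0.0.1
        port: 5432
        database: mailinfo
        username: journal-postfix
        password: CHANGEME
    postfix:
        systemd_unitname: postfix@-.service
        # verp_marker: ...   (optional, see "Options" above)
    journal_poll_interval: 10.0
    max_delay_before_commit: 30
    max_messages_per_commit: 1000
    delete_deliveries_after_days: 30
    delete_interval: 3600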
							
								
								
									
1514  journal-postfix/files/srv/parser.py  Normal file
(File diff suppressed because it is too large.)
							
								
								
									
212  journal-postfix/files/srv/run.py  Executable file
|  | @@ -0,0 +1,212 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Main script to be run as a systemd unit or manually. | ||||
| """ | ||||
| 
 | ||||
| import argparse | ||||
| import datetime | ||||
| import os | ||||
| import sys | ||||
| from pprint import pprint | ||||
| from typing import Iterable, List, Optional, Tuple, Union | ||||
| import psycopg2 | ||||
| import psycopg2.extras | ||||
| from systemd import journal | ||||
| import settings | ||||
| from parser import init_parser, parse_entry, extract_delivery | ||||
| from sources import ( | ||||
|     iter_journal_messages_since, | ||||
|     iter_journal_messages_follow, | ||||
|     iter_logfile_messages, | ||||
| ) | ||||
| from storage import ( | ||||
|     init_db, | ||||
|     init_session, | ||||
|     get_latest_timestamp, | ||||
|     delete_old_deliveries, | ||||
|     store_delivery_items, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| exit_code_without_restart = 97 | ||||
| 
 | ||||
| 
 | ||||
| def run( | ||||
|     dsn: str, | ||||
|     verp_marker: Optional[str] = None, | ||||
|     filepath: Optional[str] = None, | ||||
|     year: Optional[int] = None, | ||||
|     debug: List[str] = [], | ||||
| ) -> None: | ||||
|     """ | ||||
|     Determine loop(s) and run them within a database context. | ||||
|     """ | ||||
|     init_parser(verp_marker=verp_marker) | ||||
|     with psycopg2.connect(dsn) as conn: | ||||
|         with conn.cursor( | ||||
|             cursor_factory=psycopg2.extras.RealDictCursor | ||||
|         ) as curs: | ||||
|             init_session(curs) | ||||
|             if filepath and year: | ||||
|                 run_loop( | ||||
|                     iter_logfile_messages(filepath, year), curs, debug=debug | ||||
|                 ) | ||||
|             else: | ||||
|                 begin_timestamp = get_latest_timestamp(curs) | ||||
|                 run_loop( | ||||
|                     iter_journal_messages_since(begin_timestamp), | ||||
|                     curs, | ||||
|                     debug=debug, | ||||
|                 ) | ||||
|                 begin_timestamp = get_latest_timestamp(curs) | ||||
|                 run_loop( | ||||
|                     iter_journal_messages_follow(begin_timestamp), | ||||
|                     curs, | ||||
|                     debug=debug, | ||||
|                 ) | ||||
| 
 | ||||
| 
 | ||||
| def run_loop( | ||||
|     iterable: Iterable[Tuple[bool, Optional[dict]]], | ||||
|     curs: psycopg2.extras.RealDictCursor, | ||||
|     debug: List[str] = [] | ||||
| ) -> None: | ||||
|     """ | ||||
|     Loop over log entries obtained from *iterable*. | ||||
| 
 | ||||
|     Parse the message, extract delivery information from it and store | ||||
|     that delivery information. | ||||
| 
 | ||||
|     For performance reasons delivery items are collected in a cache | ||||
|     before writing them (i.e., committing a database transaction). | ||||
|     """ | ||||
|     cache = [] | ||||
|     msg_count = settings.max_messages_per_commit | ||||
|     last_delete = None | ||||
|     for commit, msg_details in iterable: | ||||
|         parsed_entry = None | ||||
|         if msg_details: | ||||
|             parsed_entry = parse_entry(msg_details) | ||||
|             if 'all' in debug or ( | ||||
|                 parsed_entry and parsed_entry.get('comp') in debug | ||||
|             ): | ||||
|                 print('_' * 80) | ||||
|                 print('MSG_DETAILS:', msg_details) | ||||
|                 print('PARSED_ENTRY:', parsed_entry) | ||||
|             if parsed_entry: | ||||
|                 errors, delivery = extract_delivery(msg_details, parsed_entry) | ||||
|                 if not errors and delivery: | ||||
|                     if 'all' in debug or parsed_entry.get('comp') in debug: | ||||
|                         print('DELIVERY:') | ||||
|                         pprint(delivery) | ||||
|                     # it may happen that a delivery of type 'from' has | ||||
|                     # a recipient; in this case add a second delivery | ||||
|                     # of type 'to' to the cache, but only for deliveries | ||||
|                     # with queue_id | ||||
|                     if ( | ||||
|                         delivery['type'] == 'from' | ||||
|                         and 'recipient' in delivery | ||||
|                         and delivery.get('queue_id') | ||||
|                     ): | ||||
|                         delivery2 = delivery.copy() | ||||
|                         delivery2['type'] = 'to' | ||||
|                         cache.append(delivery2) | ||||
|                         del delivery['recipient'] | ||||
|                     cache.append(delivery) | ||||
|                     msg_count -= 1 | ||||
|                     if msg_count == 0: | ||||
|                         commit = True | ||||
|                 elif errors: | ||||
|                     msg = ( | ||||
|                         f'Extracting delivery from parsed entry failed: ' | ||||
|                         f'errors={errors}; msg_details={msg_details}; ' | ||||
|                         f'parsed_entry={parsed_entry}' | ||||
|                     ) | ||||
|                     journal.send(msg, PRIORITY=journal.LOG_CRIT) | ||||
|                     if 'all' in debug or parsed_entry.get('comp') in debug: | ||||
|                         print('EXTRACTION ERRORS:', errors) | ||||
|         if commit: | ||||
|             if 'all' in debug: | ||||
|                 print('.' * 40, 'committing') | ||||
|             # store cache, clear cache, reset message counter | ||||
|             store_delivery_items(curs, cache, debug=debug) | ||||
|             cache = [] | ||||
|             msg_count = settings.max_messages_per_commit | ||||
|         now = datetime.datetime.utcnow() | ||||
|         if last_delete is None or last_delete < now - settings.delete_interval: | ||||
|             delete_old_deliveries(curs) | ||||
|             last_delete = now | ||||
|             if 'all' in debug: | ||||
|                 print('.' * 40, 'deleting old deliveries') | ||||
|     else: | ||||
|         # the for loop has no break, so this runs once the iterable is | ||||
|         # exhausted (e.g., after a logfile import): flush the remaining cache | ||||
|         store_delivery_items(curs, cache, debug=debug) | ||||
| 
 | ||||
| 
 | ||||
| def main() -> None: | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument( | ||||
|         '--debug', | ||||
|         help='Comma-separated list of components to be debugged; ' | ||||
|         'valid component names are the Postfix components ' | ||||
|         'plus "sql" plus "all".', | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         '--file', | ||||
|         help='File path of a Postfix logfile in syslog ' | ||||
|         'format to be parsed instead of the journal', | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         '--year', | ||||
|         help='If --file is given, we need to know ' | ||||
|         'the year of the first line in the logfile', | ||||
|     ) | ||||
|     args = parser.parse_args() | ||||
| 
 | ||||
|     config = settings.get_config() | ||||
|     if config: | ||||
|         # check if startup is enabled or fail | ||||
|         msg = None | ||||
|         if 'startup' not in config: | ||||
|             msg = 'Parameter "startup" is not configured.' | ||||
|         elif not config['startup']: | ||||
|             msg = 'Startup is not enabled in the config file.' | ||||
|         if msg: | ||||
|             journal.send(msg, PRIORITY=journal.LOG_CRIT) | ||||
|             sys.exit(exit_code_without_restart) | ||||
|         # check more params and call run | ||||
|         try: | ||||
|             verp_marker = config['postfix']['verp_marker'] | ||||
|         except Exception: | ||||
|             verp_marker = None | ||||
|         debug: List[str] = [] | ||||
|         if args.debug: | ||||
|             debug = args.debug.split(',') | ||||
|         filepath = None | ||||
|         year = None | ||||
|         if args.file: | ||||
|             filepath = args.file | ||||
|             if not args.year: | ||||
|                 print( | ||||
|                     'If --file is given, we need to know the year' | ||||
|                     ' of the first line in the logfile. Please use --year.' | ||||
|                 ) | ||||
|                 sys.exit(1) | ||||
|             else: | ||||
|                 year = int(args.year) | ||||
|         dsn = init_db(config) | ||||
|         if dsn: | ||||
|             run( | ||||
|                 dsn, | ||||
|                 verp_marker=verp_marker, | ||||
|                 filepath=filepath, | ||||
|                 year=year, | ||||
|                 debug=debug, | ||||
|             ) | ||||
|     else: | ||||
|         print('Config invalid, see journal.') | ||||
|         sys.exit(exit_code_without_restart) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     main() | ||||
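As a usage sketch (file path and year are only examples), importing an existing logfile with debug output for the smtpd component and for SQL might look like this; LC_TIME=C assumes the logfile uses English month abbreviations, matching the locale note in the README:

    cd /srv/journal-postfix
    LC_TIME=C ./run.py --file /var/log/mail.log.1 --year 2019 --debug smtpd,sql
    journalctl --follow SYSLOG_IDENTIFIER=python3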
							
								
								
									
125  journal-postfix/files/srv/settings.py  Executable file
|  | @@ -0,0 +1,125 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Settings for journal-postfix. | ||||
| """ | ||||
| 
 | ||||
| import os | ||||
| import datetime | ||||
| from typing import Union, Optional | ||||
| from systemd import journal | ||||
| from yaml import safe_load | ||||
| 
 | ||||
| 
 | ||||
| main_config_file: str = '/etc/journal-postfix/main.yml' | ||||
| """ | ||||
| Filepath to the main config file. | ||||
| 
 | ||||
| Can be overridden by environment variable JOURNAL_POSTFIX_MAIN_CONF. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| systemd_unitname: str = 'postfix@-.service' | ||||
| """ | ||||
| Name of the systemd unit running the postfix service. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| journal_poll_interval: Union[float, int] = 10.0 | ||||
| """ | ||||
| Poll timeout in seconds for fetching messages from the journal. | ||||
| 
 | ||||
| Will be overridden if set in the main config. | ||||
| 
 | ||||
| If the poll times out, it is checked whether the last commit | ||||
| lies more than max_delay_before_commit seconds in the past; | ||||
| if so, the current database transaction will be committed. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| max_delay_before_commit: datetime.timedelta = datetime.timedelta(seconds=30) | ||||
| """ | ||||
| How much time may pass before committing a database transaction? | ||||
| 
 | ||||
| Will be overridden if set in the main config. | ||||
| 
 | ||||
| (The actual maximal delay can be one journal_poll_interval in addition.) | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| max_messages_per_commit: int = 1000 | ||||
| """ | ||||
| How many messages to cache at most before committing a database transaction? | ||||
| 
 | ||||
| Will be overridden if set in the main config. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| delete_deliveries_after_days: int = 0 | ||||
| """ | ||||
| After how many days shall deliveries be deleted from the database? | ||||
| 
 | ||||
| A value of 0 means that data are never deleted. | ||||
| """ | ||||
|  | ||||
|  | ||||
| delete_interval: datetime.timedelta = datetime.timedelta(hours=1) | ||||
| """ | ||||
| How often shall old deliveries be deleted from the database? | ||||
|  | ||||
| Will be overridden if set in the main config (value in seconds). | ||||
| The one-hour default is a fallback assumption: run.py reads | ||||
| settings.delete_interval even when the main config does not set it. | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| def get_config() -> Optional[dict]: | ||||
|     """ | ||||
|     Load config from the main config and return it. | ||||
| 
 | ||||
|     The default main config file path (global main_config_file) | ||||
|     can be overridden with environment variable | ||||
|     JOURNAL_POSTFIX_MAIN_CONF. | ||||
|     """ | ||||
|     try: | ||||
|         filename = os.environ['JOURNAL_POSTFIX_MAIN_CONF'] | ||||
|         global main_config_file | ||||
|         main_config_file = filename | ||||
|     except Exception: | ||||
|         filename = main_config_file | ||||
|     try: | ||||
|         with open(filename, 'r') as config_file: | ||||
|             config_raw = config_file.read() | ||||
|     except Exception: | ||||
|         msg = f'ERROR: cannot read config file {filename}' | ||||
|         journal.send(msg, PRIORITY=journal.LOG_CRIT) | ||||
|         return None | ||||
|     try: | ||||
|         config = safe_load(config_raw) | ||||
|     except Exception as err: | ||||
|         msg = f'ERROR: invalid yaml syntax in {filename}: {err}' | ||||
|         journal.send(msg, PRIORITY=journal.LOG_CRIT) | ||||
|         return None | ||||
|     # override some global variables | ||||
|     _global_value_from_config(config['postfix'], 'systemd_unitname', str) | ||||
|     _global_value_from_config(config, 'journal_poll_interval', float) | ||||
|     _global_value_from_config(config, 'max_delay_before_commit', 'seconds') | ||||
|     _global_value_from_config(config, 'max_messages_per_commit', int) | ||||
|     _global_value_from_config(config, 'delete_deliveries_after_days', int) | ||||
|     _global_value_from_config(config, 'delete_interval', 'seconds') | ||||
|     return config | ||||
| 
 | ||||
| 
 | ||||
| def _global_value_from_config( | ||||
|     config, name: str, type_: Union[type, str] | ||||
| ) -> None: | ||||
|     """ | ||||
|     Set a global variable to the value obtained from *config*. | ||||
| 
 | ||||
|     Also cast to *type_*. | ||||
|     """ | ||||
|     value = None | ||||
|     try: | ||||
|         value = config.get(name) | ||||
|         if type_ == 'seconds': | ||||
|             value = datetime.timedelta(seconds=float(value)) | ||||
|         else: | ||||
|             value = type_(value)  # type: ignore | ||||
|         globals()[name] = value | ||||
|     except Exception: | ||||
|         if value is not None: | ||||
|             msg = f'ERROR: configured value of {name} is invalid.' | ||||
|             journal.send(msg, PRIORITY=journal.LOG_ERR) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     print(get_config()) | ||||
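Because of the __main__ block above, the effective configuration can be inspected directly; the path below is only an example of overriding the default config location:

    JOURNAL_POSTFIX_MAIN_CONF=/tmp/test-main.yml python3 settings.py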
							
								
								
									
5  journal-postfix/files/srv/setup.cfg  Normal file
|  | @@ -0,0 +1,5 @@ | |||
| [pycodestyle] | ||||
| max-line-length = 200 | ||||
| 
 | ||||
| [mypy] | ||||
| ignore_missing_imports = True | ||||
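Both sections are picked up automatically by the respective tools when run from this directory, e.g. (assuming pycodestyle and mypy are installed):

    cd /srv/journal-postfix
    pycodestyle *.py
    mypy run.py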
							
								
								
									
178  journal-postfix/files/srv/sources.py  Executable file
|  | @@ -0,0 +1,178 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Data sources. | ||||
| 
 | ||||
| Note: python-systemd journal docs are at | ||||
| https://www.freedesktop.org/software/systemd/python-systemd/journal.html | ||||
| """ | ||||
| 
 | ||||
| import datetime | ||||
| import select | ||||
| from typing import Iterable, Optional, Tuple, Union | ||||
| from systemd import journal | ||||
| import settings | ||||
| 
 | ||||
| 
 | ||||
| def iter_journal_messages_since( | ||||
|     timestamp: Union[int, float] | ||||
| ) -> Iterable[Tuple[bool, dict]]: | ||||
|     """ | ||||
|     Yield False and message details from the journal since *timestamp*. | ||||
| 
 | ||||
|     This is the loading phase (loading messages that already existed | ||||
|     when we start). | ||||
| 
 | ||||
|     Argument *timestamp* is a UNIX timestamp. | ||||
| 
 | ||||
|     Only journal entries for systemd unit settings.systemd_unitname with | ||||
|     loglevel INFO and above are retrieved. | ||||
|     """ | ||||
|     timestamp = float(timestamp) | ||||
|     sdj = journal.Reader() | ||||
|     sdj.log_level(journal.LOG_INFO) | ||||
|     sdj.add_match(_SYSTEMD_UNIT=settings.systemd_unitname) | ||||
|     sdj.seek_realtime(timestamp) | ||||
|     for entry in sdj: | ||||
|         yield False, _get_msg_details(entry) | ||||
| 
 | ||||
| 
 | ||||
| def iter_journal_messages_follow( | ||||
|     timestamp: Union[int, float] | ||||
| ) -> Iterable[Tuple[bool, Optional[dict]]]: | ||||
|     """ | ||||
|     Yield commit and message details from the journal through polling. | ||||
| 
 | ||||
|     This is the polling phase (after we have read pre-existing messages | ||||
|     in the loading phase). | ||||
| 
 | ||||
|     Argument *timestamp* is a UNIX timestamp. | ||||
| 
 | ||||
|     Only journal entries for systemd unit settings.systemd_unitname with | ||||
|     loglevel INFO and above are retrieved. | ||||
| 
 | ||||
|     *commit* (bool) tells whether it is time to store the delivery | ||||
|     information obtained from the messages yielded by us. | ||||
|     It is set to True if settings.max_delay_before_commit has elapsed. | ||||
|     After this delay delivery information will be written; to be exact: | ||||
|     the delay may increase by up to one settings.journal_poll_interval. | ||||
|     """ | ||||
|     sdj = journal.Reader() | ||||
|     sdj.log_level(journal.LOG_INFO) | ||||
|     sdj.add_match(_SYSTEMD_UNIT=settings.systemd_unitname) | ||||
|     sdj.seek_realtime(timestamp) | ||||
|     p = select.poll() | ||||
|     p.register(sdj, sdj.get_events()) | ||||
|     last_commit = datetime.datetime.utcnow() | ||||
|     interval_ms = settings.journal_poll_interval * 1000 | ||||
|     while True: | ||||
|         p.poll(interval_ms) | ||||
|         commit = False | ||||
|         now = datetime.datetime.utcnow() | ||||
|         if last_commit + settings.max_delay_before_commit < now: | ||||
|             commit = True | ||||
|             last_commit = now | ||||
|         if sdj.process() == journal.APPEND: | ||||
|             for entry in sdj: | ||||
|                 yield commit, _get_msg_details(entry) | ||||
|         elif commit: | ||||
|             yield commit, None | ||||
| 
 | ||||
| 
 | ||||
| def iter_logfile_messages( | ||||
|     filepath: str, | ||||
|     year: int, | ||||
|     commit_after_lines=settings.max_messages_per_commit, | ||||
| ) -> Iterable[Tuple[bool, dict]]: | ||||
|     """ | ||||
|     Yield messages and a commit flag from a logfile. | ||||
| 
 | ||||
|     Loop through all lines of the file with given *filepath* and | ||||
|     extract the time and log message. If the log message starts | ||||
|     with 'postfix/', then extract the syslog_identifier, pid and | ||||
|     message text. | ||||
| 
 | ||||
|     Since syslog lines do not contain the year, the *year* to which | ||||
|     the first log line belongs must be given. | ||||
| 
 | ||||
|     Yield a commit flag and a dict with these keys: | ||||
|         't': timestamp | ||||
|         'message': message text | ||||
|         'identifier': syslog identifier (e.g., 'postfix/smtpd') | ||||
|         'pid': process id | ||||
| 
 | ||||
|     The commit flag will be set to True for every | ||||
|     (commit_after_lines)-th filtered message and serves | ||||
|     as a signal to the caller to commit this chunk of data | ||||
|     to the database. | ||||
|     """ | ||||
|     dt = None | ||||
|     with open(filepath, 'r') as fh: | ||||
|         cnt = 0 | ||||
|         while True: | ||||
|             line = fh.readline() | ||||
|             if not line: | ||||
|                 break | ||||
| 
 | ||||
|             # get datetime | ||||
|             timestamp = line[:15] | ||||
|             dt_prev = dt | ||||
|             dt = _parse_logfile_timestamp(timestamp, year) | ||||
|             if dt is None: | ||||
|                 continue  # discard log message with invalid timestamp | ||||
| 
 | ||||
|             # if we transgress a year boundary, then increment the year | ||||
|             if dt_prev and dt + datetime.timedelta(days=1) < dt_prev: | ||||
|                 year += 1 | ||||
|                 dt = _parse_logfile_timestamp(timestamp, year) | ||||
| 
 | ||||
|             # filter postfix messages; note that the fixed offset 21 | ||||
|             # assumes a hostname of 4 or 5 characters in the syslog line | ||||
|             msg = line[21:].strip() | ||||
|             if 'postfix/' in msg: | ||||
|                 cnt += 1 | ||||
|                 syslog_identifier, msg_ = msg.split('[', 1) | ||||
|                 pid, msg__ = msg_.split(']', 1) | ||||
|                 message = msg__[2:] | ||||
|                 commit = cnt % commit_after_lines == 0 | ||||
|                 yield commit, { | ||||
|                     't': dt, | ||||
|                     'message': message, | ||||
|                     'identifier': syslog_identifier, | ||||
|                     'pid': pid, | ||||
|                 } | ||||
| 
 | ||||
| 
 | ||||
| def _get_msg_details(journal_entry: dict) -> dict: | ||||
|     """ | ||||
|     Return information extracted from a journal entry object as a dict. | ||||
|     """ | ||||
|     return { | ||||
|         't': journal_entry['__REALTIME_TIMESTAMP'], | ||||
|         'message': journal_entry['MESSAGE'], | ||||
|         'identifier': journal_entry.get('SYSLOG_IDENTIFIER'), | ||||
|         'pid': journal_entry.get('SYSLOG_PID'), | ||||
|     } | ||||
| 
 | ||||
| 
 | ||||
| def _parse_logfile_timestamp( | ||||
|     timestamp: Optional[str], | ||||
|     year: int | ||||
| ) -> Optional[datetime.datetime]: | ||||
|     """ | ||||
|     Parse a given syslog *timestamp* and return a datetime. | ||||
| 
 | ||||
|     Since the timestamp does not contain the year, it is an | ||||
|     extra argument. | ||||
| 
 | ||||
|     Note: Successful parsing of the month's name depends on | ||||
|     the locale under which this script runs. | ||||
|     """ | ||||
|     if timestamp is None: | ||||
|         return None | ||||
|     try: | ||||
|         timestamp = timestamp.replace('  ', ' ') | ||||
|         t1 = datetime.datetime.strptime(timestamp, '%b %d %H:%M:%S') | ||||
|         t2 = t1.replace(year=year) | ||||
|         return t2 | ||||
|     except Exception: | ||||
|         return None | ||||
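To illustrate iter_logfile_messages, here is a made-up syslog line and the dict it yields (with --year 2019 and a short hostname, see the comment on the offset above):

    Nov 25 05:12:31 mail postfix/smtpd[12345]: connect from unknown[203.0.113.5]

    {'t': datetime.datetime(2019, 11, 25, 5, 12, 31),
     'message': 'connect from unknown[203.0.113.5]',
     'identifier': 'postfix/smtpd',
     'pid': '12345'}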
							
								
								
									
337  journal-postfix/files/srv/storage.py  Executable file
|  | @@ -0,0 +1,337 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Storage to PostgreSQL. | ||||
| """ | ||||
| 
 | ||||
| import datetime | ||||
| import json | ||||
| import re | ||||
| import time | ||||
| from collections import defaultdict | ||||
| from traceback import format_exc | ||||
| from typing import Any, Dict, Iterable, List, Optional, Tuple, Union | ||||
| import psycopg2 | ||||
| import psycopg2.extras | ||||
| from systemd import journal | ||||
| import settings | ||||
| from storage_setup import ( | ||||
|     get_create_table_stmts, | ||||
|     get_sql_prepared_statement, | ||||
|     get_sql_execute_prepared_statement, | ||||
|     table_fields, | ||||
| ) | ||||
| 
 | ||||
| 
 | ||||
| def get_latest_timestamp(curs: psycopg2.extras.RealDictCursor) -> float: | ||||
|     """ | ||||
|     Fetch the latest timestamp from the database. | ||||
| 
 | ||||
|     Return the latest timestamp of a message transfer from the database. | ||||
|     If there are no records yet, return 0. | ||||
|     """ | ||||
|     last = 0 | ||||
|     curs.execute( | ||||
|         "SELECT greatest(max(t_i), max(t_f)) AS last FROM delivery_from" | ||||
|     ) | ||||
|     last1 = curs.fetchone()['last'] | ||||
|     if last1: | ||||
|         last = max( | ||||
|             last, (last1 - datetime.datetime(1970, 1, 1)).total_seconds() | ||||
|         ) | ||||
|     curs.execute( | ||||
|         "SELECT greatest(max(t_i), max(t_f)) AS last FROM delivery_to" | ||||
|     ) | ||||
|     last2 = curs.fetchone()['last'] | ||||
|     if last2: | ||||
|         last = max( | ||||
|             last, (last2 - datetime.datetime(1970, 1, 1)).total_seconds() | ||||
|         ) | ||||
|     return last | ||||
| 
 | ||||
| 
 | ||||
| def delete_old_deliveries(curs: psycopg2.extras.RealDictCursor) -> None: | ||||
|     """ | ||||
|     Delete deliveries older than the configured number of days. | ||||
| 
 | ||||
|     See config param *delete_deliveries_after_days*. | ||||
|     """ | ||||
|     max_days = settings.delete_deliveries_after_days | ||||
|     if max_days: | ||||
|         now = datetime.datetime.utcnow() | ||||
|         dt = datetime.timedelta(days=max_days) | ||||
|         t0 = now - dt | ||||
|         curs.execute("DELETE FROM delivery_from WHERE t_i < %s", (t0,)) | ||||
|         curs.execute("DELETE FROM delivery_to WHERE t_i < %s", (t0,)) | ||||
|         curs.execute("DELETE FROM noqueue WHERE t < %s", (t0,)) | ||||
| 
 | ||||
| 
 | ||||
| def store_delivery_items( | ||||
|     cursor, | ||||
|     cache: List[dict], | ||||
|     debug: List[str] = [] | ||||
| ) -> None: | ||||
|     """ | ||||
|     Store cached delivery items into the database. | ||||
| 
 | ||||
|     Find queue_ids in *cache* and group delivery items by | ||||
|     them, but separately for delivery types 'from' and 'to'. | ||||
|     In addition, collect delivery items whose queue_id is None. | ||||
|  | ||||
|     After grouping we merge all items within a group into a | ||||
|     single item. So we can combine several SQL queries into | ||||
|     a single one, which improves performance significantly. | ||||
|  | ||||
|     Then store the merged items and the deliveries without | ||||
|     a queue_id. | ||||
|     """ | ||||
|     if 'all' in debug or 'sql' in debug: | ||||
|         print(f'Storing {len(cache)} messages.') | ||||
|     if not cache: | ||||
|         return | ||||
|     from_items, to_items, noqueue_items = _group_delivery_items(cache) | ||||
|     deliveries_from = _merge_delivery_items(from_items, item_type='from') | ||||
|     deliveries_to = _merge_delivery_items(to_items, item_type='to') | ||||
|     _store_deliveries(cursor, 'delivery_from', deliveries_from, debug=debug) | ||||
|     _store_deliveries(cursor, 'delivery_to', deliveries_to, debug=debug) | ||||
|     _store_deliveries(cursor, 'noqueue', noqueue_items, debug=debug) | ||||
| 
 | ||||
| 
 | ||||
| FromItems = Dict[str, List[dict]] | ||||
| 
 | ||||
| 
 | ||||
| ToItems = Dict[Tuple[str, Optional[str]], List[dict]] | ||||
| 
 | ||||
| 
 | ||||
| NoqueueItems = Dict[int, dict] | ||||
| 
 | ||||
| 
 | ||||
| def _group_delivery_items( | ||||
|     cache: List[dict] | ||||
| ) -> Tuple[FromItems, ToItems, NoqueueItems]: | ||||
|     """ | ||||
|     Group delivery items by type and queue_id. | ||||
| 
 | ||||
|     Return items of type 'from', of type 'to' and items without | ||||
|     queue_id. | ||||
|     """ | ||||
|     delivery_from_items: FromItems = defaultdict(list) | ||||
|     delivery_to_items: ToItems = defaultdict(list) | ||||
|     noqueue_items: NoqueueItems = {} | ||||
|     noqueue_i = 1 | ||||
|     for item in cache: | ||||
|         if item.get('queue_id'): | ||||
|             queue_id = item['queue_id'] | ||||
|             if item.get('type') == 'from': | ||||
|                 delivery_from_items[queue_id].append(item) | ||||
|             else: | ||||
|                 recipient = item.get('recipient') | ||||
|                 delivery_to_items[(queue_id, recipient)].append(item) | ||||
|         else: | ||||
|             noqueue_items[noqueue_i] = item | ||||
|             noqueue_i += 1 | ||||
|     return delivery_from_items, delivery_to_items, noqueue_items | ||||
| 
 | ||||
| 
 | ||||
| def _merge_delivery_items( | ||||
|     delivery_items: Union[FromItems, ToItems], | ||||
|     item_type: str = 'from', | ||||
| ) -> Dict[Union[str, Tuple[str, Optional[str]]], dict]: | ||||
|     """ | ||||
|     Compute deliveries by combining multiple delivery items. | ||||
| 
 | ||||
|     Take lists of delivery items for each queue_id (in case | ||||
|     of item_type=='from') or for (queue_id, recipient)-pairs | ||||
|     (in case of item_type='to'). | ||||
|     Each delivery item is a dict obtained from one log message. | ||||
|     The dicts are consecutively updated (merged), except for the | ||||
|     raw log messages (texts) which are collected into a list. | ||||
|     The fields of the resulting delivery are filtered according | ||||
|     to the target table. | ||||
|     Returned is a dict mapping queue_ids (in case | ||||
|     of item_type=='from') or (queue_id, recipient)-pairs | ||||
|     (in case of item_type='to') to deliveries. | ||||
|     """ | ||||
|     deliveries = {} | ||||
|     for group, items in delivery_items.items(): | ||||
|         delivery = {} | ||||
|         messages = [] | ||||
|         for item in items: | ||||
|             message = item.pop('message') | ||||
|             identifier = item.pop('identifier') | ||||
|             pid = item.pop('pid') | ||||
|             messages.append(f'{identifier}[{pid}]: {message}') | ||||
|             delivery.update(item) | ||||
|         delivery['messages'] = messages | ||||
|         deliveries[group] = delivery | ||||
|     return deliveries | ||||
| 
 | ||||
| 
 | ||||
| def _store_deliveries( | ||||
|     cursor: psycopg2.extras.RealDictCursor, | ||||
|     table_name: str, | ||||
|     deliveries: Dict[Any, dict], | ||||
|     debug: List[str] = [], | ||||
| ) -> None: | ||||
|     """ | ||||
|     Store grouped and merged delivery items. | ||||
|     """ | ||||
|     if not deliveries: | ||||
|         return | ||||
|     n = len(deliveries.values()) | ||||
|     t0 = time.time() | ||||
|     cursor.execute('BEGIN') | ||||
|     _store_deliveries_batch(cursor, table_name, deliveries.values()) | ||||
|     cursor.execute('COMMIT') | ||||
|     t1 = time.time() | ||||
|     if 'all' in debug or 'sql' in debug: | ||||
|         milliseconds = (t1 - t0) * 1000 | ||||
|         print( | ||||
|             '*' * 10, | ||||
|             f'SQL transaction time {table_name}: ' | ||||
|             f'{milliseconds:.2f} ms ({n} deliveries)', | ||||
|         ) | ||||
| 
 | ||||
| 
 | ||||
| def _store_deliveries_batch( | ||||
|     cursor: psycopg2.extras.RealDictCursor, | ||||
|     table_name: str, | ||||
|     deliveries: Iterable[dict] | ||||
| ) -> None: | ||||
|     """ | ||||
|     Store *deliveries* (i.e., grouped and merged delivery items). | ||||
| 
 | ||||
|     We use a prepared statement and execute_batch() from | ||||
|     psycopg2.extras to improve performance. | ||||
|     """ | ||||
|     rows = [] | ||||
|     for delivery in deliveries: | ||||
|         # get values for all fields of the table | ||||
|         field_values: List[Any] = [] | ||||
|         t = delivery.get('t') | ||||
|         delivery['t_i'] = t | ||||
|         delivery['t_f'] = t | ||||
|         for field in table_fields[table_name]: | ||||
|             if field in delivery: | ||||
|                 if field == 'messages': | ||||
|                     field_values.append(json.dumps(delivery[field])) | ||||
|                 else: | ||||
|                     field_values.append(delivery[field]) | ||||
|             else: | ||||
|                 field_values.append(None) | ||||
|         rows.append(field_values) | ||||
|     sql = get_sql_execute_prepared_statement(table_name) | ||||
|     try: | ||||
|         psycopg2.extras.execute_batch(cursor, sql, rows) | ||||
|     except Exception as err: | ||||
|         msg = (f'SQL statement failed ({err}): "{sql}"' | ||||
|                f' -- the values were: {rows}') | ||||
|         journal.send(msg, PRIORITY=journal.LOG_ERR) | ||||
| 
 | ||||
| 
 | ||||
| def init_db(config: dict) -> Optional[str]: | ||||
|     """ | ||||
|     Initialize database; if ok return DSN, else None. | ||||
| 
 | ||||
|     Try to get parameters for database access, | ||||
|     check existence of tables and possibly create them. | ||||
|     """ | ||||
|     dsn = _get_dsn(config) | ||||
|     if dsn: | ||||
|         ok = _create_tables(dsn) | ||||
|         if not ok: | ||||
|             return None | ||||
|     return dsn | ||||
| 
 | ||||
| 
 | ||||
| def _get_dsn(config: dict) -> Optional[str]: | ||||
|     """ | ||||
|     Return the DSN (data source name) from the *config*. | ||||
|     """ | ||||
|     try: | ||||
|         postgresql_config = config['postgresql'] | ||||
|         hostname = postgresql_config['hostname'] | ||||
|         port = postgresql_config['port'] | ||||
|         database = postgresql_config['database'] | ||||
|         username = postgresql_config['username'] | ||||
|         password = postgresql_config['password'] | ||||
|     except Exception: | ||||
|         msg = f"""ERROR: invalid config in {settings.main_config_file} | ||||
| The config file must contain a section like this: | ||||
| 
 | ||||
| postgresql: | ||||
|     hostname: <HOSTNAME_OR_IP> | ||||
|     port: <PORT> | ||||
|     database: <DATABASE_NAME> | ||||
|     username: <USERNAME> | ||||
|     password: <PASSWORD> | ||||
| """ | ||||
|         journal.send(msg, PRIORITY=journal.LOG_CRIT) | ||||
|         return None | ||||
|     dsn = f'host={hostname} port={port} dbname={database} '\ | ||||
|           f'user={username} password={password}' | ||||
|     return dsn | ||||
| 
 | ||||
| 
 | ||||
| def _create_tables(dsn: str) -> bool: | ||||
|     """ | ||||
|     Check existence of tables and possibly create them, returning success. | ||||
|     """ | ||||
|     try: | ||||
|         with psycopg2.connect(dsn) as conn: | ||||
|             with conn.cursor() as curs: | ||||
|                 for table_name, sql_stmts in get_create_table_stmts().items(): | ||||
|                     ok = _create_table(curs, table_name, sql_stmts) | ||||
|                     if not ok: | ||||
|                         return False | ||||
|     except Exception: | ||||
|         journal.send( | ||||
|             f'ERROR: cannot connect to database, check params' | ||||
|             f' in {settings.main_config_file}', | ||||
|             PRIORITY=journal.LOG_CRIT, | ||||
|         ) | ||||
|         return False | ||||
|     return True | ||||
| 
 | ||||
| 
 | ||||
| def _create_table( | ||||
|     cursor: psycopg2.extras.RealDictCursor, | ||||
|     table_name: str, | ||||
|     sql_stmts: List[str] | ||||
| ) -> bool: | ||||
|     """ | ||||
|     Try to create a table if it does not exist and return whether it exists. | ||||
| 
 | ||||
|     If creation failed, emit an error to the journal. | ||||
|     """ | ||||
|     cursor.execute("SELECT EXISTS(SELECT * FROM " | ||||
|                    "information_schema.tables WHERE table_name=%s)", | ||||
|                    (table_name,)) | ||||
|     table_exists = cursor.fetchone()[0] | ||||
|     if not table_exists: | ||||
|         for sql_stmt in sql_stmts: | ||||
|             try: | ||||
|                 cursor.execute(sql_stmt) | ||||
|             except Exception: | ||||
|                 journal.send( | ||||
|                     'ERROR: database user needs privilege to create tables.\n' | ||||
|                     'Alternatively, you can create the table manually like' | ||||
|                     ' this:\n\n' | ||||
|                     + '\n'.join([sql + ';' for sql in sql_stmts]), | ||||
|                     PRIORITY=journal.LOG_CRIT, | ||||
|                 ) | ||||
|                 return False | ||||
|     return True | ||||
| 
 | ||||
| 
 | ||||
| def init_session(cursor: psycopg2.extras.RealDictCursor) -> None: | ||||
|     """ | ||||
|     Init a database session. | ||||
| 
 | ||||
|     Define prepared statements. | ||||
|     """ | ||||
|     stmt = get_sql_prepared_statement('delivery_from') | ||||
|     cursor.execute(stmt) | ||||
|     stmt = get_sql_prepared_statement('delivery_to') | ||||
|     cursor.execute(stmt) | ||||
|     stmt = get_sql_prepared_statement('noqueue') | ||||
|     cursor.execute(stmt) | ||||
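After the service has been running for a while, a quick plausibility check in psql (illustrative queries against the tables created by init_db) is:

    SELECT queue_id, sender, t_i, done FROM delivery_from ORDER BY t_i DESC LIMIT 5;
    SELECT queue_id, recipient, status FROM delivery_to ORDER BY t_i DESC LIMIT 5;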
							
								
								
									
210  journal-postfix/files/srv/storage_setup.py  Executable file
|  | @@ -0,0 +1,210 @@ | |||
| #!/usr/bin/env python3 | ||||
| 
 | ||||
| """ | ||||
| Database table definitions and prepared statements. | ||||
| 
 | ||||
| Note: (short) postfix queue IDs are not unique: | ||||
| http://postfix.1071664.n5.nabble.com/Queue-ID-gets-reused-Not-unique-td25387.html | ||||
| """ | ||||
| 
 | ||||
| from typing import Dict, List | ||||
| 
 | ||||
| 
 | ||||
| _table_def_delivery_from = [ | ||||
|     [ | ||||
|         dict(name='t_i', dtype='TIMESTAMP'), | ||||
|         dict(name='t_f', dtype='TIMESTAMP'), | ||||
|         dict(name='queue_id', dtype='VARCHAR(16)', null=False, extra='UNIQUE'), | ||||
|         dict(name='host', dtype='VARCHAR(200)'), | ||||
|         dict(name='ip', dtype='VARCHAR(50)'), | ||||
|         dict(name='sasl_username', dtype='VARCHAR(300)'), | ||||
|         dict(name='orig_queue_id', dtype='VARCHAR(16)'), | ||||
|         dict(name='status', dtype='VARCHAR(10)'), | ||||
|         dict(name='accepted', dtype='BOOL', null=False, default='TRUE'), | ||||
|         dict(name='done', dtype='BOOL', null=False, default='FALSE'), | ||||
|         dict(name='sender', dtype='VARCHAR(300)'), | ||||
|         dict(name='message_id', dtype='VARCHAR(1000)'), | ||||
|         dict(name='resent_message_id', dtype='VARCHAR(1000)'), | ||||
|         dict(name='subject', dtype='VARCHAR(1000)'), | ||||
|         dict(name='phase', dtype='VARCHAR(15)'), | ||||
|         dict(name='error', dtype='VARCHAR(1000)'), | ||||
|         dict(name='size', dtype='INT'), | ||||
|         dict(name='nrcpt', dtype='INT'), | ||||
|         dict(name='verp_id', dtype='INT'), | ||||
|         dict(name='messages', dtype='JSONB', null=False, default="'{}'::JSONB"), | ||||
|     ], | ||||
|     "CREATE INDEX delivery_from__queue_id ON delivery_from (queue_id)", | ||||
|     "CREATE INDEX delivery_from__t_i ON delivery_from (t_i)", | ||||
|     "CREATE INDEX delivery_from__t_f ON delivery_from (t_f)", | ||||
|     "CREATE INDEX delivery_from__sender ON delivery_from (sender)", | ||||
|     "CREATE INDEX delivery_from__message_id ON delivery_from (message_id)", | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| _table_def_delivery_to = [ | ||||
|     [ | ||||
|         dict(name='t_i', dtype='TIMESTAMP'), | ||||
|         dict(name='t_f', dtype='TIMESTAMP'), | ||||
|         dict(name='queue_id', dtype='VARCHAR(16)', null=False), | ||||
|         dict(name='recipient', dtype='VARCHAR(300)'), | ||||
|         dict(name='orig_recipient', dtype='VARCHAR(300)'), | ||||
|         dict(name='host', dtype='VARCHAR(200)'), | ||||
|         dict(name='ip', dtype='VARCHAR(50)'), | ||||
|         dict(name='port', dtype='VARCHAR(10)'), | ||||
|         dict(name='relay', dtype='VARCHAR(10)'), | ||||
|         dict(name='delay', dtype='VARCHAR(200)'), | ||||
|         dict(name='delays', dtype='VARCHAR(200)'), | ||||
|         dict(name='dsn', dtype='VARCHAR(10)'), | ||||
|         dict(name='status', dtype='VARCHAR(10)'), | ||||
|         dict(name='status_text', dtype='VARCHAR(1000)'), | ||||
|         dict(name='messages', dtype='JSONB', null=False, default="'{}'::JSONB"), | ||||
|     ], | ||||
|     "ALTER TABLE delivery_to ADD CONSTRAINT" | ||||
|     " delivery_to__queue_id_recipient UNIQUE(queue_id, recipient)", | ||||
|     "CREATE INDEX delivery_to__queue_id ON delivery_to (queue_id)", | ||||
|     "CREATE INDEX delivery_to__recipient ON delivery_to (recipient)", | ||||
|     "CREATE INDEX delivery_to__t_i ON delivery_to (t_i)", | ||||
|     "CREATE INDEX delivery_to__t_f ON delivery_to (t_f)", | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| _table_def_noqueue = [ | ||||
|     [ | ||||
|         dict(name='t', dtype='TIMESTAMP'), | ||||
|         dict(name='host', dtype='VARCHAR(200)'), | ||||
|         dict(name='ip', dtype='VARCHAR(50)'), | ||||
|         dict(name='sender', dtype='VARCHAR(300)'), | ||||
|         dict(name='recipient', dtype='VARCHAR(300)'), | ||||
|         dict(name='sasl_username', dtype='VARCHAR(300)'), | ||||
|         dict(name='status', dtype='VARCHAR(10)'), | ||||
|         dict(name='phase', dtype='VARCHAR(15)'), | ||||
|         dict(name='error', dtype='VARCHAR(1000)'), | ||||
|         dict(name='message', dtype='TEXT'), | ||||
|     ], | ||||
|     "CREATE INDEX noqueue__t ON noqueue (t)", | ||||
|     "CREATE INDEX noqueue__sender ON noqueue (sender)", | ||||
|     "CREATE INDEX noqueue__recipient ON noqueue (recipient)", | ||||
| ] | ||||
| 
 | ||||
| 
 | ||||
| _tables: Dict[str, list] = { | ||||
|     'delivery_from': _table_def_delivery_from, | ||||
|     'delivery_to': _table_def_delivery_to, | ||||
|     'noqueue': _table_def_noqueue, | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| _prepared_statements = { | ||||
|     'delivery_from': | ||||
|         "PREPARE delivery_from_insert ({}) AS " | ||||
|         "INSERT INTO delivery_from ({}) VALUES ({}) " | ||||
|         "ON CONFLICT (queue_id) DO UPDATE SET {}", | ||||
|     'delivery_to': | ||||
|         "PREPARE delivery_to_insert ({}) AS " | ||||
|         "INSERT INTO delivery_to ({}) VALUES ({}) " | ||||
|         "ON CONFLICT (queue_id, recipient) DO UPDATE SET {}", | ||||
|     'noqueue': | ||||
|         "PREPARE noqueue_insert ({}) AS " | ||||
|         "INSERT INTO noqueue ({}) VALUES ({}){}", | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| table_fields: Dict[str, List[str]] = {} | ||||
| """ | ||||
| Lists of field names for tables, populated by get_create_table_stmts(). | ||||
| """ | ||||
| 
 | ||||
| 
 | ||||
| def get_sql_prepared_statement(table_name: str) -> str: | ||||
|     """ | ||||
|     Return SQL defining a prepared statement for inserting into a table. | ||||
| 
 | ||||
|     Table 'noqueue' is handled differently, because it does not have | ||||
|     an UPDATE clause. | ||||
|     """ | ||||
|     col_names = [] | ||||
|     col_types = [] | ||||
|     col_args = [] | ||||
|     col_upds = [] | ||||
|     col_i = 0 | ||||
|     for field in _tables[table_name][0]: | ||||
|         # column type | ||||
|         col_type = field['dtype'] | ||||
|         if field['dtype'].lower().startswith('varchar'): | ||||
|             col_type = 'TEXT' | ||||
|         col_types.append(col_type) | ||||
|         # column args | ||||
|         col_i += 1 | ||||
|         col_arg = '$' + str(col_i) | ||||
|         # column name | ||||
|         col_name = field['name'] | ||||
|         col_names.append(col_name) | ||||
|         if 'default' in field: | ||||
|             default = field['default'] | ||||
|             col_args.append(f'COALESCE({col_arg},{default})') | ||||
|         else: | ||||
|             col_args.append(col_arg) | ||||
|         # column update | ||||
|         col_upd = f'{col_name}=COALESCE({col_arg},{table_name}.{col_name})' | ||||
|         if col_name != 't_i': | ||||
|             if col_name == 'messages': | ||||
|                 col_upd = f'{col_name}={table_name}.{col_name}||{col_arg}' | ||||
|             if table_name != 'noqueue': | ||||
|                 col_upds.append(col_upd) | ||||
|     stmt = _prepared_statements[table_name].format( | ||||
|         ','.join(col_types), | ||||
|         ','.join(col_names), | ||||
|         ','.join(col_args), | ||||
|         ','.join(col_upds), | ||||
|     ) | ||||
|     return stmt | ||||
| 
 | ||||
| 
 | ||||
| def get_sql_execute_prepared_statement(table_name: str) -> str: | ||||
|     """ | ||||
|     Return SQL for executing the given table's prepared statement. | ||||
| 
 | ||||
|     The result is based on global variable _tables. | ||||
|     """ | ||||
|     fields = _tables[table_name][0] | ||||
|     return "EXECUTE {}_insert ({})"\ | ||||
|         .format(table_name, ','.join(['%s' for i in range(len(fields))])) | ||||
| 
 | ||||
| 
 | ||||
| def get_create_table_stmts() -> Dict[str, List[str]]: | ||||
|     """ | ||||
|     Return a dict mapping table names to SQL statements creating the tables. | ||||
| 
 | ||||
|     Also populate global variable table_fields. | ||||
|     """ | ||||
|     res = {} | ||||
|     for table_name, table_def in _tables.items(): | ||||
|         stmts = table_def.copy() | ||||
|         stmts[0] = _get_sql_create_stmt(table_name, table_def[0]) | ||||
|         res[table_name] = stmts | ||||
|         field_names = [x['name'] for x in table_def[0]] | ||||
|         global table_fields | ||||
|         table_fields[table_name] = field_names | ||||
|     return res | ||||
| 
 | ||||
| 
 | ||||
| def _get_sql_create_stmt(table_name: str, fields: List[dict]): | ||||
|     """ | ||||
|     Return the 'CREATE TABLE' SQL statement for a table. | ||||
| 
 | ||||
|     Factor in NULL, DEFAULT and extra DDL text. | ||||
|     """ | ||||
|     sql = f"CREATE TABLE {table_name} (\n    id BIGSERIAL," | ||||
|     col_defs = [] | ||||
|     for field in fields: | ||||
|         col_def = f"    {field['name']} {field['dtype']}" | ||||
|         if 'null' in field and field['null'] is False: | ||||
|             col_def += " NOT NULL" | ||||
|         if 'default' in field: | ||||
|             col_def += f" DEFAULT {field['default']}" | ||||
|         if 'extra' in field: | ||||
|             col_def += f" {field['extra']}" | ||||
|         col_defs.append(col_def) | ||||
|     sql += '\n' + ',\n'.join(col_defs) | ||||
|     sql += '\n)' | ||||
|     return sql | ||||
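Given these table definitions, sender and per-recipient delivery status can be correlated through queue_id, as described in the README; an illustrative query (remember that short queue_ids may be reused, so prefer long queue_ids):

    SELECT f.t_i, f.sender, t.recipient, t.status, t.dsn
      FROM delivery_from f
      JOIN delivery_to t ON t.queue_id = f.queue_id
     ORDER BY f.t_i DESC
     LIMIT 20;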