"""Pipelet that parses datetime source fields using a list of format strings.

The list of format strings to try for each source field is configured on the
Squirro UI. You can upload this pipelet to your Squirro instance and then add
it in the Load section of your pipeline workflows, before the Transform Input
step (if it is present).

Please read the docstring of the pipelet for an example of how to configure it.
"""
from datetime import datetime

from squirro.sdk import PipeletV1

# Layout every successfully parsed value is rewritten to. It is also tried as
# the last input format, so values that already use it pass through unchanged.
SQUIRRO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


class TimeFormatsPipelet(PipeletV1):
    """Parse values of datetime fields using a list of time formats.

    This pipelet can be used when you want to create a label (facet) for a
    datetime source field which might include values in more than one format.

    For example, there is a source field called `date_info` which stores
    datetime information, and which takes the following three values:

        13/09/2022, 13:05:18, 2022-09-13T13:05:18

    All three values follow a different format, with some even missing some
    information (like the date or the time part).

    You can use this pipelet as the first step of your pipeline workflow
    (before the `Transform Input` step if present), and configure it as
    follows:

    ```
    {
        "date_info": [
            "%d/%m/%Y",
            "%H:%M:%S"
        ]
    }
    ```

    The configured formats are tried in order, followed by
    `SQUIRRO_DATE_FORMAT` itself. The first format that matches wins and the
    field value is replaced by the parsed timestamp rendered with
    `SQUIRRO_DATE_FORMAT`. Parts that a format does not cover take the
    `datetime.strptime` defaults, so with the configuration above the three
    example values become `2022-09-13T00:00:00`, `1900-01-01T13:05:18` and
    `2022-09-13T13:05:18`.
    """

    # The method takes no `self`; declaring it static makes it callable on
    # instances as well as on the class itself.
    @staticmethod
    def getArguments() -> list:
        """Return the descriptors of the configuration arguments.

        Returns:
            A one-element list describing `source_field_time_formats_map`,
            a JSON code field.
        """
        return [
            {
                "name": "source_field_time_formats_map",
                "display_label": "Source Field to Time Formats map",
                "help": (
                    "Dictionary which maps source field names to time "
                    "formats to try out in order to successfully parse "
                    "their values."
                ),
                "type": "code",
                "syntax": "json",
            },
        ]

    def __init__(self, config: dict) -> None:
        """Store the pipelet configuration.

        Args:
            config: Mapping that may hold `source_field_time_formats_map`.
                `None` is treated as an empty configuration so that
                `consume` degrades to a no-op instead of failing.
        """
        self.config = config if config is not None else {}

    def consume(self, item: dict) -> dict:
        """Normalize the configured datetime fields of `item` in place.

        Args:
            item: Mapping of field names to values. Only fields listed in
                `source_field_time_formats_map` are touched.

        Returns:
            The same `item` object, with every configured and present field
            rewritten to `SQUIRRO_DATE_FORMAT`.

        Raises:
            ValueError: If a configured field holds a value that matches
                none of its formats nor `SQUIRRO_DATE_FORMAT`.
        """
        formats_map = self.config.get("source_field_time_formats_map") or {}

        # Walk the (usually short) configuration rather than every item key;
        # this also avoids writing to `item` while iterating over it.
        for field, time_formats in formats_map.items():
            value = item.get(field)
            if value is None:
                # Field absent or explicitly null: there is nothing to parse.
                continue
            if isinstance(time_formats, str):
                # Tolerate a single format given as a bare string.
                time_formats = [time_formats]
            item[field] = self._normalize(value, time_formats)
        return item

    @staticmethod
    def _normalize(value, time_formats) -> str:
        """Return `value` parsed with the first matching format, re-rendered.

        Args:
            value: Raw field value; expected to be a string.
            time_formats: Candidate `strptime` formats, tried in order before
                `SQUIRRO_DATE_FORMAT`.

        Raises:
            ValueError: If no candidate format matches `value`.
        """
        for time_format in list(time_formats) + [SQUIRRO_DATE_FORMAT]:
            try:
                parsed = datetime.strptime(value, time_format)
            except ValueError:
                # This format does not match; try the next candidate.
                continue
            return parsed.strftime(SQUIRRO_DATE_FORMAT)
        raise ValueError(
            f"Could not parse value {value} with any of the configured "
            f"formats: {time_formats}"
        )