...
Code Block | ||
---|---|---|
| ||
import json
from datetime import datetime

from squirro.sdk import PipeletV1

# Target format every configured datetime field is rewritten to. It is also
# tried as the last parsing format, so already-normalized values pass through.
SQUIRRO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


class TimeFormatsPipelet(PipeletV1):
    """Parse values of datetime fields using a list of time formats.

    This pipelet can be used when you want to create a label (facet) for a
    datetime source field which might include values in more than one format.

    For example, there is a source field called `date_info` which stores
    datetime information and takes the following three values:

        13/09/2022, 13:05:18, 2022-09-13T13:05:18

    All three values follow a different format, with some even missing some
    information (like the date or time part). You can use this pipelet as the
    first step of your pipeline workflow (before the `Transform Input` step
    if present), and configure it as follows:

    ```
    {
        "date_info": [
            "%d/%m/%Y",
            "%H:%M:%S"
        ]
    }
    ```

    The third value needs no entry because `SQUIRRO_DATE_FORMAT` is always
    tried after the configured formats. Parts missing from a value take the
    `datetime.strptime` defaults (e.g. a time-only value is dated 1900-01-01).
    """

    @staticmethod
    def getArguments():
        """Return the description of the pipelet's configuration options."""
        return [
            {
                "name": "source_field_time_formats_map",
                "display_label": "Source Field to Time Formats map",
                "help": (
                    "Dictionary which maps source field names to time "
                    "formats to try out in order to successfully parse "
                    "their values."
                ),
                "type": "code",
                "syntax": "json",
            },
        ]

    def __init__(self, config):
        """Store the pipelet configuration for later use in `consume`."""
        self.config = config

    def consume(self, item):
        """Rewrite every configured field of `item` to `SQUIRRO_DATE_FORMAT`.

        Args:
            item: Mapping of field names to values. Fields that are not
                listed in the configured map are left untouched.

        Returns:
            The same `item` object, with each configured field replaced by
            its value re-formatted as `SQUIRRO_DATE_FORMAT`.

        Raises:
            ValueError: If a configured field's value matches none of its
                formats, or if the configured map is not a JSON object.
        """
        formats_map = self._load_formats_map()
        # Iterate over the (small) configured map rather than over `item`,
        # so `item` is never written to while it is being iterated.
        for key, time_formats in formats_map.items():
            if key in item:
                item[key] = self._to_squirro_format(item[key], time_formats)
        return item

    def _load_formats_map(self):
        """Return the configured field -> time formats mapping as a dict."""
        raw = (self.config or {}).get("source_field_time_formats_map") or {}
        # NOTE(review): a "code"/"json" argument may be delivered as raw JSON
        # text rather than as a parsed dict - confirm; both are accepted here.
        if isinstance(raw, str):
            raw = json.loads(raw)
        if not isinstance(raw, dict):
            raise ValueError(
                "source_field_time_formats_map must be a JSON object mapping "
                f"field names to lists of time formats, got: {raw!r}"
            )
        return raw

    @staticmethod
    def _to_squirro_format(value, time_formats):
        """Parse `value` with the first matching format and re-format it."""
        if isinstance(time_formats, str):
            # A single format given as a bare string instead of a list.
            time_formats = [time_formats]
        time_formats = list(time_formats or [])
        for time_format in [*time_formats, SQUIRRO_DATE_FORMAT]:
            try:
                value_dt = datetime.strptime(value, time_format)
            except (ValueError, TypeError):
                # ValueError: format mismatch. TypeError: non-string value.
                continue
            return value_dt.strftime(SQUIRRO_DATE_FORMAT)
        raise ValueError(
            f"Could not parse value {value} with any of the configured "
            f"formats: {time_formats}"
        )
...