Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagepy
from squirro.sdk import PipeletV1
from datetime import datetime


# strftime/strptime pattern (no timezone, second precision). The pipelet below
# always tries it as the last parsing format and uses it to write the
# normalized value back to the item.
SQUIRRO_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S"


class TimeFormatsPipelet(PipeletV1):
    """Parse values of datetime fields using a list of time formats.

    This pipelet can be used when you want to create a label (facet) for
    a datetime source field whose values may come in more than one format.

    For example, there is a source field called `date_info` which stores
    datetime information and takes the following three values:
    13/09/2022, 13:05:18, 2022-09-13T13:05:18
    All three values follow a different format, and some are even missing
    part of the information (the date or the time part).

    You can use this pipelet as the first step of your pipeline workflow
    (before the `Transform Input` step if present), and configure it as follows:
    ```
    {
      "date_info": [
        "%d/%m/%Y",
        "%H:%M:%S"
      ]
    }
    ```
    The third value needs no entry of its own: `SQUIRRO_DATE_FORMAT` is
    always tried after the configured formats. Every successfully parsed
    value is written back to the item formatted as `SQUIRRO_DATE_FORMAT`.
    """

    @staticmethod
    def getArguments():
        """Return the description of the pipelet's configuration options."""
        return [
            {
                "name": "source_field_time_formats_map",
                "display_label": "Source Field to Time Formats map",
                "help": (
                    "Dictionary which maps source field names to time "
                    "formats to try out in order to successfully parse "
                    "their values."
                ),
                "type": "code",
                "syntax": "json",
            },
        ]

    def __init__(self, config):
        """Store the pipelet configuration for later use in `consume`."""
        self.config = config

    def consume(self, item):
        """Normalize the configured datetime fields of `item` in place.

        For every field named in the `source_field_time_formats_map`
        configuration that is present in `item`, the value is parsed with
        the configured formats in order, then with `SQUIRRO_DATE_FORMAT`,
        and replaced by its `SQUIRRO_DATE_FORMAT` representation.

        Args:
            item: Mapping of field names to values; modified in place.

        Returns:
            The same `item` object.

        Raises:
            ValueError: If a configured field's value matches none of the
                formats.
        """
        # NOTE(review): assumes the configured map arrives already decoded as
        # a dict (not as a raw JSON string) -- confirm against the platform.
        # `or {}` also covers a key that is present but set to null.
        formats_by_field = self.config.get("source_field_time_formats_map") or {}

        # Walk the configured fields rather than the item, so the item is
        # never modified while it is being iterated.
        for field, time_formats in formats_by_field.items():
            if field not in item:
                continue

            value = item[field]
            parsed = None

            # The default format is always tried last as a fallback.
            for time_format in [*time_formats, SQUIRRO_DATE_FORMAT]:
                try:
                    parsed = datetime.strptime(value, time_format)
                except ValueError:
                    # This format does not match; try the next one.
                    continue
                break

            if parsed is None:
                raise ValueError(
                    f"Could not parse value {value} with any of the configured formats: {time_formats}"
                )

            item[field] = parsed.strftime(SQUIRRO_DATE_FORMAT)

        return item

...