...
getArguments(self)
Return the list of arguments that the plugin accepts.
The result of this parsing is made available to the data loader plugin as the self.args
object.
Each list item is a dictionary with the following options:
Parameter | Description |
---|---|
name | Mandatory - the name of the argument. Recommended naming convention is to keep it all lower case, and separate words with an underscore. An option with the name `file` is exposed on the command line as `--file`. |
flag | A short flag for the argument. Can be used to keep invocations of the data loader shorter, but this is used very sparingly. For example: the flag `f` allows `-f` to be used in place of `--file`. |
help | The help string, output with --help . |
required | True if this argument is mandatory. |
default | The default value, if the argument has not been specified. |
type | The data type that is expected. Defaults to `string`; valid values are `string`, `int`, `float` and `bool`. |
action | The argparse action for this option. Valid options are store , store_true and store_false . store expects a value to be specified, whereas store_true and store_false will always set the value to either True or False . |
Examples
Code Block:
def getArguments(self):
    """Declare the command-line arguments this plugin accepts.

    Returns a list of argument dictionaries; the parsed values are
    exposed to the plugin as the ``self.args`` object.
    """
    return [
        {
            "name": "file",
            "flag": "f",
            "help": "Excel file to load",
            # Fixed: was the garbled literal `trueTrue`, which is not
            # valid Python. The argument is mandatory, so this is True.
            "required": True,
        },
        {
            "name": "excel_sheet",
            "default": 0,
            "type": "int",
            "help": "Excel sheet name. Default: get first sheet.",
        },
    ]

def connect(self, inc_column=None, max_inc_value=None):
    # Just an example for how to access the options
    self._file = open(self.args.file)
Empty Plugin
This is a boilerplate template for a data loader plugin.
Code Block:
""" Data loader Plugin Template """
import hashlib
import logging

from squirro.dataloader.data_source import DataSource

log = logging.getLogger(__name__)


class TemplateSource(DataSource):
    """
    A Custom data loader Plugin
    """

    def __init__(self):
        pass

    def connect(self, inc_column=None, max_inc_value=None):
        """Connect to the source.

        `inc_column` / `max_inc_value` carry the incremental-load state
        (column name and its last seen value), if incremental loading is
        configured.
        """
        log.debug('Incremental Column: %r', inc_column)
        log.debug('Incremental Last Value: %r', max_inc_value)

    def disconnect(self):
        """Disconnect from the source."""
        # Nothing to do
        pass

    def getDataBatch(self, batch_size):
        """
        Generator - Get data from source on batches.

        :returns a list of dictionaries
        """
        rows = []
        # This call should ideally `yield` and not return all items directly
        content = get_content_from_somewhere()
        for row in content:
            # Emit a `row` here that's flat dictionary. If that's not the case
            # yet, transform it here.
            # But do not return a Squirro item - that's the job of the data
            # loader configuration (facets and mapping).
            rows.append(row)
            if len(rows) >= batch_size:
                yield rows
                rows = []
        # Flush the final, possibly short, batch.
        if rows:
            yield rows

    def getSchema(self):
        """
        Return the schema of the dataset

        :returns a List containing the names of the columns retrieved from
        the source
        """
        schema = [
            'title',
            'body',
            'created_at',
            'id',
            'summary',
            'abstract',
            'keywords'
        ]
        return schema

    def getJobId(self):
        """
        Return a unique string for each different select

        :returns a string
        """
        # Generate a stable id that changes with the main parameters
        m = hashlib.sha256()
        # hashlib.update() requires bytes in Python 3 - passing the raw
        # string arguments would raise TypeError, so encode them first.
        m.update(self.args.first_custom_param.encode('utf-8'))
        m.update(self.args.second_custom_param.encode('utf-8'))
        job_id = m.hexdigest()
        log.debug("Job ID: %s", job_id)
        return job_id

    def getArguments(self, parser):
        """
        Add source arguments to the main arguments parser
        """
        return [
            {
                'name': 'first_custom_param',
                'help': 'Custom data Loader Plugin Argument 1',
            },
            {
                'name': 'second_custom_param',
                'help': 'Custom Data Loader Plugin Argument 2',
            },
        ]