...
getArguments(self)
Return the list of arguments that the plugin accepts.
The result of this parsing is made available to the data loader plugin as the self.args
object.
Each list item is a dictionary with the following options:
Parameter | Description |
---|---|
name | Mandatory - the name of the argument. The recommended naming convention is to keep it all lower case and to separate words with an underscore. An option with the name mysource_password is passed on the command line as --mysource-password. Note: Please be aware that underscores "_" in the argument name in the Python script are translated to dashes "-" in the argument name of the squirro_data_load script (e.g. mysource_password is read in as "--mysource-password"). |
flag | A short flag for the argument. It can be used to keep invocations of the data loader shorter, but short flags are used very sparingly. For example: "f" for an argument named "file", so that -f can be passed instead of --file. |
help | The help string, which is output with --help. |
required | True if this argument is mandatory. |
default | The default value, if the argument has not been specified. |
type | The data type that is expected. Defaults to `string`; valid values are `string`, `int`, `float` and `bool`. |
action | The argparse action for this option. Valid options are `store`, `store_true` and `store_false`. `store` expects a value to be specified, whereas `store_true` and `store_false` will always set the value to either True or False. |
choices | A list of the available choices. Specifying a value that is not in this list results in an error. |
advanced |
|
Examples
Code Block | ||
---|---|---|
| ||
def getArguments(self):
    """Return the arguments accepted by this example plugin.

    Declares two options: a mandatory ``file`` argument (short flag ``f``)
    and an integer ``excel_sheet`` argument that defaults to ``0``.

    :returns: a list of argument definition dictionaries
    """
    return [
        {
            "name": "file",
            "flag": "f",
            "help": "Excel file to load",
            "required": True,
        },
        {
            "name": "excel_sheet",
            "default": 0,
            "type": "int",
            "help": "Excel sheet name. Default: get first sheet.",
        },
    ]


def connect(self, inc_column=None, max_inc_value=None):
    """Open the file named by the ``file`` argument.

    The open handle is stored on ``self._file``. ``inc_column`` and
    ``max_inc_value`` are accepted but not used by this example.
    """
    # Just an example for how to access the options.
    # Excel workbooks are binary, so open in binary mode to avoid any text
    # decoding of the content.
    # NOTE(review): the handle is not closed here - presumably the plugin
    # closes it when disconnecting; confirm in the real implementation.
    self._file = open(self.args.file, "rb")
Warning |
---|
Note that the `getDefaultSourceName` method is not yet available in our latest release. |
getDefaultSourceName
getDefaultSourceName(self)
This method is used to suggest a default title for the source created in the UI when the plugin is used with the dataloader provider. If this method has not been implemented, no default title suggestion will be provided by the dataloader provider.
Code Block | ||
---|---|---|
| ||
def getDefaultSourceName(self):
    """Return a suggested default title for the source.

    The title is the string form of ``self.args.arg1`` immediately followed
    by the string form of ``self.args.arg2``.

    :returns: a string
    """
    # Use first and second parameter of the plugin as the author knows this
    # will make a unique enough default title
    return str(self.args.arg1) + str(self.args.arg2)
Empty Plugin
This is a boilerplate template for a data loader plugin.
Code Block | ||||
---|---|---|---|---|
| ||||
""" Data loader Plugin Template """ import hashlib import logging from squirro.dataloader.data_source import DataSource log = logging.getLogger(__name__) class TemplateSource(DataSource): """ A Custom data loader Plugin """ def __init__(self): pass def connect(self, inc_column=None, max_inc_value=None): log.debug('Incremental Column: %r', inc_column) log.debug('Incremental Last Value: %r', max_inc_value) def disconnect(self): """Disconnect from the source.""" # Nothing to do pass def getDataBatch(self, batch_size): """ Generator - Get data from source on batches. :returns a list of dictionaries """ rows = [] # This call should ideally `yield` and not return all items directly content = get_content_from_somewhere() for row in content: # Emit a `row` here that's flat dictionary. If that's not the case # yet, transform it here. # But do not return a Squirro item - that's the job of the data # loader configuration (facets and mapping). rows.append(row) if len(rows) >= batch_size: yield rows rows = [] if rows: yield rows def getSchema(self): """ Return the schema of the dataset :returns a List containing the names of the columns retrieved from the source """ schema = [ 'title', 'body', 'created_at', 'id', 'summary', 'abstract', 'keywords' ] return schema def getJobId(self): """ Return a unique string for each different select :returns a string """ # Generate a stable id that changes with the main parameters m = hashlib.sha256() m.update(self.args.first_custom_param) m.update(self.args.second_custom_param) job_id = m.hexdigest() log.debug("Job ID: %s", job_id) return job_id def getArguments(self): """ Add source arguments to the main arguments parser """ return [ { 'name': 'first_custom_param', 'help': 'Custom data Loader Plugin Argument 1', }, { 'name': 'second_custom_param', 'help': 'Custom Data Loader Plugin Argument 2', }, ] |