Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Code Block
languagepy
# Workflow configuration: a 'dataset' section (empty here) and an ordered
# 'pipeline' list of step dictionaries. Each step carries a 'step' category
# and a 'type', plus step-specific options.
# NOTE(review): the tokenizer step below was reconstructed from two revisions
# of this page that had been merged together; the fuller variant of the
# 'cleaning' map and 'rules' list is kept -- confirm against the live config.
config = {
    'dataset': {},
    'pipeline': [
        {'fields': ['body'], 'step': 'loader', 'type': 'squirro_query'},
        {'fields': ['body'],
         'mark_as_skipped': True,
         'step': 'filter',
         'type': 'empty'},
        # Sentence tokenizer: 'cleaning' maps substrings to replacements and
        # 'rules' lists extra marker strings (presumably additional sentence
        # boundaries -- verify against the tokenizer documentation).
        {'cleaning': {'\tapprox.': ' approx',
                      '\netc.': ' etc',
                      ') i.e.': '). ie',
                      '. (': ' (',
                      '</p>': ' ',
                      '<p>': ' ',
                      'approx.': 'approx',
                      'etc.': 'etc',
                      'i.e.': 'ie'},
         'input_fields': ['body'],
         'output_fields': ['extract_sentences'],
         'rules': ['**',
                   '\n-',
                   '</h1>',
                   '</h2>',
                   '</h3>',
                   '<br/>',
                   '...',
                   '…',
                   ': '],
         'step': 'tokenizer',
         'type': 'sentences_nltk'},
        {'fields': ['extract_sentences'],
         'step': 'filter',
         'type': 'doc_split'},
        {'input_fields': ['extract_sentences'],
         'output_fields': ['extract_sentences'],
         'step': 'tokenizer',
         'type': 'html'},
        {'fields': ['extract_sentences'],
         'step': 'filter',
         'type': 'doc_split'},
        {'input_fields': ['extract_sentences'],
         'output_fields': ['sentences_normalized'],
         'step': 'normalizer',
         'type': 'html'},
        # Regex whitelist '^.{20,}$' matches only values of 20+ characters.
        {'fields': ['sentences_normalized'],
         'mark_as_skipped': True,
         'step': 'filter',
         'type': 'regex',
         'whitelist_regexes': ['^.{20,}$']},
        # First proximity filter: writes to 'prediction_tax_rate1'.
        {'blacklist_terms': [],
         'fields': ['sentences_normalized'],
         'matching_label': 'tax_rate1',
         'name': './models/ais/proximity',
         'non_matching_label': 'not_tax_rate1_tax_rate2',
         'output_field': 'prediction_tax_rate1',
         'step': 'filter',
         'type': 'proximity',
         'whitelist_terms': ['tax rate of~1|', 'tax rate~2|']},
        # Second proximity filter: writes to 'prediction_tax_rate2'.
        # NOTE(review): the first whitelist term looks like two page revisions
        # fused into one string; it is kept verbatim -- confirm the intended
        # term(s) before using this configuration.
        {'blacklist_terms': [],
         'fields': ['sentences_normalized'],
         'matching_label': 'tax_rate2',
         'name': './models/ais/proximity',
         'non_matching_label': 'not_tax_rate1_tax_rate2',
         'output_field': 'prediction_tax_rate2',
         'step': 'filter',
         'type': 'proximity',
         'whitelist_terms': ['"tax rate of"~1~3rate~4|', 'tax rate~1|']},
        # Merge step: takes both prediction fields, ',' delimiter, and writes
        # to 'prediction'.
        {'delimiter': ',',
         'input_fields': ['prediction_tax_rate1', 'prediction_tax_rate2'],
         'output_field': 'prediction',
         'step': 'filter',
         'type': 'merge'},
        {'input_field': 'prediction',
         'output_field': 'prediction',
         'step': 'filter',
         'type': 'split'},
        {'fields': ['sentences_normalized', 'prediction'],
         'step': 'filter',
         'type': 'doc_join'},
        # Entity step: 'excluded_values' lists the non-matching label.
        {'entity_name_field': 'Catalyst',
         'entity_type': 'Catalyst',
         'excluded_values': ['not_tax_rate1_tax_rate2'],
         'extract_field': 'sentences_normalized',
         'format_values': False,
         'global_property_field_map': {},
         'modes': ['process'],
         'property_field_map': {'Catalyst': ['prediction']},
         'required_properties': ['Catalyst'],
         'source_field': 'body',
         'step': 'filter',
         'type': 'squirro_entity'},
    ],
}

...

Code Block
languagepy
# Publish the model, supplying the pipeline inline through `workflow_config`.
# '<UNIQUE_HASH>' and '<LOCATION_OF_ORIGIN>' are placeholders: substitute
# your own values before running.
# NOTE(review): the placeholders were recovered from strings in which an old
# example value and the new placeholder had been fused -- confirm.
client.ml_publish_model(
    project_id,
    published_as='Proximity Model Tax Rate',
    description='Proximity Model for Tax Rate v1',
    external_model=True,
    global_id='<UNIQUE_HASH>',
    location='<LOCATION_OF_ORIGIN>',
    labels=['tax_rate1', 'tax_rate2', 'not_tax_rate1_tax_rate2'],
    tagging_level='sentence',
    workflow_name='[PUB] prox config import',
    workflow_config=config,
)

...

Code Block
languagepy
# Publish the model by referencing a workflow through `workflow_id` rather
# than passing a configuration inline.
# '<UNIQUE_HASH>' and '<LOCATION_OF_ORIGIN>' are placeholders: substitute
# your own values before running.
# NOTE(review): the placeholders were recovered from strings in which an old
# example value and the new placeholder had been fused -- confirm.
client.ml_publish_model(
    project_id,
    published_as='Proximity Model Tax Rate',
    description='Proximity Model for Tax Rate v1',
    external_model=True,
    global_id='<UNIQUE_HASH>',
    location='<LOCATION_OF_ORIGIN>',
    labels=['tax_rate1', 'tax_rate2', 'not_tax_rate1_tax_rate2'],
    tagging_level='sentence',
    workflow_id='VLGRAEbLRZ2v5Uq_MPt77w',
)

...