Created
October 3, 2014 18:14
-
-
Save mvaled/20bf24b68bdf450a0de7 to your computer and use it in GitHub Desktop.
Demo of a possible bug in fuzzy match
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?xml version="1.0" encoding="UTF-8"?> | |
| <transformation> | |
| <!-- Transformation metadata for the transformation named "fuzzy": type, directory, log table definitions, row-set and feedback settings, and audit fields. --> | |
| <info> | |
| <name>fuzzy</name> | |
| <description/> | |
| <extended_description/> | |
| <trans_version/> | |
| <trans_type>Normal</trans_type> | |
| <directory>/</directory> | |
| <!-- No named parameters are declared. --> | |
| <parameters> | |
| </parameters> | |
| <!-- Log table definitions. In every table below the connection, schema and table elements are left empty; only the column lists are filled in. --> | |
| <log> | |
| <!-- Transformation log table: every field is enabled except EXECUTING_SERVER, EXECUTING_USER and CLIENT. --> | |
| <trans-log-table><connection/> | |
| <schema/> | |
| <table/> | |
| <size_limit_lines/> | |
| <interval/> | |
| <timeout_days/> | |
| <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STATUS</id><enabled>Y</enabled><name>STATUS</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name><subject/></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name><subject/></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name><subject/></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name><subject/></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name><subject/></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name><subject/></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>STARTDATE</id><enabled>Y</enabled><name>STARTDATE</name></field><field><id>ENDDATE</id><enabled>Y</enabled><name>ENDDATE</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>DEPDATE</id><enabled>Y</enabled><name>DEPDATE</name></field><field><id>REPLAYDATE</id><enabled>Y</enabled><name>REPLAYDATE</name></field><field><id>LOG_FIELD</id><enabled>Y</enabled><name>LOG_FIELD</name></field><field><id>EXECUTING_SERVER</id><enabled>N</enabled><name>EXECUTING_SERVER</name></field><field><id>EXECUTING_USER</id><enabled>N</enabled><name>EXECUTING_USER</name></field><field><id>CLIENT</id><enabled>N</enabled><name>CLIENT</name></field></trans-log-table> | |
| <!-- Performance log table: all listed fields are enabled. --> | |
| <perf-log-table><connection/> | |
| <schema/> | |
| <table/> | |
| <interval/> | |
| <timeout_days/> | |
| <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>SEQ_NR</id><enabled>Y</enabled><name>SEQ_NR</name></field><field><id>LOGDATE</id><enabled>Y</enabled><name>LOGDATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>INPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>INPUT_BUFFER_ROWS</name></field><field><id>OUTPUT_BUFFER_ROWS</id><enabled>Y</enabled><name>OUTPUT_BUFFER_ROWS</name></field></perf-log-table> | |
| <!-- Channel log table: all listed fields are enabled. --> | |
| <channel-log-table><connection/> | |
| <schema/> | |
| <table/> | |
| <timeout_days/> | |
| <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>LOGGING_OBJECT_TYPE</id><enabled>Y</enabled><name>LOGGING_OBJECT_TYPE</name></field><field><id>OBJECT_NAME</id><enabled>Y</enabled><name>OBJECT_NAME</name></field><field><id>OBJECT_COPY</id><enabled>Y</enabled><name>OBJECT_COPY</name></field><field><id>REPOSITORY_DIRECTORY</id><enabled>Y</enabled><name>REPOSITORY_DIRECTORY</name></field><field><id>FILENAME</id><enabled>Y</enabled><name>FILENAME</name></field><field><id>OBJECT_ID</id><enabled>Y</enabled><name>OBJECT_ID</name></field><field><id>OBJECT_REVISION</id><enabled>Y</enabled><name>OBJECT_REVISION</name></field><field><id>PARENT_CHANNEL_ID</id><enabled>Y</enabled><name>PARENT_CHANNEL_ID</name></field><field><id>ROOT_CHANNEL_ID</id><enabled>Y</enabled><name>ROOT_CHANNEL_ID</name></field></channel-log-table> | |
| <!-- Step log table: every field is enabled except LOG_FIELD. --> | |
| <step-log-table><connection/> | |
| <schema/> | |
| <table/> | |
| <timeout_days/> | |
| <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>TRANSNAME</id><enabled>Y</enabled><name>TRANSNAME</name></field><field><id>STEPNAME</id><enabled>Y</enabled><name>STEPNAME</name></field><field><id>STEP_COPY</id><enabled>Y</enabled><name>STEP_COPY</name></field><field><id>LINES_READ</id><enabled>Y</enabled><name>LINES_READ</name></field><field><id>LINES_WRITTEN</id><enabled>Y</enabled><name>LINES_WRITTEN</name></field><field><id>LINES_UPDATED</id><enabled>Y</enabled><name>LINES_UPDATED</name></field><field><id>LINES_INPUT</id><enabled>Y</enabled><name>LINES_INPUT</name></field><field><id>LINES_OUTPUT</id><enabled>Y</enabled><name>LINES_OUTPUT</name></field><field><id>LINES_REJECTED</id><enabled>Y</enabled><name>LINES_REJECTED</name></field><field><id>ERRORS</id><enabled>Y</enabled><name>ERRORS</name></field><field><id>LOG_FIELD</id><enabled>N</enabled><name>LOG_FIELD</name></field></step-log-table> | |
| <!-- Metrics log table: all listed fields are enabled. --> | |
| <metrics-log-table><connection/> | |
| <schema/> | |
| <table/> | |
| <timeout_days/> | |
| <field><id>ID_BATCH</id><enabled>Y</enabled><name>ID_BATCH</name></field><field><id>CHANNEL_ID</id><enabled>Y</enabled><name>CHANNEL_ID</name></field><field><id>LOG_DATE</id><enabled>Y</enabled><name>LOG_DATE</name></field><field><id>METRICS_DATE</id><enabled>Y</enabled><name>METRICS_DATE</name></field><field><id>METRICS_CODE</id><enabled>Y</enabled><name>METRICS_CODE</name></field><field><id>METRICS_DESCRIPTION</id><enabled>Y</enabled><name>METRICS_DESCRIPTION</name></field><field><id>METRICS_SUBJECT</id><enabled>Y</enabled><name>METRICS_SUBJECT</name></field><field><id>METRICS_TYPE</id><enabled>Y</enabled><name>METRICS_TYPE</name></field><field><id>METRICS_VALUE</id><enabled>Y</enabled><name>METRICS_VALUE</name></field></metrics-log-table> | |
| </log> | |
| <!-- maxdate: connection, table and field are empty; offset and maxdiff are both 0.0. --> | |
| <maxdate> | |
| <connection/> | |
| <table/> | |
| <field/> | |
| <offset>0.0</offset> | |
| <maxdiff>0.0</maxdiff> | |
| </maxdate> | |
| <!-- Row-set, feedback and performance-capture settings. NOTE(review): units for the sleep_time and capturing_delay values are not stated in this file; presumably milliseconds, confirm against the Kettle documentation. --> | |
| <size_rowset>10000</size_rowset> | |
| <sleep_time_empty>50</sleep_time_empty> | |
| <sleep_time_full>50</sleep_time_full> | |
| <unique_connections>N</unique_connections> | |
| <feedback_shown>Y</feedback_shown> | |
| <feedback_size>50000</feedback_size> | |
| <using_thread_priorities>Y</using_thread_priorities> | |
| <shared_objects_file/> | |
| <capture_step_performance>N</capture_step_performance> | |
| <step_performance_capturing_delay>1000</step_performance_capturing_delay> | |
| <step_performance_capturing_size_limit>100</step_performance_capturing_size_limit> | |
| <!-- The four containers below (dependencies, partition schemas, slave servers, cluster schemas) are all empty. --> | |
| <dependencies> | |
| </dependencies> | |
| <partitionschemas> | |
| </partitionschemas> | |
| <slaveservers> | |
| </slaveservers> | |
| <clusterschemas> | |
| </clusterschemas> | |
| <!-- Audit fields: created and modified timestamps are identical and the user is recorded as "-". --> | |
| <created_user>-</created_user> | |
| <created_date>2014/10/03 14:06:32.040</created_date> | |
| <modified_user>-</modified_user> | |
| <modified_date>2014/10/03 14:06:32.040</modified_date> | |
| </info> | |
| <!-- Empty container: no notepad entries are defined. --> | |
| <notepads> | |
| </notepads> | |
| <!-- Database connection "Mercurio ERP Unificado (QB)": type OpenERPDatabaseMeta with access Plugin, server localhost, port 8069, database mercurio_quickbooks, user admin. --> | |
| <!-- NOTE(review): none of the three steps in this file contains a connection reference, so this definition looks unused by the demo; confirm before removing it. --> | |
| <connection> | |
| <name>Mercurio ERP Unificado (QB)</name> | |
| <server>localhost</server> | |
| <type>OpenERPDatabaseMeta</type> | |
| <access>Plugin</access> | |
| <database>mercurio_quickbooks</database> | |
| <port>8069</port> | |
| <username>admin</username> | |
| <!-- NOTE(review): a credential is stored in this shared file. The "Encrypted" prefix is presumably Kettle's reversible obfuscation rather than real encryption; verify, and rotate the password if it is live. --> | |
| <password>Encrypted 6f6f6867d7aec6ad3cd2ca87a416b77fdb95a7db</password> | |
| <servername/> | |
| <data_tablespace/> | |
| <index_tablespace/> | |
| <!-- Connection options as code/value pairs. PORT_NUMBER repeats the port element above (8069). --> | |
| <attributes> | |
| <attribute><code>FORCE_IDENTIFIERS_TO_LOWERCASE</code><attribute>N</attribute></attribute> | |
| <attribute><code>FORCE_IDENTIFIERS_TO_UPPERCASE</code><attribute>N</attribute></attribute> | |
| <attribute><code>IS_CLUSTERED</code><attribute>N</attribute></attribute> | |
| <attribute><code>PORT_NUMBER</code><attribute>8069</attribute></attribute> | |
| <attribute><code>PRESERVE_RESERVED_WORD_CASE</code><attribute>N</attribute></attribute> | |
| <attribute><code>QUOTE_ALL_FIELDS</code><attribute>N</attribute></attribute> | |
| <attribute><code>SUPPORTS_BOOLEAN_DATA_TYPE</code><attribute>Y</attribute></attribute> | |
| <attribute><code>SUPPORTS_TIMESTAMP_DATA_TYPE</code><attribute>Y</attribute></attribute> | |
| <attribute><code>USE_POOLING</code><attribute>N</attribute></attribute> | |
| </attributes> | |
| </connection> | |
| <!-- Hops between steps: Data Grid 1 and Data Grid 2 both lead to the Fuzzy match step, and both hops are enabled. --> | |
| <order> | |
| <hop> <from>Data Grid 1</from><to>Fuzzy match</to><enabled>Y</enabled> </hop> | |
| <hop> <from>Data Grid 2</from><to>Fuzzy match</to><enabled>Y</enabled> </hop> | |
| </order> | |
| <!-- Step "Data Grid 1" (type DataGrid): three hard-coded rows with the String fields Tag, Name and Mine. This step is the one named in the from element of the Fuzzy match step. --> | |
| <step> | |
| <name>Data Grid 1</name> | |
| <type>DataGrid</type> | |
| <description/> | |
| <distribute>Y</distribute> | |
| <custom_distribution/> | |
| <copies>1</copies> | |
| <partitioning> | |
| <method>none</method> | |
| <schema_name/> | |
| </partitioning> | |
| <!-- Field layout: all three fields are String with length and precision -1 and no format mask. --> | |
| <fields> | |
| <field> | |
| <name>Tag</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| <field> | |
| <name>Name</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| <field> | |
| <name>Mine</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| </fields> | |
| <!-- Row data: each line has three items, which appear to follow the field order above (Tag, Name, Mine). In the third row the Name value "ADA" equals the Tag value. --> | |
| <data> | |
| <line> <item>ABC</item><item>A book on C</item><item>Mine</item> </line> | |
| <line> <item>ABP</item><item>A book on Python</item><item>Borrowed</item> </line> | |
| <line> <item>ADA</item><item>ADA</item><item>Dad's</item> </line> | |
| </data> | |
| <cluster_schema/> | |
| <!-- GUI holds the position of the step on the editor canvas (xloc, yloc). --> | |
| <remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
| <xloc>157</xloc> | |
| <yloc>66</yloc> | |
| <draw>Y</draw> | |
| </GUI> | |
| </step> | |
| <!-- Step "Data Grid 2" (type DataGrid): three hard-coded rows using the same field names and types as Data Grid 1 (Tag, Name, Mine, all String). --> | |
| <step> | |
| <name>Data Grid 2</name> | |
| <type>DataGrid</type> | |
| <description/> | |
| <distribute>Y</distribute> | |
| <custom_distribution/> | |
| <copies>1</copies> | |
| <partitioning> | |
| <method>none</method> | |
| <schema_name/> | |
| </partitioning> | |
| <!-- Field layout: all three fields are String with length and precision -1 and no format mask. --> | |
| <fields> | |
| <field> | |
| <name>Tag</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| <field> | |
| <name>Name</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| <field> | |
| <name>Mine</name> | |
| <type>String</type> | |
| <format/> | |
| <currency/> | |
| <decimal/> | |
| <group/> | |
| <length>-1</length> | |
| <precision>-1</precision> | |
| <set_empty_string>N</set_empty_string> | |
| </field> | |
| </fields> | |
| <!-- Row data: each line has three items, which appear to follow the field order above (Tag, Name, Mine). None of the Name values here is identical to a Name value in Data Grid 1. --> | |
| <data> | |
| <line> <item>CPP</item><item>A book on C++</item><item>Not mine</item> </line> | |
| <line> <item>ABJ</item><item>A book on Jython</item><item>Not mine</item> </line> | |
| <line> <item>ABN</item><item>A book on Nothing</item><item>Dad's</item> </line> | |
| </data> | |
| <cluster_schema/> | |
| <!-- GUI holds the position of the step on the editor canvas (xloc, yloc). --> | |
| <remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
| <xloc>159</xloc> | |
| <yloc>167</yloc> | |
| <draw>Y</draw> | |
| </GUI> | |
| </step> | |
| <!-- Step "Fuzzy match" (type FuzzyMatch): receives rows from both Data Grid steps via the two hops and compares the Name field of one stream against the Name field of the other. --> | |
| <step> | |
| <name>Fuzzy match</name> | |
| <type>FuzzyMatch</type> | |
| <description/> | |
| <distribute>Y</distribute> | |
| <custom_distribution/> | |
| <copies>1</copies> | |
| <partitioning> | |
| <method>none</method> | |
| <schema_name/> | |
| </partitioning> | |
| <!-- NOTE(review): the from element names Data Grid 1; presumably that makes Data Grid 1 the lookup stream and Data Grid 2 the main stream. Confirm against the Fuzzy match step documentation. --> | |
| <from>Data Grid 1</from> | |
| <lookupfield>Name</lookupfield> | |
| <mainstreamfield>Name</mainstreamfield> | |
| <!-- Names of the two added output fields; note that the value field name "measure value" contains a space. --> | |
| <outputmatchfield>match</outputmatchfield> | |
| <outputvaluefield>measure value</outputvaluefield> | |
| <!-- Matching settings: levenshtein algorithm, caseSensitive N, closervalue Y, bounds 0 and 2. NOTE(review): for levenshtein the bounds are presumably edit distances; confirm. The separator is a comma. --> | |
| <caseSensitive>N</caseSensitive> | |
| <closervalue>Y</closervalue> | |
| <minimalValue>0</minimalValue> | |
| <maximalValue>2</maximalValue> | |
| <separator>,</separator> | |
| <algorithm>levenshtein</algorithm> | |
| <!-- Additional fields returned from the lookup side: Tag, Name and Mine, each renamed to its own name. --> | |
| <!-- NOTE(review): both Data Grid steps already define fields called Tag, Name and Mine, so the output row would presumably carry duplicate field names. This may be the behaviour the demo is meant to show; confirm before changing the renames. --> | |
| <lookup> | |
| <value> | |
| <name>Tag</name> | |
| <rename>Tag</rename> | |
| </value> | |
| <value> | |
| <name>Name</name> | |
| <rename>Name</rename> | |
| </value> | |
| <value> | |
| <name>Mine</name> | |
| <rename>Mine</rename> | |
| </value> | |
| </lookup> | |
| <cluster_schema/> | |
| <!-- GUI holds the position of the step on the editor canvas (xloc, yloc). --> | |
| <remotesteps> <input> </input> <output> </output> </remotesteps> <GUI> | |
| <xloc>284</xloc> | |
| <yloc>121</yloc> | |
| <draw>Y</draw> | |
| </GUI> | |
| </step> | |
| <!-- Trailing transformation settings: the step error handling and slave step copy partition distribution containers are empty, and slave_transformation is N. --> | |
| <step_error_handling> | |
| </step_error_handling> | |
| <slave-step-copy-partition-distribution> | |
| </slave-step-copy-partition-distribution> | |
| <slave_transformation>N</slave_transformation> | |
| </transformation> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment