I have a table in which one column contains the value ANNI in some rows and ANNI 2343355 in others.
I want to split the cells that contain ANNI 2343355.
That's what I have:
And this is what I want:
The answers helped me a lot, but now I have another problem. This is my process now:
But the result looks like this: result
I don't know where I messed up your code, but I would be very thankful if you could help me once more.
You can use the Split operator to separate a string value based on a regex pattern. In your example, a simple \s
(which matches a whitespace character) is sufficient. Afterwards you need to do some post-processing to get everything in the desired format. See the example below. You can just copy and paste the XML snippet into your RapidMiner process window.
<?xml version="1.0" encoding="UTF-8"?>
<process version="9.0.000-BETA">
  <!--
    RapidMiner process: split a "Name" value such as "ANNI 12345" at the
    whitespace into a name part and a number part. Rows whose Name has no
    number part fall back to the value of the existing "Number" column.
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="9.0.000-BETA" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: build a two-row sample set (one Name with a number, one without). -->
      <operator activated="true" class="subprocess" compatibility="9.0.000-BETA" expanded="true" height="82" name="Subprocess" width="90" x="112" y="34">
        <process expanded="true">
          <!-- The Name values are quoted strings; the inner quotes must be written as the quot entity inside an XML attribute. -->
          <operator activated="true" class="generate_data_user_specification" compatibility="9.0.000-BETA" expanded="true" height="68" name="Generate Data by User Specification" width="90" x="45" y="34">
            <list key="attribute_values">
              <parameter key="Name" value="&quot;ANNI 12345&quot;"/>
              <parameter key="Number" value="67890"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="generate_data_user_specification" compatibility="9.0.000-BETA" expanded="true" height="68" name="Generate Data by User Specification (2)" width="90" x="45" y="136">
            <list key="attribute_values">
              <parameter key="Name" value="&quot;ANNI&quot;"/>
              <parameter key="Number" value="67890"/>
            </list>
            <list key="set_additional_roles"/>
          </operator>
          <operator activated="true" class="append" compatibility="9.0.000-BETA" expanded="true" height="103" name="Append" width="90" x="246" y="34"/>
          <connect from_op="Generate Data by User Specification" from_port="output" to_op="Append" to_port="example set 1"/>
          <connect from_op="Generate Data by User Specification (2)" from_port="output" to_op="Append" to_port="example set 2"/>
          <connect from_op="Append" from_port="merged set" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
        <description align="center" color="red" colored="true" width="126">Create sample data</description>
      </operator>
      <!-- Step 2: split the single attribute "Name" on the regex \s. -->
      <operator activated="true" class="split" compatibility="9.0.000-BETA" expanded="true" height="82" name="Split" width="90" x="313" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="Name"/>
        <parameter key="split_pattern" value="\s"/>
      </operator>
      <!-- Step 3: rename Name_1 to "Name" and Name_2 to "New Number". -->
      <operator activated="true" class="rename" compatibility="9.0.000-BETA" expanded="true" height="82" name="Rename" width="90" x="514" y="34">
        <parameter key="old_name" value="Name_1"/>
        <parameter key="new_name" value="Name"/>
        <list key="rename_additional_attributes">
          <parameter key="Name_2" value="New Number"/>
        </list>
        <description align="center" color="purple" colored="true" width="126">Change the names of the two split attributes</description>
      </operator>
      <!-- Step 4: parse the "New Number" attribute as a number. -->
      <operator activated="true" class="parse_numbers" compatibility="9.0.000-BETA" expanded="true" height="82" name="Parse Numbers" width="90" x="648" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="New Number"/>
        <description align="center" color="purple" colored="true" width="126">Name was a string before, so we have to parse the &quot;New Number&quot; column</description>
      </operator>
      <!-- Step 5: where "New Number" is missing, take the value of "Number" instead. -->
      <operator activated="true" class="generate_attributes" compatibility="9.0.000-BETA" expanded="true" height="82" name="Generate Attributes" width="90" x="782" y="34">
        <list key="function_descriptions">
          <parameter key="New Number" value="if([New Number]==MISSING_NUMERIC, Number,[New Number])"/>
        </list>
      </operator>
      <!-- Wiring: Subprocess, Split, Rename, Parse Numbers, Generate Attributes, result 1. -->
      <connect from_op="Subprocess" from_port="out 1" to_op="Split" to_port="example set input"/>
      <connect from_op="Split" from_port="example set output" to_op="Rename" to_port="example set input"/>
      <connect from_op="Rename" from_port="example set output" to_op="Parse Numbers" to_port="example set input"/>
      <connect from_op="Parse Numbers" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <description align="center" color="yellow" colored="false" height="107" resized="false" width="180" x="277" y="119">Split the &quot;Name&quot; column (called Attribute in RapidMiner) along the blank (whitespace symbol \s)</description>
      <description align="center" color="green" colored="true" height="149" resized="true" width="169" x="764" y="125">Replace the missing values in &quot;New Number&quot; (where the Name didn't have a number) with the value of the Number column</description>
    </process>
  </operator>
</process>