@KFStep(name="Sorter", category="Tools", toolTipText="Sort instances in ascending or descending order according to the values of user-specified attributes. Instances can be sorted according to multiple attributes (defined in order). Handles datasets larger than can be fit into main memory via instance connections and specifying the in-memory buffer size. Implements a merge-sort by writing the sorted in-memory buffer to a file when full and then interleaving instances from the disk-based file(s) when the incoming stream has finished.", iconPath="weka/gui/knowledgeflow/icons/Sorter.gif") public class Sorter extends BaseStep
Modifier and Type | Class and Description |
---|---|
static class | Sorter.SortRule<br>Implements a sorting rule based on a single attribute |
Constructor and Description |
---|
Sorter() |
Modifier and Type | Method and Description |
---|---|
java.lang.String | getBufferSize()<br>Get the size of the in-memory buffer |
java.lang.String | getCustomEditorForStep()<br>Return the fully qualified name of a custom editor component (JComponent) to use for editing the properties of the step. |
java.util.List<java.lang.String> | getIncomingConnectionTypes()<br>Get a list of incoming connection types that this step can accept. |
java.util.List<java.lang.String> | getOutgoingConnectionTypes()<br>Get a list of outgoing connection types that this step can produce. |
java.lang.String | getSortDetails()<br>Get the sort rules to use |
java.io.File | getTempDirectory()<br>Get the directory to use for temporary files during incremental operation |
void | processIncoming(Data data)<br>Process an incoming data payload (if the step accepts incoming connections) |
void | setBufferSize(java.lang.String buffSize)<br>Set the size of the in-memory buffer |
void | setSortDetails(java.lang.String sortDetails)<br>Set the sort rules to use |
void | setTempDirectory(java.io.File tempDir)<br>Set the directory to use for temporary files during incremental operation |
void | stepInit()<br>Initialize the step. |
environmentSubstitute, getDefaultSettings, getInteractiveViewers, getInteractiveViewersImpls, getName, getStepManager, globalInfo, isResourceIntensive, isStopRequested, outputStructureForConnectionType, outputStructureForConnectionType, setName, setStepIsResourceIntensive, setStepManager, setStepMustRunSingleThreaded, start, stepMustRunSingleThreaded, stop
public java.lang.String getBufferSize()
@OptionMetadata(displayName="Size of in-mem streaming buffer", description="Number of instances to sort in memory before writing to a temp file (instance connections only)", displayOrder=1) public void setBufferSize(java.lang.String buffSize)
buffSize - the size of the in-memory buffer
@FilePropertyMetadata(fileChooserDialogType=0, directoriesOnly=true) @OptionMetadata(displayName="Directory for temp files", description="Where to store temporary files when spilling to disk", displayOrder=2) public void setTempDirectory(java.io.File tempDir)
tempDir - the temp dir to use
public java.io.File getTempDirectory()
@ProgrammaticProperty public void setSortDetails(java.lang.String sortDetails)
sortDetails - the sort rules in internal string representation
public java.lang.String getSortDetails()
public void stepInit() throws WekaException
WekaException - if a problem occurs during initialization
public java.util.List<java.lang.String> getIncomingConnectionTypes()
public java.util.List<java.lang.String> getOutgoingConnectionTypes()
public void processIncoming(Data data) throws WekaException
processIncoming in interface BaseStepExtender
processIncoming in interface Step
processIncoming in class BaseStep
data - the data to process
WekaException - if a problem occurs
public java.lang.String getCustomEditorForStep()
getCustomEditorForStep in interface Step
getCustomEditorForStep in class BaseStep