Examples of use
An example of using extraction scripts in C#
// Populate the collections used below: regular-expression matches are stored
// under the collection name "keyWords"; dictionary words and NER objects are
// extracted by the two calls that follow.
FieldExtractor.ExtractRegularExpression( "(grant(s))|(convey to)|(grant)", "keyWords" );
FieldExtractor.ExtractWordsFromUserDictionary("dictionary", "English");
FieldExtractor.ExtractNerObjects();
// Access identified objects by collection name
IExtractedObjects personObjects = FieldExtractor.ExtractedObjects( "NerPerson" );
IExtractedObjects keyWordsObjects = FieldExtractor.ExtractedObjects( "keyWords" );
// Look for matching person to left of keyword
for( int i = 0; i < keyWordsObjects.Count; i++ ) {
    // Take the i-th keyword from the keyword collection (the loop is bounded by
    // keyWordsObjects.Count, so this is the collection that must be indexed).
    IExtractedObject keyWord = keyWordsObjects.Item( i );
    IInterval span = keyWord.Span;
    // Search the person collection relative to the keyword's start position.
    IExtractedObject grantor = personObjects.Find( span.StartPos, false );
    // NOTE(review): presumably Find returns null when no person is found;
    // confirm against the IExtractedObjects reference. The guard is harmless otherwise.
    if ( grantor == null ) {
        continue;
    }
    string grantorName = grantor.Value;
    // Skip very short values and one explicitly excluded name.
    if ( grantorName.Length > 2 && grantorName != "Doug Darrell" ) {
        FieldExtractor.PutSpanToField( grantor.Span, "NlpField1" );
    }
}
// Read the source text exposed by the extractor
string fullText = FieldExtractor.SourceText;
// Write a text span to a field; the arguments used here are 0 and the index
// of the last character of the source text
int lastCharIndex = fullText.Length - 1;
FieldExtractor.PutTextToField( 0, lastCharIndex, "NlpField2" );
// An XML query that looks for two-word dictionary phrases:
// two <Required> forms (attributes dictionary1 and dictionary2)
// inside a single <Contain MaxDistance="1"> element.
string twoWordQuery = string.Concat(
    "<Request> ",
    "<Query>",
    "<Contain MaxDistance=\"1\">",
    "<Required>",
    "<Form><Attributes><Attribute>dictionary1</Attribute></Attributes></Form>",
    "</Required>",
    "<Required>",
    "<Form><Attributes><Attribute>dictionary2</Attribute></Attributes></Form>",
    "</Required>",
    "</Contain>",
    "</Query>",
    "</Request>" );
// Run the query under the name "query1" and save its results to a field
FieldExtractor.RunQueryAndSaveToField( twoWordQuery, "query1", "NlpField3" );
// Run the same query under the name "query2" and keep the returned collection
IExtractedObjects resultsFromRun = FieldExtractor.RunQuery( twoWordQuery, "query2" );
// Alternatively, fetch the results of an already executed query by its name
IExtractedObjects resultsByName = FieldExtractor.QueryResults( "query2" );
Script parameters
Name | Type | Permissions | Value |
FieldExtractor | IFieldExtractor | Read | Identifies fields in the text of a document |
Sample address extraction script
The script is called for the entire source field.
// Run address parsing on the source field
this.ParseAddress();
// Fetch the zip code and street collections produced by the parser
var zipCodes = this.ExtractedObjects( "NerZipCode" );
var streets = this.ExtractedObjects( "NerStreet" );
// Copy every zip code span into the "ZipCode" field
var zipIndex = 0;
while( zipIndex < zipCodes.Count ) {
    var zipSpan = zipCodes.Item( zipIndex ).Span;
    this.PutSpanToField( zipSpan, "ZipCode" );
    zipIndex++;
}
// Copy every street span into the "Street" field
var streetIndex = 0;
while( streetIndex < streets.Count ) {
    var streetSpan = streets.Item( streetIndex ).Span;
    this.PutSpanToField( streetSpan, "Street" );
    streetIndex++;
}
Sample address extraction script
The script is called for part of the source field.
// Detect all NER objects in the text of the field or section
this.ExtractNerObjects();
// Get the collection of detected Address objects
var address = this.ExtractedObjects( "NerAddress" );
// Process each detected address separately
for( var addressSpanIndex = 0; addressSpanIndex < address.Count; addressSpanIndex++ ) {
    // Span of the current address; reused for the field and for parsing below
    var addressSpan = address.Item( addressSpanIndex ).Span;
    this.PutSpanToField( addressSpan, "Address" );
    // Assign a unique prefix to the names of all component collections for the given address
    var collectionName = "nerAddress" + String( addressSpanIndex );
    // Parse the address located between the span's start and end positions
    this.ParseAddressInPosition( collectionName, addressSpan.StartPos, addressSpan.EndPos );
    // Save the components into separate fields
    var zip = this.ExtractedObjects( collectionName, "NerZipCode" );
    var street = this.ExtractedObjects( collectionName, "NerStreet" );
    for( var i = 0; i < zip.Count; i++ ) {
        this.PutSpanToField( zip.Item( i ).Span, "ZipCode" );
    }
    for( var j = 0; j < street.Count; j++ ) {
        this.PutSpanToField( street.Item( j ).Span, "Street" );
    }
}
12.04.2024 18:16:03