Option Explicit
Const sMasterCellListSHEET = "Sheet1"
Const sMasterCellListHeaderCELL = "A2"
Sub ClearReultsAreaInColumnsBandC()
Sheets(sMasterCellListSHEET).Range("B3:C" & Rows.Count).ClearContents
End Sub
Sub AddContentToColumnsBandC()
Dim a() As String
Dim wsMaster As Worksheet
Dim i As Long
Dim iColumn As Long
Dim iLastIndex As Long
Dim iRow As Long
Dim bNeedMore As Boolean
Dim sOutputColumn As String
Dim sToken As String
Dim sTokenPrevious As String
Dim sValue As String
'Create the Worksheet Object
Set wsMaster = ThisWorkbook.Sheets(sMasterCellListSHEET)
'Clear the Output Area
Call ClearReultsAreaInColumnsBandC
'Get the Row before the first row and get the data column
iRow = wsMaster.Range(sMasterCellListHeaderCELL).Row 'One row prior to the first data row
iColumn = wsMaster.Range(sMasterCellListHeaderCELL).Column
'Process 1 row at a time
bNeedMore = True
While bNeedMore
'Increment the row number
iRow = iRow + 1
'Get the next cell as text (remove leading and trailing spaces)
sValue = Trim(wsMaster.Cells(iRow, iColumn))
'Exit if the cell is blank
'Otherwise process the data line
If Len(sValue) = 0 Then
bNeedMore = False
Else
'Remove Embedded Commas from numbers
'Add a space between a Number and a Non-Number
sValue = NormalizeText(sValue)
'Convert everyting to Upper Case and add a leading space and a trailing space
sValue = " " & UCase(sValue) & " "
'Replace punctuation with commas
sValue = Replace(sValue, ".", " ") 'Replace periods [.] in the string with spaces
sValue = Replace(sValue, ",", " ") 'Replace commas [,] in the string with spaces
'Remove irrelevant KEYWORDS
'NOTE: Each KEYWORD below is surrounded by a leading and trailing space, and replaced by a single space
sValue = Replace(sValue, " WAS ", " ")
sValue = Replace(sValue, " WERE ", " ")
sValue = Replace(sValue, " PERSON ", " ")
sValue = Replace(sValue, " PEOPLE ", " ")
sValue = Replace(sValue, " MEN ", " ")
sValue = Replace(sValue, " MAN ", " ")
sValue = Replace(sValue, " WOMAN ", " ")
sValue = Replace(sValue, " WOMEN ", " ")
sValue = Replace(sValue, " CIVILIANS ", " ")
sValue = Replace(sValue, " CIVILIAN ", " ")
sValue = Replace(sValue, " VICTIMS ", " ")
sValue = Replace(sValue, " VICTIM ", " ")
'Parse the string into tokens
iLastIndex = LjmParseString(sValue, a)
'Search the tokens for KEYWORDS
'If a KEYWORD is Found, then examine the previous KEYWORD
'If the previous KEYWORD is Numeric (e.g. '10' or '10+') put the KEYWORD in the Output Area
'Multiple Matches for the same line are allowed
If iLastIndex >= 0 Then
sTokenPrevious = a(0)
For i = 1 To iLastIndex
'Get the Next Token
sToken = a(i)
sOutputColumn = ""
If sToken = "DEAD" Or sToken = "KILLED" Or sToken = "DEATHS" Then
sOutputColumn = "B"
End If
If sToken = "INJURED" Or sToken = "WOUNDED" Then
sOutputColumn = "C"
End If
If Len(sOutputColumn) > 0 Then
If IsNumeric(sTokenPrevious) Then
'Get the value in the Ouput Column (remove leading and trailing spaces)
'If the value is BLANK, put the PREVIOUS TOKEN in the Ouput Columm
'Otherwise, append the PREVIOUS TOKEN to the Ouput Columm
sValue = Trim(wsMaster.Cells(iRow, sOutputColumn).Value)
If Len(sValue) = 0 Then
sValue = sTokenPrevious
Else
sValue = sValue & " ; " & sTokenPrevious
If Left(sValue, 1) <> "'" Then
sValue = "'" & sValue
End If
End If
wsMaster.Cells(iRow, sOutputColumn).Value = sValue
End If
End If
'Save the Token for use as the 'Previous Token' in the next pass
sTokenPrevious = sToken
Next i
End If
End If
Wend
'Clear Worksheet object
Set wsMaster = Nothing
End Sub
Function NormalizeText(sInvalue As String) As String
'This normalizes text by:
'a. Removing commas embedded inside numbers, only if the string contains 3 or more characters
'b. Adding an embedded space, if a number is next to a non-number
Dim i As Long
Dim sValue As String
Dim sValueNew As String
Dim c1 As String
Dim c2 As String
Dim c3 As String
'''''''''''''''''''''''''''''''''''''''''''''''''''
'Remove commas embedded inside numbers, only if the string contains 3 or more characters
'''''''''''''''''''''''''''''''''''''''''''''''''''
sValue = sInvalue
If Len(sValue) < 3 Then
sValueNew = sValue
Else
'Initialize Characters 1 and 2
c1 = Mid(sValue, 1, 1)
c2 = Mid(sValue, 2, 1)
'Remove Character 1 if it is a comma
If c1 <> "," Then
sValueNew = c1
End If
'Process each remaining Character in the string
For i = 3 To Len(sValue)
'Get the New Character 3
c3 = Mid(sValue, i, 1)
If c2 = "," And IsNumeric(c1) And IsNumeric(c3) Then
'Do nothing - comma is embedded between two numbers
Else
sValueNew = sValueNew & c2
End If
'Move the values for the next pass
c1 = c2
c2 = c3
Next i
sValueNew = sValueNew & c3
End If
'''''''''''''''''''''''''''''''''''''''''''''''''''
'Add an embedded space if a number is next to a non-number
'''''''''''''''''''''''''''''''''''''''''''''''''''
sValue = sValueNew
If Len(sValue) >= 2 Then
'Initialize Character 1
c1 = Mid(sValue, 1, 1)
sValueNew = c1
'Process each remaining Character in the string
For i = 2 To Len(sValue)
'Get the New Character 2
c2 = Mid(sValue, i, 1)
'Ignore '+' sign when processing
If c2 <> "+" Then
If IsNumeric(c1) = False And IsNumeric(c2) = False Then
'Do nothing - do nothing 2 consecutive NUMBERS are OK
ElseIf IsNumeric(c1) = True And IsNumeric(c2) = True Then
'Do nothing - do nothing 2 consecutive NON-NUMBERS are OK
ElseIf IsNumeric(c1) = True And IsNumeric(c2) = False Then
'Add an embedded Space - Adjoining NUMBER and NON-NUMBER
sValueNew = sValueNew & " "
Else
'Add an embedded Space - Adjoining NUMBER and NON-NUMBER
sValueNew = sValueNew & " "
End If
End If
sValueNew = sValueNew & c2
'Move the values for the next pass
c1 = c2
Next i
End If
'Set the Return Value
NormalizeText = sValueNew
End Function
Function LjmParseString(InputString As String, ByRef sArray() As String) As Integer
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
'
' This parses a space delimited string into an array of tokens.
' Leading and trailing spaces are stripped from the string in the process.
'
'''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Dim i As Integer
Dim LastNonEmpty As Integer
Dim iSplitIndex As Integer
'Initialization
LastNonEmpty = -1
'Split the string into tokens
sArray = Split(InputString)
iSplitIndex = UBound(sArray)
'Remove the null tokens
For i = 0 To iSplitIndex
If sArray(i) <> "" Then
'Get rid of all the whitespace
LastNonEmpty = LastNonEmpty + 1
sArray(LastNonEmpty) = sArray(i)
End If
Next i
'Return the number of indices
LjmParseString = LastNonEmpty
End Function
If you need help with Macros and or VBA:
Bookmarks