Fuzzy String Search.cls


VERSION 1.0 CLASS
BEGIN
MultiUse = -1  'True
End
Attribute VB_Name = "CFuzzyStringSearch"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = True
Attribute VB_PredeclaredId = False
Attribute VB_Exposed = False
Attribute VB_Ext_KEY = "SavedWithClassBuilder" ,"No"
Attribute VB_Ext_KEY = "Top_Level" ,"Yes"



' Class       : CFuzzyStringSearch
' Description : This class performs fuzzy string searching
' (c) Ultimate Napstr

VERSION 1.0 CLASS
BEGIN
MultiUse = -1  'True
End
Attribute VB_Name = "CFuzzyStringSearch"
Attribute VB_GlobalNameSpace = False
Attribute VB_Creatable = True
Attribute VB_PredeclaredId = False
Attribute VB_Exposed = False
Attribute VB_Ext_KEY = "SavedWithClassBuilder" ,"No"
Attribute VB_Ext_KEY = "Top_Level" ,"Yes"


' Class       : CFuzzyStringSearch
' Description : This class performs fuzzy string searching
' (c) Ultimate Napstr


' variables for static property data
Private m_strFindText As String
Private m_strText As String
Private m_intMaxCharacterDifference As Integer


' Internal variables
Private mabytText() As Byte
Private mabytFind() As Byte
Private mlngIndex As Long
Private mlngFindLen As Long
Private mlngLeftDifference As Long
Private mlngRightDifference As Long
Private mlngLeftOffset As Long
Private mlngRightOffset As Long
Private malngDifference() As Long
Private mlngTextLen As Long
Private mfCaseSensitive As Boolean


Private Sub Class_Initialize()
  ' Set initial values to defaults which may be overridden
  ' with property settings
  ' (c) Ultimate Napstr


  m_intMaxCharacterDifference = 1


End Sub


Public Property Get CaseSensitive() As Boolean
  ' Returns: Whether or not the search is case sensitive
  ' (c) Ultimate Napstr


  CaseSensitive = mfCaseSensitive


End Property


Public Property Let CaseSensitive(ByVal fValue As Boolean)
  ' fValue: Set whether or not the search is case sensitive
  ' (c) Ultimate Napstr


  mfCaseSensitive = fValue
  
End Property


Public Property Get FindText() As String
  ' Returns: the text to search for
  ' (c) Ultimate Napstr


  FindText = m_strFindText


End Property


Public Property Let FindText(ByVal strValue As String)
  ' strValue: Set the text to search for
  ' (c) Ultimate Napstr


  Dim intCounter As Integer
    
  On Error GoTo PROC_ERR
  
  m_strFindText = strValue
  
  ' Store the string in a byte array
  If Not mfCaseSensitive Then
    ' If the search is not case sensitive, convert the string to upper case
    mabytFind = StrConv(UCase(strValue), vbFromUnicode)
  Else
    mabytFind = StrConv(strValue, vbFromUnicode)
  End If
  
  ' initialize


  ' store the length of the string
  mlngFindLen = Len(strValue)
  
  ' Create the difference array
  ReDim malngDifference((mlngFindLen + 1) * 4) As Long
  
  ' Initialize the difference indexes
  mlngLeftDifference = 0
  mlngRightDifference = mlngLeftDifference + mlngFindLen + 1
  mlngLeftOffset = mlngRightDifference + mlngFindLen + 1
  mlngRightOffset = mlngLeftOffset + mlngFindLen + 1
  
  ' Initialize the difference array
  For intCounter = 0 To mlngFindLen
    malngDifference(mlngRightDifference + intCounter) = intCounter
    malngDifference(mlngRightOffset + intCounter) = 1
  Next intCounter
  
  ' Reset the index into the search string
  Reset
  
PROC_EXIT:
  Exit Property


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "FindText"
  Resume PROC_EXIT
  
End Property


Public Property Get MaxCharacterDifference() As Integer
  ' Returns: the number of different characters allowed for a match
  ' (c) Ultimate Napstr


  MaxCharacterDifference = m_intMaxCharacterDifference


End Property


Public Property Let MaxCharacterDifference(ByVal intValue As Integer)
  ' intValue: Set the number of different characters allowed for a match
  ' (c) Ultimate Napstr


  m_intMaxCharacterDifference = intValue
  
End Property


Public Property Get Text() As String
  ' Returns: the text being searched
  ' (c) Ultimate Napstr


  Text = m_strText


End Property


Public Property Let Text(ByVal strValue As String)
  ' strValue: Set the text to search
  ' (c) Ultimate Napstr


  On Error GoTo PROC_ERR


  m_strText = strValue
  
  ' Store the string in a byte array
  If Not mfCaseSensitive Then
    ' If the search is not case sensitive, convert the string to upper case
    mabytText = StrConv(UCase(strValue), vbFromUnicode)
  Else
    mabytText = StrConv(strValue, vbFromUnicode)
  End If
  
  mlngTextLen = Len(strValue)
  
PROC_EXIT:
  Exit Property


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "Text"
  Resume PROC_EXIT


End Property


Public Function FindNext( _
  lngFindStart As Long, _
  lngFindLength As Long, _
  intCharacterDifference As Integer) _
  As Boolean
  ' Comments  : Finds the next matching string
  ' Parameters: lngFindStart - The position in the string where the match was
  '             found
  '             lngFindLength - The Length of the match
  '             intCharacterDifference - The number of characters the match
  '             differs from the original
  ' Returns   : True if a match was found, False otherwise
  ' (c) Ultimate Napstr
  '
  Dim lngTemp As Long
  Dim lngDiff1 As Long
  Dim lngDiff2 As Long
  Dim lngDiff3 As Long
  Dim lngCounter As Long
      
  On Error GoTo PROC_ERR
  
  ' By default we have not found a match
  FindNext = False
  ' Reset the start of the match
  lngFindStart = 0


  ' While we have not found a match and there is more text to search
  Do While lngFindStart = 0 And mlngIndex < mlngTextLen - 1
    ' Advance one character into the search text
    mlngIndex = mlngIndex + 1


    ' Swap the left and right offset indexes
    lngTemp = mlngRightOffset
    mlngRightOffset = mlngLeftOffset
    mlngLeftOffset = lngTemp
    malngDifference(mlngRightOffset + 1) = 0
    
    ' Swap the left and right difference indexes
    lngTemp = mlngRightDifference
    mlngRightDifference = mlngLeftDifference
    mlngLeftDifference = lngTemp
    malngDifference(mlngRightDifference) = 0
    
    ' For each character in the find text
    For lngCounter = 0 To mlngFindLen - 1
            
      If (mabytFind(lngCounter) = mabytText(mlngIndex)) Then
        ' If the characters match, get the difference of this character
        lngDiff1 = malngDifference(mlngLeftDifference + lngCounter)
      Else
        ' Otherwise, add one to the difference of this character
        lngDiff1 = malngDifference(mlngLeftDifference + lngCounter) + 1
      End If
      
      ' Determine difference of neighbor characters
      lngDiff2 = malngDifference(mlngLeftDifference + (lngCounter + 1)) + 1
      lngDiff3 = malngDifference(mlngRightDifference + lngCounter) + 1
  
      ' Determine lowest value
      If (lngDiff2 < lngDiff1) Then
        lngDiff1 = lngDiff2
      End If
      
      If (lngDiff3 < lngDiff1) Then
        lngDiff1 = lngDiff3
      End If
  
      ' Assign lowest value to the right difference
      malngDifference(mlngRightDifference + (lngCounter + 1)) = lngDiff1
    Next lngCounter


    ' Determine right offset based on the location of the match found above
    If (mlngFindLen > 1) Then
      For lngCounter = 2 To mlngFindLen
        If (malngDifference(mlngLeftDifference + (lngCounter - 1)) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + (lngCounter - 1)) - 1
        ElseIf (malngDifference(mlngRightDifference + (lngCounter - 1)) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngRightOffset + (lngCounter - 1))
        ElseIf (malngDifference(mlngLeftDifference + lngCounter) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + lngCounter) - 1
        Else
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + (lngCounter - 1)) - 1
        End If
      Next lngCounter
    End If


    ' Check to see if we have an approximate match
    If (malngDifference(mlngRightDifference + mlngFindLen) <= _
      m_intMaxCharacterDifference) Then
      ' If we have a match, assign Start, length, and difference of string
      lngFindStart = mlngIndex + malngDifference(mlngRightOffset + _
        mlngFindLen) + 1
      lngFindLength = (mlngIndex - lngFindStart) + 2
      intCharacterDifference = malngDifference(mlngRightDifference + mlngFindLen)
      FindNext = True
    End If
  Loop
      
PROC_EXIT:
  Exit Function


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "FindNext"
  Resume PROC_EXIT


End Function


Public Sub Reset()
  ' Comments  : Reset the search
  ' Parameters: None
  ' Returns   : Nothing
  ' (c) Ultimate Napstr
  '
  ' Reset the index into the search string
  mlngIndex = -1


End Sub



' variables for static property data
Private m_strFindText As String
Private m_strText As String
Private m_intMaxCharacterDifference As Integer


' Internal variables
Private mabytText() As Byte
Private mabytFind() As Byte
Private mlngIndex As Long
Private mlngFindLen As Long
Private mlngLeftDifference As Long
Private mlngRightDifference As Long
Private mlngLeftOffset As Long
Private mlngRightOffset As Long
Private malngDifference() As Long
Private mlngTextLen As Long
Private mfCaseSensitive As Boolean


Private Sub Class_Initialize()
  ' Set initial values to defaults which may be overridden
  ' with property settings
  ' (c) Ultimate Napstr


  m_intMaxCharacterDifference = 1


End Sub


Public Property Get CaseSensitive() As Boolean
  ' Returns: Whether or not the search is case sensitive
  ' (c) Ultimate Napstr


  CaseSensitive = mfCaseSensitive


End Property


Public Property Let CaseSensitive(ByVal fValue As Boolean)
  ' fValue: Set whether or not the search is case sensitive
  ' (c) Ultimate Napstr


  mfCaseSensitive = fValue
  
End Property


Public Property Get FindText() As String
  ' Returns: the text to search for
  ' (c) Ultimate Napstr


  FindText = m_strFindText


End Property


Public Property Let FindText(ByVal strValue As String)
  ' strValue: Set the text to search for
  ' (c) Ultimate Napstr


  Dim intCounter As Integer
    
  On Error GoTo PROC_ERR
  
  m_strFindText = strValue
  
  ' Store the string in a byte array
  If Not mfCaseSensitive Then
    ' If the search is not case sensitive, convert the string to upper case
    mabytFind = StrConv(UCase(strValue), vbFromUnicode)
  Else
    mabytFind = StrConv(strValue, vbFromUnicode)
  End If
  
  ' initialize


  ' store the length of the string
  mlngFindLen = Len(strValue)
  
  ' Create the difference array
  ReDim malngDifference((mlngFindLen + 1) * 4) As Long
  
  ' Initialize the difference indexes
  mlngLeftDifference = 0
  mlngRightDifference = mlngLeftDifference + mlngFindLen + 1
  mlngLeftOffset = mlngRightDifference + mlngFindLen + 1
  mlngRightOffset = mlngLeftOffset + mlngFindLen + 1
  
  ' Initialize the difference array
  For intCounter = 0 To mlngFindLen
    malngDifference(mlngRightDifference + intCounter) = intCounter
    malngDifference(mlngRightOffset + intCounter) = 1
  Next intCounter
  
  ' Reset the index into the search string
  Reset
  
PROC_EXIT:
  Exit Property


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "FindText"
  Resume PROC_EXIT
  
End Property


Public Property Get MaxCharacterDifference() As Integer
  ' Returns: the number of different characters allowed for a match
  ' (c) Ultimate Napstr


  MaxCharacterDifference = m_intMaxCharacterDifference


End Property


Public Property Let MaxCharacterDifference(ByVal intValue As Integer)
  ' intValue: Set the number of different characters allowed for a match
  ' (c) Ultimate Napstr


  m_intMaxCharacterDifference = intValue
  
End Property


Public Property Get Text() As String
  ' Returns: the text being searched
  ' (c) Ultimate Napstr


  Text = m_strText


End Property


Public Property Let Text(ByVal strValue As String)
  ' strValue: Set the text to search
  ' (c) Ultimate Napstr


  On Error GoTo PROC_ERR


  m_strText = strValue
  
  ' Store the string in a byte array
  If Not mfCaseSensitive Then
    ' If the search is not case sensitive, convert the string to upper case
    mabytText = StrConv(UCase(strValue), vbFromUnicode)
  Else
    mabytText = StrConv(strValue, vbFromUnicode)
  End If
  
  mlngTextLen = Len(strValue)
  
PROC_EXIT:
  Exit Property


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "Text"
  Resume PROC_EXIT


End Property


Public Function FindNext( _
  lngFindStart As Long, _
  lngFindLength As Long, _
  intCharacterDifference As Integer) _
  As Boolean
  ' Comments  : Finds the next matching string
  ' Parameters: lngFindStart - The position in the string where the match was
  '             found
  '             lngFindLength - The Length of the match
  '             intCharacterDifference - The number of characters the match
  '             differs from the original
  ' Returns   : True if a match was found, False otherwise
  ' (c) Ultimate Napstr
  '
  Dim lngTemp As Long
  Dim lngDiff1 As Long
  Dim lngDiff2 As Long
  Dim lngDiff3 As Long
  Dim lngCounter As Long
      
  On Error GoTo PROC_ERR
  
  ' By default we have not found a match
  FindNext = False
  ' Reset the start of the match
  lngFindStart = 0


  ' While we have not found a match and there is more text to search
  Do While lngFindStart = 0 And mlngIndex < mlngTextLen - 1
    ' Advance one character into the search text
    mlngIndex = mlngIndex + 1


    ' Swap the left and right offset indexes
    lngTemp = mlngRightOffset
    mlngRightOffset = mlngLeftOffset
    mlngLeftOffset = lngTemp
    malngDifference(mlngRightOffset + 1) = 0
    
    ' Swap the left and right difference indexes
    lngTemp = mlngRightDifference
    mlngRightDifference = mlngLeftDifference
    mlngLeftDifference = lngTemp
    malngDifference(mlngRightDifference) = 0
    
    ' For each character in the find text
    For lngCounter = 0 To mlngFindLen - 1
            
      If (mabytFind(lngCounter) = mabytText(mlngIndex)) Then
        ' If the characters match, get the difference of this character
        lngDiff1 = malngDifference(mlngLeftDifference + lngCounter)
      Else
        ' Otherwise, add one to the difference of this character
        lngDiff1 = malngDifference(mlngLeftDifference + lngCounter) + 1
      End If
      
      ' Determine difference of neighbor characters
      lngDiff2 = malngDifference(mlngLeftDifference + (lngCounter + 1)) + 1
      lngDiff3 = malngDifference(mlngRightDifference + lngCounter) + 1
  
      ' Determine lowest value
      If (lngDiff2 < lngDiff1) Then
        lngDiff1 = lngDiff2
      End If
      
      If (lngDiff3 < lngDiff1) Then
        lngDiff1 = lngDiff3
      End If
  
      ' Assign lowest value to the right difference
      malngDifference(mlngRightDifference + (lngCounter + 1)) = lngDiff1
    Next lngCounter


    ' Determine right offset based on the location of the match found above
    If (mlngFindLen > 1) Then
      For lngCounter = 2 To mlngFindLen
        If (malngDifference(mlngLeftDifference + (lngCounter - 1)) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + (lngCounter - 1)) - 1
        ElseIf (malngDifference(mlngRightDifference + (lngCounter - 1)) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngRightOffset + (lngCounter - 1))
        ElseIf (malngDifference(mlngLeftDifference + lngCounter) < _
          malngDifference(mlngRightDifference + lngCounter)) Then
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + lngCounter) - 1
        Else
          malngDifference(mlngRightOffset + lngCounter) = _
            malngDifference(mlngLeftOffset + (lngCounter - 1)) - 1
        End If
      Next lngCounter
    End If


    ' Check to see if we have an approximate match
    If (malngDifference(mlngRightDifference + mlngFindLen) <= _
      m_intMaxCharacterDifference) Then
      ' If we have a match, assign Start, length, and difference of string
      lngFindStart = mlngIndex + malngDifference(mlngRightOffset + _
        mlngFindLen) + 1
      lngFindLength = (mlngIndex - lngFindStart) + 2
      intCharacterDifference = malngDifference(mlngRightDifference + mlngFindLen)
      FindNext = True
    End If
  Loop
      
PROC_EXIT:
  Exit Function


PROC_ERR:
  MsgBox "Error: " & Err.Number & ". " & Err.Description, , _
    "FindNext"
  Resume PROC_EXIT


End Function


Public Sub Reset()
  ' Comments  : Reset the search
  ' Parameters: None
  ' Returns   : Nothing
  ' (c) Ultimate Napstr
  '
  ' Reset the index into the search string
  mlngIndex = -1


End Sub