Как найти точку вставки в массиве с помощью бинарного поиска?


Основная идея бинарного поиска в массиве проста, но он может возвращать "приблизительный" индекс, если поиск не сможет найти точный элемент. (иногда мы можем получить индекс, для которого значение больше или меньше искомого значения).

Для поиска точной точки вставки, похоже, что после того, как мы получили приблизительное местоположение, нам, возможно, потребуется "сканировать" влево или вправо для точного местоположения вставки, так что, скажем, в Ruby, мы можем сделать arr.insert(exact_index, value)

У меня есть следующее решение, но обработка для части, когда begin_index >= end_index немного беспорядочная. Интересно, можно ли использовать более элегантное решение?

(это решение не требует поиска нескольких совпадений, если найдено точное совпадение, поэтому индекс, возвращаемый для точного совпадения, может указывать на любой индекс, соответствующий значению... но я думаю, что если все они целые числа, мы всегда можем искать a - 1 после того, как мы знаем, что точное соответствие найдено, чтобы найти левую границу, или искать a + 1 для правой граница.)

Мое решение:

DEBUGGING = true

def binary_search_helper(arr, a, begin_index, end_index)
  middle_index = (begin_index + end_index) / 2
  puts "a = #{a}, arr[middle_index] = #{arr[middle_index]}, " +
           "begin_index = #{begin_index}, end_index = #{end_index}, " +
           "middle_index = #{middle_index}" if DEBUGGING
  if arr[middle_index] == a
    return middle_index
  elsif begin_index >= end_index
    index = [begin_index, end_index].min
    return index if a < arr[index] && index >= 0  #careful because -1 means end of array
    index = [begin_index, end_index].max
    return index if a < arr[index] && index >= 0
    return index + 1
  elsif a > arr[middle_index]
    return binary_search_helper(arr, a, middle_index + 1, end_index)
  else
    return binary_search_helper(arr, a, begin_index, middle_index - 1)
  end
end

# for [1,3,5,7,9], searching for 6 will return index for 7 for insertion
# if exact match is found, then return that index
def binary_search(arr, a)
  puts "nSearching for #{a} in #{arr}" if DEBUGGING
  return 0 if arr.empty?
  result = binary_search_helper(arr, a, 0, arr.length - 1)
  puts "the result is #{result}, the index for value #{arr[result].inspect}" if DEBUGGING
  return result
end


arr = [1,3,5,7,9]
b = 6
arr.insert(binary_search(arr, b), b)
p arr

arr = [1,3,5,7,9,11]
b = 6
arr.insert(binary_search(arr, b), b)
p arr

arr = [1,3,5,7,9]
b = 60
arr.insert(binary_search(arr, b), b)
p arr

arr = [1,3,5,7,9,11]
b = 60
arr.insert(binary_search(arr, b), b)
p arr

arr = [1,3,5,7,9]
b = -60
arr.insert(binary_search(arr, b), b)
p arr

arr = [1,3,5,7,9,11]
b = -60
arr.insert(binary_search(arr, b), b)
p arr

arr = [1]
b = -60
arr.insert(binary_search(arr, b), b)
p arr

arr = [1]
b = 60
arr.insert(binary_search(arr, b), b)
p arr

arr = []
b = 60
arr.insert(binary_search(arr, b), b)
p arr

И результат:

Searching for 6 in [1, 3, 5, 7, 9]
a = 6, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = 6, arr[middle_index] = 7, begin_index = 3, end_index = 4, middle_index = 3
a = 6, arr[middle_index] = 5, begin_index = 3, end_index = 2, middle_index = 2
the result is 3, the index for value 7
[1, 3, 5, 6, 7, 9]

Searching for 6 in [1, 3, 5, 7, 9, 11]
a = 6, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = 6, arr[middle_index] = 9, begin_index = 3, end_index = 5, middle_index = 4
a = 6, arr[middle_index] = 7, begin_index = 3, end_index = 3, middle_index = 3
the result is 3, the index for value 7
[1, 3, 5, 6, 7, 9, 11]

Searching for 60 in [1, 3, 5, 7, 9]
a = 60, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = 60, arr[middle_index] = 7, begin_index = 3, end_index = 4, middle_index = 3
a = 60, arr[middle_index] = 9, begin_index = 4, end_index = 4, middle_index = 4
the result is 5, the index for value nil
[1, 3, 5, 7, 9, 60]

Searching for 60 in [1, 3, 5, 7, 9, 11]
a = 60, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = 60, arr[middle_index] = 9, begin_index = 3, end_index = 5, middle_index = 4
a = 60, arr[middle_index] = 11, begin_index = 5, end_index = 5, middle_index = 5
the result is 6, the index for value nil
[1, 3, 5, 7, 9, 11, 60]

Searching for -60 in [1, 3, 5, 7, 9]
a = -60, arr[middle_index] = 5, begin_index = 0, end_index = 4, middle_index = 2
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 1, middle_index = 0
a = -60, arr[middle_index] = 9, begin_index = 0, end_index = -1, middle_index = -1
the result is 0, the index for value 1
[-60, 1, 3, 5, 7, 9]

Searching for -60 in [1, 3, 5, 7, 9, 11]
a = -60, arr[middle_index] = 5, begin_index = 0, end_index = 5, middle_index = 2
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 1, middle_index = 0
a = -60, arr[middle_index] = 11, begin_index = 0, end_index = -1, middle_index = -1
the result is 0, the index for value 1
[-60, 1, 3, 5, 7, 9, 11]

Searching for -60 in [1]
a = -60, arr[middle_index] = 1, begin_index = 0, end_index = 0, middle_index = 0
the result is 0, the index for value 1
[-60, 1]

Searching for 60 in [1]
a = 60, arr[middle_index] = 1, begin_index = 0, end_index = 0, middle_index = 0
the result is 1, the index for value nil
[1, 60]

Searching for 60 in []
[60]
2 3

2 ответа:

Это код из java в java.утиль.Массивы.binarySearch как включено в Oracles Java:

    /**
     * Searches the specified array of ints for the specified value using the
     * binary search algorithm.  The array must be sorted (as
     * by the {@link #sort(int[])} method) prior to making this call.  If it
     * is not sorted, the results are undefined.  If the array contains
     * multiple elements with the specified value, there is no guarantee which
     * one will be found.
     *
     * @param a the array to be searched
     * @param key the value to be searched for
     * @return index of the search key, if it is contained in the array;
     *         otherwise, <tt>(-(<i>insertion point</i>) - 1)</tt>.  The
     *         <i>insertion point</i> is defined as the point at which the
     *         key would be inserted into the array: the index of the first
     *         element greater than the key, or <tt>a.length</tt> if all
     *         elements in the array are less than the specified key.  Note
     *         that this guarantees that the return value will be &gt;= 0 if
     *         and only if the key is found.
     */
    public static int binarySearch(int[] a, int key) {
        return binarySearch0(a, 0, a.length, key);
    }

    // Like public version, but without range checks.
    private static int binarySearch0(int[] a, int fromIndex, int toIndex,
                                     int key) {
        int low = fromIndex;
        int high = toIndex - 1;

        while (low <= high) {
            int mid = (low + high) >>> 1;
            int midVal = a[mid];

            if (midVal < key)
                low = mid + 1;
            else if (midVal > key)
                high = mid - 1;
            else
                return mid; // key found
        }
        return -(low + 1);  // key not found.
    }
Алгоритм оказался подходящим, и мне нравится тот факт, что вы мгновенно узнаете по результату, является ли это точным совпадением или намеком на точку вставки.

Вот как я перевел бы это на ruby:

# Inserts the specified value into the specified array using the binary
# search algorithm. The array must be sorted prior to making this call.
# If it is not sorted, the results are undefined.  If the array contains
# multiple elements with the specified value, there is no guarantee
# which one will be found.
#
# @param [Array] array the ordered array into which value should be inserted
# @param [Object] value the value to insert
# @param [Fixnum|Bignum] from_index ordered sub-array starts at
# @param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# @return [Array] the resulting array
def self.insert(array, value, from_index=0,  to_index=array.length)
  array.insert insertion_point(array, value, from_index, to_index), value
end

# Searches the specified array for an insertion point ot the specified value
# using the binary search algorithm.  The array must be sorted prior to making
# this call. If it is not sorted, the results are undefined.  If the array
# contains multiple elements with the specified value, there is no guarantee
# which one will be found.
#
# @param [Array] array the ordered array into which value should be inserted
# @param [Object] value the value to insert
# @param [Fixnum|Bignum] from_index ordered sub-array starts at
# @param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# @return [Fixnum|Bignum] the position where value should be inserted
def self.insertion_point(array, value, from_index=0,  to_index=array.length)
  raise(ArgumentError, 'Invalid Range') if from_index < 0 || from_index > array.length || from_index > to_index || to_index > array.length
  binary_search = _binary_search(array, value, from_index, to_index)
  if binary_search < 0
    -(binary_search + 1)
  else
    binary_search
  end
end

# Searches the specified array for the specified value using the binary
# search algorithm.  The array must be sorted prior to making this call.
# If it is not sorted, the results are undefined.  If the array contains
# multiple elements with the specified value, there is no guarantee which
# one will be found.
#
# @param [Array] array the ordered array in which the value should be searched
# @param [Object] value the value to search for
# @param [Fixnum|Bignum] from_index ordered sub-array starts at
# @param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# @return [Fixnum|Bignum] if > 0 position of value, otherwise -(insertion_point + 1)
def self.binary_search(array, value, from_index=0,  to_index=array.length)
  raise(ArgumentError, 'Invalid Range') if from_index < 0 || from_index > array.length || from_index > to_index || to_index > array.length
  _binary_search(array, value, from_index, to_index)
end

private
# Like binary_search, but without range checks.
#
# @param [Array] array the ordered array in which the value should be searched
# @param [Object] value the value to search for
# @param [Fixnum|Bignum] from_index ordered sub-array starts at
# @param [Fixnum|Bignum] to_index ordered sub-array ends the field before
# @return [Fixnum|Bignum] if > 0 position of value, otherwise -(insertion_point + 1)
def self._binary_search(array, value, from_index, to_index)
  low = from_index
  high = to_index - 1

  while low <= high do
    mid = (low + high) / 2
    mid_val = array[mid]

    if mid_val < value
      low = mid + 1
    elsif mid_val > value
      high = mid - 1
    else
      return mid # value found
    end
  end
  -(low + 1) # value not found.
end

Код возвращает те же значения, что и ОП, предоставленный для его тестовых данных.

На самом деле, вместо проверки на begin_index >= end_index, это может быть лучше обработано с помощью begin_index > end_index, и решение намного чище:

def binary_search_helper(arr, a, begin_index, end_index)    
  if begin_index > end_index
    return begin_index
  else
    middle_index = (begin_index + end_index) / 2
    if arr[middle_index] == a
      return middle_index
    elsif a > arr[middle_index]
      return binary_search_helper(arr, a, middle_index + 1, end_index)
    else
      return binary_search_helper(arr, a, begin_index, middle_index - 1)
    end
  end
end

# for [1,3,5,7,9], searching for 6 will return index for 7 for insertion
# if exact match is found, then return that index
def binary_search(arr, a)
  return binary_search_helper(arr, a, 0, arr.length - 1)
end

И использование итерации вместо рекурсии может быть быстрее и меньше беспокоиться о переполнении стека.