# Source code for faster.deduplication

import cupy as cp
import math
import numpy as np
import pandas as pd
from .search import intersect, setdiff, reduce

# CUDA source for the pairwise Jaro-Winkler similarity used by the deduplication routines.
#
# - `jaro_winkler` (device function) scores two strings given their lengths and two
#   boolean scratch arrays that record which characters have been matched. It returns 0
#   when either length is 0 or when no character matches. It never resets the scratch
#   flags, so it relies on receiving scratch memory whose flags are initially false.
# - `jaro_winkler_kernel` handles one element of a chunk of the n x n comparison matrix
#   per thread: row = id / n + start_row and col = id % n. It scores only row > col,
#   writes 1 on the diagonal and leaves output untouched for row < col.
#   Strings are read from the flat `str` buffer using cumulative `offsets`; the scratch
#   slice for a pair starts at buffer1 + off1 + len1 * col (first string) and at
#   buffer2 + off2 + len2 * (end_row - 1 - row) (second string).
jaro_winkler_dedup_code = r"""
extern "C"{

  __device__ float jaro_winkler(const char *str1,
                                const int len1,
                                bool *hash_str1,
                                const char *str2,
                                const int len2,
                                bool *hash_str2,
                                float p) {

    // This function computes the Jaro-Winkler similarity between two strings
    // Inputs:
    // - str1: First string
    // - len1: Length of str1
    // - hash_str1: Working memory to keep track of which characters in str1 are
    //              matching to corresponding characters in str2
    // - str2: Second string
    // - len2: Length of str2
    // - hash_str2: Working memory to keep track of which characters in str2 are
    //              matching to corresponding characters in str1
    // - p: Scaling factor applied to the common prefix
    // Output:
    // - dist: Jaro-Winkler similarity between str1 and str2


    if (len1 == 0 || len2 == 0) {

        // If either string is null, the Jaro-Winkler similarity between str1 and str2 is 0
        return 0.0;

    } else {

        // We compute the number of matching characters between str1 and str2

        // We consider the characters max(len1, len2) / 2 - 1 away from each other
        int max_dist = max(len1, len2) / 2 - 1;

        float match = 0;

        for (int i = 0; i < len1; i++) {

            for (int j = max(0, i - max_dist); j < min(len2, i + max_dist + 1); j++) {

                if (str1[i] == str2[j] && hash_str2[j] == false) {

                    // Two characters are matching if they appear in both strings at most max_dist characters away from each other
                    hash_str1[i] = true;
                    hash_str2[j] = true;
                    match++;
                    break;

                }

            }

        }

        if (match == 0) {

            // If there is no matching characters between both strings, the Jaro-Winkler similarity between them is 0
            return 0.0;

        } else {

            float t = 0;

            int point = 0;

            // If a positive number of matching characters is found, we need to compute the number of transpositions
            // that is, the number of matching characters that are not in the right order divided by two
            for (int i = 0; i < len1; i++) {

                if (hash_str1[i] == true) {

                    while (hash_str2[point] == false) {

                        point++;

                    }

                    if (str1[i] != str2[point++]) {

                        t++;

                    }

                }

            }

            t /= 2;

            // The Jaro similarity between str1 and str2 is defined as follows:
            float dist = ((match / (float)len1) + (match / (float)len2) + ((match - t) / match)) / 3.0;

            // To go from the Jaro similarity to the Jaro-Winkler similarity, we need
            // to compute the length of the common prefix between both strings
            float prefix = 0;

            for (int i = 0; i < min(min(len1, len2), 4); i++) {

                if (str1[i] == str2[i]) {

                    prefix++;

                } else {

                    break;

                }

            }

            // To obtain the Jaro-Winkler similarity, we adjust the Jaro similarity for the length of the common prefix between both strings
            dist += p * prefix * (1 - dist);

            return dist;

        }

    }

  }

  __global__ void jaro_winkler_kernel(char *str,
                                      int *length,
                                      long long *offsets,
                                      int n,
                                      bool *buffer1,
                                      long long *offsets1,
                                      bool *buffer2,
                                      long long *offsets2,
                                      float p,
                                      float *output,
                                      int n_output,
                                      int start_row,
                                      int end_row) {

    const long long id = threadIdx.x + blockDim.x * blockIdx.x;

    if (id < n_output) {

      const int row = id / n + start_row; // Index of the string processed in str1

      const int col = id % n; // Index of the string processed in str2

      // Only computes Jaro-Winkler similarity if row >= col (preventing redundant comparisons)
      if (row >= col) {

        if (row != col) {

          long long off_row = (row == 0 ? 0 : offsets[row - 1]);

          // Move the pointer to the first character of the string we are processing
          char *string1 = str + off_row;

          // Computing the length of the string we are processing
          int len1 = length[row];

          // Move the pointer to the first element of the working memory
          long long off1 = (row == start_row ? 0 : offsets1[row - start_row - 1]);

          bool *hash_str1 = buffer1 + off1 + len1 * col;

          long long off_col = (col == 0 ? 0 : offsets[col - 1]);

          char *string2 = str + off_col;

          int len2 = length[col];

          long long off2 = (col == 0 ? 0 : offsets2[col - 1]);

          bool *hash_str2 = buffer2 + off2 + len2 * (end_row - 1 - row);

          // Compute the Jaro-Winkler similarity between string1 and string2
          output[id] = jaro_winkler(string1, len1, hash_str1, string2, len2, hash_str2, p);

        } else {

          // A string is identical to itself, so its Jaro-Winkler similarity with itself is 1
          output[id] = 1;

        }

      }

    }

  }

}
"""

# CuPy RawKernel handle bound to the `jaro_winkler_kernel` entry point of the source above.
jaro_winkler_dedup_kernel = cp.RawKernel(jaro_winkler_dedup_code, 'jaro_winkler_kernel')

# CUDA source of the kernel that sizes the expansion of pairs of unique values into pairs
# of observations. One thread handles one input pair (input_A[id], input_B[id]) and writes
# to output[id] the number of observation pairs that pair expands to:
#   - len_A * len_B when the two ids differ;
#   - len_A * (len_B - 1) / 2 when both ids are the same unique value.
# NOTE(review): the counts, their product and `output` are all 32-bit ints in the kernel,
# so a pair of very frequent values could overflow - confirm the expected magnitudes.
output_count_dedup_code = r"""
extern "C" {

  __global__ void output_count(long long *input_A,
                               long long *input_B,
                               int n_input,
                               int *unique_count,
                               int *output) {

    // Element of indices being processed
    const long long id = threadIdx.x + blockDim.x * blockIdx.x;

    if (id < n_input) {

      // First input
      long long id_A = input_A[id];

      // Second input
      long long id_B = input_B[id];

      // Number of observations with id_A in df
      int len_A = unique_count[id_A];

      // Number of observations with id_B in df
      int len_B = unique_count[id_B];

      if (id_A != id_B) {

        // Computes the number of pairs of values with id_A and id_B
        output[id] = len_A * len_B;

      } else {

        // If id_A = id_B, we disregard pairs formed by identical elements and those where the row index is less than the column index
        output[id] = len_A * (len_B - 1) / 2;

      }

    }

  }

}
"""

# CuPy RawKernel handle bound to the `output_count` entry point of the source above.
output_count_dedup_kernel = cp.RawKernel(output_count_dedup_code, 'output_count')

# CUDA source of the kernel that expands pairs of unique values into pairs of observations.
# One thread handles one input pair (input_A[id], input_B[id]) and writes its results
# contiguously into `output`, starting at output_offsets[id - 1] (0 for the first pair).
# The observation positions of a unique value are read from `unique_argwhere`, delimited
# by the cumulative `unique_argwhere_offsets`.
#   - Different values: every combination is emitted, encoded as
#     larger_position * n + smaller_position.
#   - Same value: for list positions i > j, unique_argwhere[i] * n + unique_argwhere[j]
#     is emitted, so no observation is paired with itself.
# The in-kernel comments mention df_A / df_B, but both sides index the same
# `unique_argwhere` array here.
indices_inverse_dedup_code = r"""
extern "C" {

  __global__ void indices_inverse(long long *input_A,
                                  long long *input_B,
                                  int n_input,
                                  int n,
                                  long long *unique_argwhere,
                                  int *unique_argwhere_offsets,
                                  int *unique_count,
                                  long long *output,
                                  long long *output_offsets) {

    // Element of indices being processed
    const long long id = threadIdx.x + blockDim.x * blockIdx.x;

    if (id < n_input) {

      long long id_A = input_A[id];

      long long id_B = input_B[id];

      int len_A = unique_count[id_A]; // Number of observations with id_A in df_A

      int len_B = unique_count[id_B]; // Number of observations with id_B in df_B

      // Where observations with id_A in df_A start in unique_A_argwhere
      long long unique_A_off = (id_A == 0 ? 0 : unique_argwhere_offsets[id_A - 1]);

      // Where observations with id_B in df_B start in unique_B_argwhere
      long long unique_B_off = (id_B == 0 ? 0 : unique_argwhere_offsets[id_B - 1]);

      // Offset unique_A_argwhere appropriately
      long long *unique_A_argwhere_off = unique_argwhere + unique_A_off;

      // Offset unique_B_argwhere appropriately
      long long *unique_B_argwhere_off = unique_argwhere + unique_B_off;

      // Where the output starts in output
      long long output_off = (id == 0 ? 0 : output_offsets[id - 1]);

      if (id_A != id_B) {

        int k = 0;

        for (int i = 0; i < len_A ; i++) {

          for (int j = 0; j < len_B; j++) {

            // Considers only pairs with the row index greater than the column index

            if (unique_A_argwhere_off[i] > unique_B_argwhere_off[j]) {

              // Transpose indices of pairs in df_A and df_B in output
              output[output_off + k++] = unique_A_argwhere_off[i] * n + unique_B_argwhere_off[j];

            } else {

              // Transpose indices of pairs in df_A and df_B in output
              output[output_off + k++] = unique_B_argwhere_off[j] * n + unique_A_argwhere_off[i];

            }

          }

        }

      } else {

        int k = 0;

        for (int i = 1; i < len_A; i++) {

          // Considers only pairs with the row index greater than the column index

          for (int j = 0; j < i; j++) {

            // Transpose indices of pairs in df_A and df_B in output
            output[output_off + k++] = unique_A_argwhere_off[i] * n + unique_B_argwhere_off[j];

          }

        }

      }

    }

  }

}
"""

# CuPy RawKernel handle bound to the `indices_inverse` entry point of the source above.
indices_inverse_dedup_kernel = cp.RawKernel(indices_inverse_dedup_code, 'indices_inverse')

# CUDA source of the kernel that enumerates, for every unique value occurring more than
# once, all pairs of observations sharing that value. One thread produces one output
# element: `output_mask[id]` tells which duplicated value the element belongs to, and the
# element's rank `i` inside that value's block is mapped to a (row + 1, col) position of
# the strictly lower triangle, with col <= row.
#
# The row index is estimated in double precision and then corrected with exact integer
# arithmetic. A single-precision square root cannot represent 8 * i + 1 exactly beyond
# 2^24, which produced wrong pairs (self-pairs or reads outside the group) for values
# with more than roughly two thousand duplicates.
indices_inverse_exact_dedup_code = r"""
extern "C" {

  __global__ void indices_inverse(long long *input,
                                  int n,
                                  long long *unique_argwhere,
                                  int *unique_argwhere_offsets,
                                  long long *output,
                                  int *output_mask,
                                  int *output_offsets,
                                  int n_output) {

    const long long id = threadIdx.x + blockDim.x * blockIdx.x; // Element of the output being processed

    if (id < n_output) {

      // Input element to which the processed output element refers
      long long mask = output_mask[id];

      // Where the output of this input element begins in output
      long long output_off = (mask == 0 ? 0 : output_offsets[mask - 1]);

      // Rank of the processed pair among the pairs of this input element
      long long i = id - output_off;

      // Unique value whose duplicates are being paired
      long long in = input[mask];

      // Row index: largest row such that row * (row + 1) / 2 <= i.
      // The floating-point estimate is only a starting point; the two loops
      // below make the result exact regardless of rounding.
      long long row = (long long)floor((sqrt(8.0 * (double)i + 1.0) - 1.0) / 2.0);

      while (row > 0 && row * (row + 1) / 2 > i) {

        row--;

      }

      while ((row + 1) * (row + 2) / 2 <= i) {

        row++;

      }

      // Column index: consider only those lower than or equal to row (paired with row + 1)
      long long col = i - row * (row + 1) / 2;

      // Where the observations carrying this unique value start in unique_argwhere
      long long unique_off = (in == 0 ? 0 : unique_argwhere_offsets[in - 1]);

      long long *unique_argwhere_off = unique_argwhere + unique_off;

      // Encode the pair of observations as row_position * n + col_position
      output[id] = unique_argwhere_off[row + 1] * n + unique_argwhere_off[col];

    }

  }

}
"""

# CuPy RawKernel handle bound to the `indices_inverse` entry point of
# indices_inverse_exact_dedup_code (exact-match variant of the index expansion).
indices_inverse_exact_dedup_kernel = cp.RawKernel(indices_inverse_exact_dedup_code, 'indices_inverse')


# [docs]
def jaro_winkler_dedup_gpu(string, p = 0.1, lower_thr = 0.88, upper_thr = 0.94, num_threads = 256, max_chunk_size = 2.0):
  """
  Computes the Jaro-Winkler similarity between all pairs of strings in an array and returns the indices corresponding to pairs of strings whose Jaro-Winkler similarity falls within specified thresholds.

  The similarity is computed once per pair of distinct values and then expanded to every pair of observations carrying those values. Strings are compared on their UTF-8 encoded bytes.

  :param string: Array of strings.
  :type string: numpy.ndarray
  :param p: Scaling factor applied to the common prefix in the Jaro-Winkler similarity. Defaults to 0.1.
  :type p: float, optional
  :param lower_thr: Lower threshold for discretizing the Jaro-Winkler similarity. Defaults to 0.88.
  :type lower_thr: float, optional
  :param upper_thr: Upper threshold for discretizing the Jaro-Winkler similarity. Defaults to 0.94.
  :type upper_thr: float, optional
  :param num_threads: Number of threads per block. Defaults to 256.
  :type num_threads: int, optional
  :param max_chunk_size: Maximum memory allocation per processing chunk, in gigabytes (GB). Defaults to 2.0.
  :type max_chunk_size: float, optional
  :raises ValueError: If ``max_chunk_size`` is too small to hold the encoded strings and their offsets.
  :return: List containing two sorted arrays of indices:
             1. Indices with Jaro-Winkler similarity of at least ``lower_thr`` and below ``upper_thr``.
             2. Indices with Jaro-Winkler similarity of at least ``upper_thr`` (pairs of identical strings included).

           Indices represent ``i * len(string) + j``, where ``i`` and ``j`` are positions in ``string`` with ``i > j``.
  :rtype: list[cupy.ndarray]
  """

  mempool = cp.get_default_memory_pool()

  # Extract unique values of string (with inverse and counts)
  unique, unique_inverse, unique_counts = np.unique(string, return_inverse = True, return_counts = True)

  n_unique = len(unique)

  # Nothing to compare: the chunk loop below needs at least one unique value
  if n_unique == 0:
    return [cp.zeros(0, dtype = np.int64), cp.zeros(0, dtype = np.int64)]

  # Positions in string grouped by unique value (argsort of the inverse mapping)
  unique_inverse_gpu = cp.array(unique_inverse, dtype = np.int32)

  unique_inverse_sorted = cp.argsort(unique_inverse_gpu)

  del unique_inverse_gpu
  mempool.free_all_blocks()

  # Array containing the number of observations in string associated with each unique value
  unique_counts_gpu = cp.array(unique_counts, dtype = np.int32)

  # Array containing the offsets necessary to read the indices corresponding to each unique value in string
  unique_offsets_gpu = cp.cumsum(unique_counts_gpu, dtype = np.int32)

  # Encode every unique value once so that the flat buffer and the lengths are both
  # expressed in bytes; measuring lengths in characters would misalign the offsets
  # as soon as a value contains a non-ASCII character
  unique_bytes = [value.encode() for value in unique]

  unique_arrow = np.frombuffer(b''.join(unique_bytes), dtype = np.int8)

  len_arrow = len(unique_arrow)

  # Array containing the unique values stored as an arrow
  unique_arrow_gpu = cp.array(unique_arrow, dtype = np.int8)

  # Array containing the length (in bytes) of unique values
  unique_len = np.fromiter((len(value) for value in unique_bytes), dtype = np.int32, count = n_unique)

  del unique_bytes

  unique_len_gpu = cp.array(unique_len, dtype = np.int32)

  # Array containing the offsets necessary to read the unique values in arrow
  offsets_gpu = cp.cumsum(unique_len_gpu, dtype = np.int64)

  # Approximate the number of chunks required to meet max_chunk_size
  bytes_available = max_chunk_size * 1024 ** 3 - len_arrow - (n_unique + 1) * 8

  if bytes_available <= 0:
    raise ValueError("max_chunk_size is too small to hold the encoded strings and their offsets.")

  chunks = math.ceil((n_unique * (n_unique + 1) * 8 + len_arrow * (1 + 2 * n_unique) + (n_unique + 1) * 8) / bytes_available)

  # Create partitions accordingly
  chunk_size_row = math.ceil(n_unique / chunks)

  indices = []

  # Compute the Jaro-Winkler similarity metric by chunk. Stepping through the rows
  # directly (instead of iterating over range(chunks)) guarantees that no chunk is
  # empty when chunk_size_row * (chunks - 1) already covers every row
  for start_row in range(0, n_unique, chunk_size_row):

    offset = start_row * n_unique

    end_row = min(start_row + chunk_size_row, n_unique)

    num_comp = end_row * n_unique - offset

    rows = cp.arange(start_row, end_row, dtype = np.int32)

    # Create working memory for the compute kernel (only for comparisons below the diagonal)
    buffer1_len = unique_len_gpu[rows] * (rows + 1)

    buffer1_offsets = cp.cumsum(buffer1_len, dtype = np.int64)

    del buffer1_len
    mempool.free_all_blocks()

    buffer1 = cp.zeros(int(buffer1_offsets[-1]), dtype = bool)

    if start_row > 0:
      buffer2_len = cp.concatenate((unique_len_gpu[:start_row] * (end_row - start_row), unique_len_gpu[rows] * (end_row - rows)))
    else:
      buffer2_len = unique_len_gpu[rows] * (end_row - rows)

    del rows
    mempool.free_all_blocks()

    buffer2_offsets = cp.cumsum(buffer2_len, dtype = np.int64)

    del buffer2_len
    mempool.free_all_blocks()

    buffer2 = cp.zeros(int(buffer2_offsets[-1]), dtype = bool)

    # Create output vector
    output_gpu = cp.zeros(int(num_comp), dtype = cp.float32)

    # Call the compute kernel on GPU (scalars are sized explicitly to match the kernel signature)
    num_blocks = math.ceil(num_comp / num_threads)

    jaro_winkler_dedup_kernel((num_blocks,), (num_threads,), (unique_arrow_gpu, unique_len_gpu, offsets_gpu, cp.int32(n_unique), buffer1, buffer1_offsets, buffer2, buffer2_offsets, cp.float32(p), output_gpu, cp.int32(num_comp), cp.int32(start_row), cp.int32(end_row)))

    del buffer1, buffer1_offsets, buffer2, buffer2_offsets
    mempool.free_all_blocks()

    # Extract the indices with Jaro-Winkler similarity between lower_thr and upper_thr
    indices1 = cp.bitwise_and(output_gpu >= lower_thr, output_gpu < upper_thr)

    argwhere1 = cp.argwhere(indices1)

    del indices1
    mempool.free_all_blocks()

    # Extract the indices with Jaro-Winkler similarity above upper_thr
    argwhere2 = cp.argwhere(output_gpu >= upper_thr)

    del output_gpu
    mempool.free_all_blocks()

    # Adjust indices relative to the starting row
    output1 = cp.ravel(argwhere1) + int(offset)

    output2 = cp.ravel(argwhere2) + int(offset)

    del argwhere1, argwhere2
    mempool.free_all_blocks()

    indices.append([output1, output2])

    del output1, output2
    mempool.free_all_blocks()

  del unique_arrow_gpu, unique_len_gpu, offsets_gpu
  mempool.free_all_blocks()

  # Concatenate indices from all chunks (as lists: a sequence of arrays is expected)
  indices1 = cp.concatenate([x[0] for x in indices], dtype = np.int64)

  indices2 = cp.concatenate([x[1] for x in indices], dtype = np.int64)

  del indices
  mempool.free_all_blocks()

  if indices1.size > 0:

    # Invert indices1, i.e., translate into indices from the original data frame
    indices1_A = indices1 // n_unique

    indices1_B = indices1 % n_unique

    del indices1
    mempool.free_all_blocks()

    # Calculate the output count for each input element
    output1_count = cp.zeros(indices1_A.size, dtype = np.int32)

    num_blocks = math.ceil(indices1_A.size / num_threads)

    output_count_dedup_kernel((num_blocks,), (num_threads,), (indices1_A, indices1_B, cp.int32(indices1_A.size), unique_counts_gpu, output1_count))

    output1_offsets = cp.cumsum(output1_count, dtype = np.int64)

    del output1_count
    mempool.free_all_blocks()

    output1_gpu = cp.zeros(int(output1_offsets[-1]), dtype = np.int64)

    indices_inverse_dedup_kernel((num_blocks,), (num_threads,), (indices1_A, indices1_B, cp.int32(indices1_A.size), cp.int32(len(string)), unique_inverse_sorted, unique_offsets_gpu, unique_counts_gpu, output1_gpu, output1_offsets))

    del indices1_A, indices1_B, output1_offsets
    mempool.free_all_blocks()

    # Sort output vectors
    output1_sorted = cp.sort(output1_gpu)

    del output1_gpu
    mempool.free_all_blocks()

  else:

    output1_sorted = cp.zeros(0, dtype = np.int64)

  if indices2.size > 0:

    # Invert indices2
    indices2_A = indices2 // n_unique

    indices2_B = indices2 % n_unique

    del indices2
    mempool.free_all_blocks()

    output2_count = cp.zeros(indices2_A.size, dtype = np.int32)

    num_blocks = math.ceil(indices2_A.size / num_threads)

    output_count_dedup_kernel((num_blocks,), (num_threads,), (indices2_A, indices2_B, cp.int32(indices2_A.size), unique_counts_gpu, output2_count))

    output2_offsets = cp.cumsum(output2_count, dtype = np.int64)

    del output2_count
    mempool.free_all_blocks()

    output2_gpu = cp.zeros(int(output2_offsets[-1]), dtype = np.int64)

    indices_inverse_dedup_kernel((num_blocks,), (num_threads,), (indices2_A, indices2_B, cp.int32(indices2_A.size), cp.int32(len(string)), unique_inverse_sorted, unique_offsets_gpu, unique_counts_gpu, output2_gpu, output2_offsets))

    # Release everything that is no longer needed before sorting
    del indices2_A, indices2_B, output2_offsets, unique_inverse_sorted, unique_counts_gpu, unique_offsets_gpu
    mempool.free_all_blocks()

    output2_sorted = cp.sort(output2_gpu)

    del output2_gpu
    mempool.free_all_blocks()

  else:

    output2_sorted = cp.zeros(0, dtype = np.int64)

    del unique_inverse_sorted, unique_counts_gpu, unique_offsets_gpu
    mempool.free_all_blocks()

  return [output1_sorted, output2_sorted]



# [docs]
def exact_dedup_gpu(string, num_threads = 256):
  """
  Compares all pairs of strings in an array and returns the indices of exact matches.

  :param string: Array of strings.
  :type string: numpy.ndarray
  :param num_threads: Number of threads per block. Defaults to 256.
  :type num_threads: int, optional
  :raises ValueError: If the number of matching pairs does not fit the 32-bit offsets used by the GPU kernel.
  :return: List with a single sorted array of indices corresponding to pairs with an exact match.

           Indices represent ``i * len(string) + j``, where ``i`` and ``j`` are positions in ``string`` holding the same value.
  :rtype: list[cupy.ndarray]
  """

  mempool = cp.get_default_memory_pool()

  # Extract unique values of string (with inverse and counts)
  unique, unique_inverse, unique_counts = np.unique(string, return_inverse = True, return_counts = True)

  # Positions in string grouped by unique value (argsort of the inverse mapping)
  unique_inverse_gpu = cp.array(unique_inverse, dtype = np.int64)

  unique_inverse_sorted = cp.argsort(unique_inverse_gpu)

  del unique_inverse_gpu
  mempool.free_all_blocks()

  # Array containing the number of observations in string associated with each unique value
  unique_counts_gpu = cp.array(unique_counts, dtype = np.int32)

  # Array containing the offsets necessary to read the indices corresponding to each unique value in string
  unique_offsets_gpu = cp.cumsum(unique_counts_gpu, dtype = np.int32)

  # Extract unique values with at least two frequencies
  indices = cp.argwhere(unique_counts_gpu > 1)

  indices_ravel = cp.ravel(indices)

  del indices
  mempool.free_all_blocks()

  if indices_ravel.size > 0:

    # Number of pairs produced by each duplicated value: count * (count - 1) / 2.
    # The product is formed in 64 bits (it overflows int32 from 46342 duplicates on)
    # and divided with integer arithmetic, since it is always even
    duplicate_counts = unique_counts_gpu[indices_ravel].astype(np.int64)

    output_count = duplicate_counts * (duplicate_counts - 1) // 2

    del duplicate_counts
    mempool.free_all_blocks()

    # The kernel reads the offsets as 32-bit ints: fail loudly instead of wrapping around
    total_pairs = int(output_count.sum())

    if total_pairs > np.iinfo(np.int32).max:
      raise ValueError("The number of exact-match pairs exceeds the 32-bit limit supported by the GPU kernel.")

    output_offsets = cp.cumsum(output_count, dtype = np.int32)

    # Array indicating the element of indices to which each element of the output refers
    output_mask = cp.repeat(cp.arange(0, indices_ravel.size, dtype = np.int32), repeats = output_count.get().tolist())

    output_gpu = cp.zeros(total_pairs, dtype = np.int64)

    num_blocks = math.ceil(output_gpu.size / num_threads)

    # Scalars are sized explicitly to match the kernel signature
    indices_inverse_exact_dedup_kernel((num_blocks,), (num_threads,), (indices_ravel, cp.int32(len(string)), unique_inverse_sorted, unique_offsets_gpu, output_gpu, output_mask, output_offsets, cp.int32(output_gpu.size)))

    del unique_inverse_sorted, unique_counts_gpu, unique_offsets_gpu, indices_ravel, output_count, output_mask, output_offsets
    mempool.free_all_blocks()

    # Sort the output vector
    output_sorted = cp.sort(output_gpu)

    del output_gpu
    mempool.free_all_blocks()

  else:

    output_sorted = cp.zeros(0, dtype = np.int64)

    del unique_inverse_sorted, unique_counts_gpu, unique_offsets_gpu, indices_ravel
    mempool.free_all_blocks()

  return [output_sorted]



# [docs]
class Deduplication():
  """
  A class for comparing the values of selected variables across all pairs of records of one pandas DataFrame.

  :param df: DataFrame to deduplicate.
  :type df: pandas.DataFrame
  :param Vars_Fuzzy: List of variable names to be compared using fuzzy matching.
  :type Vars_Fuzzy: list[str]
  :param Vars_Exact: List of variable names to be compared using exact matching. Defaults to an empty list.
  :type Vars_Exact: list[str], optional
  :raises Exception: If any name in ``Vars_Fuzzy`` or ``Vars_Exact`` is not found in ``df``.
  """

  def __init__(self, df: pd.DataFrame, Vars_Fuzzy, Vars_Exact = None):

    # A literal [] default would be one list shared by every instance (it is stored
    # on self below), so the empty list is created per call instead
    if Vars_Exact is None:
      Vars_Exact = []

    # Check that inputs are valid
    if any(var not in df.columns for var in Vars_Fuzzy) or any(var not in df.columns for var in Vars_Exact):
      raise Exception("The variable names in Vars_Fuzzy and Vars_Exact must match variable names in df.")

    self.df = df
    self.Vars_Fuzzy = Vars_Fuzzy
    self.Vars_Exact = Vars_Exact
    self.Indices = None
    """
    This attribute stores a list of index arrays representing pairs of records from ``df`` that correspond to each combination of discrete similarity levels across all compared variables.
    
    :return: List of arrays, where each array contains indices of record pairs associated with a specific combination of discrete similarity levels.

             Indices represent ``i * len(df) + j``, where ``i`` and ``j`` are the positions of the two records in ``df``.

             Similarity patterns are defined iteratively across variables (both fuzzy and exact), following the order specified by the user. Variables listed later in the sequence define faster-changing discrete levels of similarity.

             The pattern representing no similarity between records is omitted.
    :rtype: list[cupy.ndarray]
    """
    self._Fit_flag = False


  def fit(self, p = 0.1,Lower_Thr = 0.88, Upper_Thr = 0.94, Num_Threads = 256, Max_Chunk_Size = 2.0):
    """
    Compares all pairs of observations across the selected variables in the dataframe. The result is stored in the Indices attribute.

    :param p: Scaling factor applied to the common prefix in the Jaro-Winkler similarity. Defaults to 0.1.
    :type p: float, optional
    :param Lower_Thr: Lower threshold for discretizing the Jaro-Winkler similarity. Defaults to 0.88.
    :type Lower_Thr: float, optional
    :param Upper_Thr: Upper threshold for discretizing the Jaro-Winkler similarity. Defaults to 0.94.
    :type Upper_Thr: float, optional
    :param Num_Threads: Number of threads per block. Defaults to 256.
    :type Num_Threads: int, optional
    :param Max_Chunk_Size: Maximum memory allocation per processing chunk, in gigabytes (GB). Defaults to 2.0.
    :type Max_Chunk_Size: float, optional
    :raises Exception: If the model has already been fitted, it cannot be fitted again.
    """

    if self._Fit_flag:
      raise Exception("If the model has already been fitted, it cannot be fitted again.")

    mempool = cp.get_default_memory_pool()
    indices = []

    # Loop over variables and compute the Jaro-Winkler similarity between all pairs of values
    for var in self.Vars_Fuzzy:
      indices.append(jaro_winkler_dedup_gpu(self.df[var].to_numpy(), p, Lower_Thr, Upper_Thr, Num_Threads, Max_Chunk_Size))
      mempool.free_all_blocks()

    # Loop over variables and compare all pairs of values for exact matching
    for var in self.Vars_Exact:
      indices.append(exact_dedup_gpu(self.df[var].to_numpy(), Num_Threads))
      mempool.free_all_blocks()

    # Merge discrete levels of similarity over all variables, one variable at a time:
    # self.Indices holds the patterns built so far, indices[0] the levels of the next variable
    self.Indices = indices[0]
    del indices[0]
    mempool.free_all_blocks()

    while len(indices) > 0:

      output = []

      # Levels of the new variable combined with the (omitted) no-similarity pattern
      # NOTE(review): reduce/setdiff/intersect come from .search; presumably reduce folds
      # setdiff over the list starting from its last argument - confirm against .search
      for level in indices[0]:

        output.append(reduce(setdiff, self.Indices, level))
        mempool.free_all_blocks()

      # Existing patterns are consumed front to back to release GPU memory as we go
      while len(self.Indices) > 0:

        # Existing pattern with no similarity on the new variable
        output.append(reduce(setdiff, indices[0], self.Indices[0]))
        mempool.free_all_blocks()

        # Existing pattern combined with each level of the new variable
        for level in indices[0]:

          output.append(intersect(self.Indices[0], level))
          mempool.free_all_blocks()

        del self.Indices[0]
        mempool.free_all_blocks()

      self.Indices = output

      del indices[0], output
      mempool.free_all_blocks()

    self._Fit_flag = True

    del indices
    mempool.free_all_blocks()


  @property
  def Counts(self):
    """
    This property stores the count of record pairs corresponding to each combination of discrete similarity levels across all compared variables.
    
    :return: Array containing the number of pairs for each pattern of discrete similarity levels across variables. The first element is the count of the pattern omitted from ``Indices``.
    :rtype: numpy.ndarray
    :raises Exception: If the model has not been fitted yet.
    """
    if not self._Fit_flag:
      raise Exception("The model must be fitted first.")

    # The result is computed on first access and cached on the instance. Only the
    # "not cached yet" case is handled here; any other error must surface
    try:
      return self._Counts
    except AttributeError:
      counts = [x.size for x in self.Indices] # Number of pairs for each pattern of discrete levels of similarity
      n_obs = len(self.df)
      # n_obs * (n_obs + 1) is always even, so integer division is exact
      # NOTE(review): this total also includes the n_obs self-pairs, which the
      # deduplication routines in this file never emit - confirm this is intended
      total_pairs = n_obs * (n_obs + 1) // 2
      self._Counts = np.concatenate([[total_pairs - np.sum(counts)], counts]) # Add count of omitted pattern
      return self._Counts