algae/spec/binary_trees/find_substrings

master
  1<<-DOC
  2You have two arrays of strings, words and parts.
  3Return an array that contains the strings from words,
  4modified so that any occurrences of the substrings from parts are surrounded by square brackets [],
  5following these guidelines:
  6
  7If several parts substrings occur in one string in words, choose the longest one.
  8If there is still more than one such part, then choose the one that appears first in the string.
  9
 10Example
 11
 12For words = ["Apple", "Melon", "Orange", "Watermelon"] and parts = ["a", "mel", "lon", "el", "An"], the output should be
 13findSubstrings(words, parts) = ["Apple", "Me[lon]", "Or[a]nge", "Water[mel]on"].
 14
 15While "Watermelon" contains three substrings from the parts array, "a", "mel", and "lon", "mel" is the longest substring
 16that appears first in the string.
 17
 18Input/Output
 19
 20[time limit] 4000ms (rb)
 21[input] array.string words
 22
 23An array of strings consisting only of uppercase and lowercase English letters.
 24
 25Guaranteed constraints:
 260 ≤ words.length ≤ 104,
 271 ≤ words[i].length ≤ 30.
 28
 29[input] array.string parts
 30
 31An array of strings consisting only of uppercase and lower English letters.
 32Each string is no more than 5 characters in length.
 33
 34Guaranteed constraints:
 350 ≤ parts.length ≤ 105,
 361 ≤ parts[i].length ≤ 5,
 37parts[i] ≠ parts[j].
 38
 39[output] array.string
 40DOC
 41
 42describe "#find_substrings" do
 43  #def find_substrings(words, parts)
 44    #regex = /(#{parts.join("|")})/
 45
 46    #puts regex.inspect
 47    #words.map do |word|
 48      #if match = word.match(regex)
 49        #max = match.captures.max { |a, b| a.length <=> b.length }
 50        #puts [word, match.captures, max].inspect
 51        #word.gsub(max, "[#{max}]")
 52      #else
 53        #word
 54      #end
 55    #end
 56  #end
 57
 58  def find_substrings(words, parts)
 59    parts = parts.sort { |x, y| y.length <=> x.length }
 60    words.map do |word|
 61      current = nil
 62      length = nil
 63      index = nil
 64      parts.each do |part|
 65        next if length && length > part.size
 66
 67        if next_index = word.index(part)
 68          if current.nil? || part.length > length || (part.length == length && next_index < index)
 69            current = part
 70            length = part.length
 71            index = next_index
 72          end
 73        end
 74      end
 75      current ? word.sub(current, "[#{current}]") : word
 76    end
 77  end
 78
 79  def find_substrings(words, parts)
 80    return words if parts.empty?
 81
 82    parts_hash = parts.map { |x| [x, x] }.to_h
 83    sorted = parts_hash.keys.sort_by(&:length)
 84    max = sorted.last.size
 85    min = sorted.first.size
 86
 87    words.map do |word|
 88      match = nil
 89      length = nil
 90
 91      0.upto(word.size - 1) do |n|
 92        max.downto(min) do |m|
 93          if (part = parts_hash[word[n...n+m]]) && (match.nil? || part.length > length)
 94            match = part
 95            length = part.length
 96          end
 97        end
 98      end
 99
100      match ? word.sub(match, "[#{match}]") : word
101    end
102  end
103
104  <<-DOC
105  Apple
106  [0,1,2,3,4]
107  [1,2,3,4,5]
108  [A,p,p,l,e]
109root of the tree (A): array position 1
110root's left child (B): array position 2
111root's right child (C): array position 3
112...
113left child of node in array position K: array position 2K
114right child of node in array position K: array position 2K+1
115
116    A
117   / \
118  p   p
119 /\
120l  e
121
122  DOC
123  #def to_tree(string)
124    #string.chars.each_with_index do |char, index|
125    #end
126  #end
127
128  #def to_sexpression(tree, root)
129    #return "()" if tree[root-1].nil?
130    #"(#{tree[root-1]}#{to_sexpression(tree, 2*root)},#{to_sexpression(tree, 2*root + 1)})"
131  #end
132
133  #def find_substrings(words, parts)
134    #words.map do |word|
135      #word_x = to_sexpression(word.chars, 1)
136      #result = nil
137      #parts.each do |part|
138        #part_x = to_sexpression(part.chars, 1)
139        #result = word.sub(part, "[#{part}]") if word_x.include?(part_x)
140      #end
141      #result
142    #end
143  #end
144
145  #def find_substrings(words, parts)
146    #words.each do |word|
147      #parts.each do |parts|
148
149      #end
150    #end
151  #end
152
153  [
154    { words: ["Apple", "Melon", "Orange", "Watermelon"], parts: ["a", "mel", "lon", "el", "An"], x: ["Apple", "Me[lon]", "Or[a]nge", "Water[mel]on"] },
155    { words: ["Aaaaaaaaa", "bcdEFGh"], parts: ["aaaaa", "Aaaa", "E", "z", "Zzzzz"], x: ["A[aaaaa]aaa", "bcd[E]FGh"] },
156    { words: [], parts: ["aaaaa", "Aaaa", "E", "z", "Zzzzz", "a", "mel", "lon", "el", "An"], x: [] },
157    { words: ["Aaaaaaaaa", "bcdEFGh", "Apple", "Melon", "Orange", "Watermelon"], parts: [], x: ["Aaaaaaaaa", "bcdEFGh", "Apple", "Melon", "Orange", "Watermelon"] },
158    { words: ["neuroses", "myopic", "sufficient", "televise", "coccidiosis", "gules", "during", "construe", "establish", "ethyl"], parts: ["aaaaa", "Aaaa", "E", "z", "Zzzzz", "a", "mel", "lon", "el", "An", "ise", "d", "g", "wnoVV", "i", "IUMc", "P", "KQ", "QfRz", "Xyj", "yiHS"], x: ["neuroses", "myop[i]c", "suff[i]cient", "telev[ise]", "cocc[i]diosis", "[g]ules", "[d]uring", "construe", "est[a]blish", "ethyl"] },
159    { words: ["abc"], parts: ["abc"], x: ["[abc]"] },
160    { words: ["abc"], parts: ["ABC"], x: ["abc"] },
161    { words: ["a", "b"], parts: ["b"], x: ["a", "[b]"] },
162    { words: ["aaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaab", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaac"], parts: ["aaaaa", "bbbbb", "a", "aa", "aaaa", "AAAAA", "aaa", "aba", "aaaab", "c", "bbbb", "d", "g", "ccccc", "B", "C", "P", "D"], x: ["[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaab", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaab", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaab", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaab", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaab", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaa", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaa", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaaa", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaaa", "[aaaaa]aaaaaaaaaaaaaaaaaaaaaaaac"] },
163    { words: ["during"], parts: ["d", "g", "i"], x: ["[d]uring"] },
164  ].each do |x|
165    it do
166      result = find_substrings(x[:words], x[:parts])
167      expect(result).to eql(x[:x])
168    end
169  end
170end