SubstringWithConcatenationOfAllWords [source code]
public class SubstringWithConcatenationOfAllWords {
static
/******************************************************************************/
class Solution {
    /**
     * Returns all starting indices in s of substrings that are a concatenation
     * of every word in words exactly once, without intervening characters.
     *
     * All words share one length, so the string is scanned once per alignment
     * offset in [0, word_len), and each scan only ever moves in word_len-sized
     * steps — the key insight keeping total window work O(word_len * n/word_len).
     */
    public List<Integer> findSubstring(String s, String[] words) {
        List<Integer> res = new ArrayList<> ();
        if (words.length == 0 || s.length () == 0)
            return res;
        int word_len = words[0].length ();
        if (word_len > s.length ())
            return res;
        // Required multiplicity of each word (duplicates in words are allowed).
        Map<String, Integer> counts = new HashMap<> ();
        for (String word : words)
            counts.put (word, counts.getOrDefault (word, 0) + 1);
        for (int i = 0; i < word_len; i++)
            find (s, counts, i, word_len, res, words.length);
        return res;
    }

    /**
     * Sliding-window scan over positions index, index + word_len, ... of s,
     * appending every full-concatenation start to res.
     *
     * Fix vs. the previous version: the "not a word" case used a tail-recursive
     * call to restart the scan, so the call stack grew O(n / word_len) deep on
     * inputs dominated by invalid words. The window reset is now iterative,
     * keeping stack depth constant; results and their order are unchanged.
     *
     * @param s          haystack string
     * @param _counts    immutable target multiplicities (copied, never mutated)
     * @param index      alignment offset in [0, word_len)
     * @param word_len   common length of all words
     * @param res        output list of match start indices
     * @param target_cnt total number of words a full window must contain
     */
    void find (String s, Map<String, Integer> _counts, int index, int word_len, List<Integer> res, int target_cnt) {
        // Remaining multiplicity each word may still contribute to the window.
        Map<String, Integer> counts = new HashMap<> (_counts);
        // Window is s[i, j): starts empty, grows on the right, shrinks on the left.
        int i = index, window_cnt = 0;
        for (int j = index; j + word_len <= s.length (); j += word_len) {
            String peek = s.substring (j, j + word_len);
            Integer count = counts.get (peek);
            if (count == null) {
                // Not a word from the list: no match can span it, so restart
                // with an empty window just past it. Critical: advance by
                // word_len, never by 1 — other alignments are covered by the
                // other offsets of the caller's loop.
                counts = new HashMap<> (_counts);
                window_cnt = 0;
                i = j + word_len;
                continue;
            }
            if (count == 0) {
                // peek already appears as often as allowed: pop words off the
                // left edge until one occurrence of peek is released.
                while (i < j && !s.startsWith (peek, i)) {
                    String i_word = s.substring (i, i + word_len);
                    counts.put (i_word, counts.get (i_word) + 1);
                    i += word_len;
                    window_cnt--;
                }
                if (i < j) {
                    counts.put (peek, counts.get (peek) + 1);
                    i += word_len;
                    window_cnt--;
                }
            }
            counts.put (peek, counts.get (peek) - 1);
            window_cnt++;
            if (window_cnt == target_cnt) {
                // Full concatenation found; record it and slide one word right
                // so overlapping later matches can still be detected.
                res.add (i);
                String i_word = s.substring (i, i + word_len);
                counts.put (i_word, counts.get (i_word) + 1);
                window_cnt--;
                i += word_len;
            }
        }
    }
}
/******************************************************************************/
public static void main(String[] args) {
    SubstringWithConcatenationOfAllWords.Solution tester = new SubstringWithConcatenationOfAllWords.Solution();
    // Test fixtures: even slots hold the haystack, odd slots the word list.
    String[][] inputs = {
        {"barfoothefoobarman"}, {"foo", "bar"},
        {"foobarfoo"}, {"foo", "bar"},
        {"a"}, {"a"},
        {"barfoofoobarthefoobarman"}, {"bar","foo","the"},
        {"wordgoodgoodgoodbestword"}, {"word","good","best","good"},
        {"aaaaaaaa"}, {"aa","aa","aa"},
        {"mississippi"}, {"si","is"},
    };
    // Expected start indices for each case (order-insensitive).
    Integer[][] answers = {
        {0, 9},
        {0, 3},
        {0},
        {6, 9, 12},
        {8},
        {0, 1, 2},
        {1,4},
    };
    int cases = inputs.length / 2;
    for (int t = 0; t < cases; t++) {
        String s = inputs[2 * t][0];
        String[] words = inputs[2 * t + 1];
        Set<Integer> expected = new HashSet<> (Arrays.asList (answers[t]));
        System.out.println (Printer.separator ());
        // Compare as sets: the problem says the order of indices does not matter.
        Set<Integer> actual = new HashSet<> (tester.findSubstring (s, words));
        boolean ok = actual.equals (expected);
        System.out.printf ("%s and [%s] -> %s, expected: %s%n",
            s, Printer.array (words), Printer.wrapColor (actual.toString (), ok ? "green" : "red"), expected
        );
    }
}
}
看起来好像不难, 不过这个题目被标记成hard, 自然是有坑的, 比如, boofarboo
这个, 你怎么把两个都找到?
好像用sliding window可以写写看;
好像大部分的collection都可以直接判断equality by element:
java> List<Integer> ls1 = new ArrayList<> ()
java.util.List<java.lang.Integer> ls1 = []
java> ls1.add (1)
java.lang.Boolean res6 = true
java> ls1.add (2)
java.lang.Boolean res7 = true
java> List<Integer> ls2 = new ArrayList<> (ls1)
java.util.List<java.lang.Integer> ls2 = [1, 2]
java> ls1.equals (ls2)
java.lang.Boolean res9 = true
java> ls2.add (3)
java.lang.Boolean res10 = true
java> ls1.equals (ls2)
java.lang.Boolean res11 = false
java> ls1.add (3)
java.lang.Boolean res12 = true
java> ls1.equals (ls2)
java.lang.Boolean res13 = true
刚开始以为这题可以确认words里面不会有重复, 所以想着干脆把0/1的value换成boolean, 速度快一点, 而且更readable一些; 不过后来被test5给break掉了; 所以我觉得这种东西, 面试的时候一定要问清楚面试官; 实际上, 还是要用integer;
写了好久, 最后还是没写出来, 被test7给break掉, 感觉逻辑有深层缺陷, 写不成了;
代码留存:
class Solution {
public List<Integer> findSubstring(String s, String[] words) {
Set<Integer> res = new HashSet<> ();
if (words.length == 0 || s.length () == 0)
return new ArrayList<> ();
int word_len = words[0].length ();
if (word_len > s.length ())
return new ArrayList<> ();
Map<String, Integer> counts = new HashMap<> ();
int target_cnt = 0;
for (String word : words) {
counts.put (word, counts.getOrDefault (word, 0) + 1);
target_cnt++;
}
System.out.println (counts.size () + ", " + word_len);//debug1
find (s, counts, seekStart (s, counts, 0, word_len), word_len, res, target_cnt);
return new ArrayList<> (res);
}
int seekStart (String s, Map<String, Integer> _counts, int index, int word_len) {
System.out.printf ("seek from %d, %s\n", index, _counts);//debug1
int res = -1;
for (int i = index; i + word_len <= s.length (); i++) { // critical: include =
if (_counts.containsKey (s.substring (i, i + word_len))) {
res = i;
break;
}
}
System.out.printf ("\tseek success at %d\n", res);//debug1
return res;
}
void find (String s, Map<String, Integer> _counts, int index, int word_len, Set<Integer> res, int target_cnt) {
System.out.printf ("find at %d\n", index);//debug1
Map<String, Integer> counts = new HashMap<> (_counts);
// It's actually better to start with an EMPTY window (J == I)
int i = index, j = index, window_cnt = 0;
while (j < s.length ()) {
System.out.printf ("\t%s, start:(%d), i:(%d),j:(%d), cnt:(%d), res:%s\n", counts, index, i, j, window_cnt, res);//debug1
if (j + word_len > s.length ())
break;
String peek = s.substring (j, j + word_len);
Integer count = counts.get (peek);
if (count == null) {
i = seekStart (s, _counts, j, word_len);
if (i >= 0)
find (s, _counts, i, word_len, res, target_cnt);
return;
}
if (count == 0) {
while (i < j && !s.startsWith (peek, i)) {
String i_word = s.substring (i, i + word_len);
counts.put (i_word, counts.get (i_word) + 1);
i += word_len;
window_cnt--;
}
if (i < j) {
counts.put (peek, counts.get (peek) + 1);
i += word_len;
window_cnt--;
}
}
counts.put (peek, counts.get (peek) - 1);
window_cnt++;
if (window_cnt == target_cnt) {
res.add (i);
int new_start = seekStart (s, _counts, i + 1, word_len);
if (new_start >= 0)
find (s, _counts, new_start, word_len, res, target_cnt);
String i_word = s.substring (i, i + word_len);
counts.put (i_word, counts.get (i_word) + 1);
window_cnt--;
i += word_len;
}
j += word_len;
System.out.printf ("\t%s, start:(%d), i:(%d), j:(%d), cnt:(%d), res:%s\n\n", counts, index, i, j, window_cnt, res);//debug1
}
}
}
看到discussion最优解之后, 知道我唯一就差的那一步, 其他的我都想到了: 当我们走到了一个invalid的word的位置的时候, 直接就应该继续走过一个word_len的长度, 并且重置history; 而我之前的做法, 是当出现invalid的时候, 继续按+1的速度搜到一个valid的word为止. 但是这个思路最后就是被test7给搞掉了. 正确的思路, 其实就是走word_len个sliding window, 每一个iteration当中, 走的最小单位始终是word_len, 而我之前的错误做法当中, 经常是按照1的单位前进. 那么问题来了, 为什么我之前的想法就不对呢?
很头疼, 到现在也想不通为什么我之前那样想有问题? 我倒是大概能想通为什么这里这个做法是正确的, 但是为什么我之前的思路不对? 如果真正面试的时候碰到这种走到自己都不知道哪里错了的死胡同, 感觉就真的姜了.
没有editorial;
@shichaotan said in An O(N) solution with detailed explanation:
// travel all the words combinations to maintain a window
// there are wl(word len) times travel
// each time, n/wl words, mostly 2 times travel for each word
// one left side of the window, the other right side of the window
// so, time complexity O(wl * 2 * N/wl) = O(2N)
vector<int> findSubstring(string S, vector<string> &L) {
vector<int> ans;
int n = S.size(), cnt = L.size();
if (n <= 0 || cnt <= 0) return ans;
// init word occurence
unordered_map<string, int> dict;
for (int i = 0; i < cnt; ++i) dict[L[i]]++;
// travel all sub string combinations
int wl = L[0].size();
for (int i = 0; i < wl; ++i) {
int left = i, count = 0;
unordered_map<string, int> tdict;
for (int j = i; j <= n - wl; j += wl) {
string str = S.substr(j, wl);
// a valid word, accumulate results
if (dict.count(str)) {
tdict[str]++;
if (tdict[str] <= dict[str])
count++;
else {
// a more word, advance the window left side possiablly
while (tdict[str] > dict[str]) {
string str1 = S.substr(left, wl);
tdict[str1]--;
if (tdict[str1] < dict[str1]) count--;
left += wl;
}
}
// come to a result
if (count == cnt) {
ans.push_back(left);
// advance one word
tdict[S.substr(left, wl)]--;
count--;
left += wl;
}
}
// not a valid word, reset all vars
else {
tdict.clear();
count = 0;
left = j + wl;
}
}
}
return ans;
}
这个做法实际上写的还是非常的简洁的, 虽然我不喜欢他的写法, 我认为我的写法更加好掌握, 感觉就是一个写法的差别而已, 能够好好掌握我自己的写法风格就ok了, 不用太纠结于模仿别人的风格.
他这个算法优于我一开始的思路的一个最主要的问题在于, 任何一个iteration的sliding window里面, 都要保持最小步进是word_len, 永远不要按照+1的step前进. 目前我还没有办法对这个概念提出justification.
另外这个人的复杂度分析有问题, 应该是KN.
TODO: 想通这个问题, 到底怎么避免我之前那个错误的想法思路; 怎么意识到自己为什么错了;
其实我也不是说不理解这里的intuition, 这里的intuition就是你从结果出发, 思考你最后要找的结果到底是什么: 其实就是run of words, 那么如果你把每一个找到的run往前倒退, 那么最后肯定能够停顿在[0, word_len)
当中的某一个点; 我想要的, 就是一个更加generalized的, 能指导我去思考其他类似题目的思路; 是因为看到这里讨论的是这些word, 所以认为需要用这种parallel iteration的思路吗? 这种平行思路其实也不是说以前没有见到过;
@qlan2 said in An O(N) solution with detailed explanation:
I think the complexity is O(KN), K = L[0].length(), N = S.length(). If you think I am wrong, please point it out, thanks!
This is my Java version of Sliding Window.
// time: O(KN)
public class Solution {
// Sliding Window 360ms
// ask interviewer if words is empty, should I return empty list
public List<Integer> findSubstring(String S, String[] L) {
List<Integer> res = new LinkedList<>();
if (L.length == 0 || S.length() < L.length * L[0].length()) return res;
int N = S.length(), M = L.length, K = L[0].length();
Map<String, Integer> map = new HashMap<>(), curMap = new HashMap<>();
for (String s : L) {
if (map.containsKey(s)) map.put(s, map.get(s) + 1);
else map.put(s, 1);
}
String str = null, tmp = null;
for (int i = 0; i < K; i++) {
int count = 0; // remark: reset count
for (int l = i, r = i; r + K <= N; r += K) {
str = S.substring(r, r + K);
if (map.containsKey(str)) {
if (curMap.containsKey(str)) curMap.put(str, curMap.get(str) + 1);
else curMap.put(str, 1);
if (curMap.get(str) <= map.get(str)) count++;
while (curMap.get(str) > map.get(str)) {
tmp = S.substring(l, l + K);
curMap.put(tmp, curMap.get(tmp) - 1);
l += K;
if (curMap.get(tmp) < map.get(tmp)) count--;
}
if (count == M) {
res.add(l);
tmp = S.substring(l, l + K);
curMap.put(tmp, curMap.get(tmp) - 1);
l += K;
count--;
}
}else {
curMap.clear();
count = 0;
l = r + K;
}
}
curMap.clear();
}
return res;
}
}
他这个基本就是把OP原来的版本给直接抄了一遍, 还好吧只能说;
他们认为是KN好像主要是因为string操作的原因; 对的, 因为你每一sliding window的iteration里面确实是N / WL, 所以最后算出来是O(N), 但是这不代表最后就是KN, 还是要考虑string操作本身的代价;
@simonef said in An O(N) solution with detailed explanation:
No, just no. Looking it up in the hash map ONCE you have the hash is (expected) constant time, but computing the hash itself is linear in the size of the string. And even if hash computation were constant you'd still have substring creation which is definitely NOT constant.
另外这个2其实是有点subtle的, OP这个点抓的很准;
@jianchao.li.fighter said in Easy Two-Map Solution (C++/Java):
I think the following code is self-explanatory enough. We use an
unordered_map<string, int> counts
to record the expected times of each word and anotherunordered_map<string, int> seen
to record the times we have seen. Then we check for every possible position ofi
. Once we meet an unexpected word or the times of some word is larger than its expected times, we stop the check. If we finish the check successfully, pushi
to the resultindexes
.
- C++
class Solution {
public:
vector<int> findSubstring(string s, vector<string>& words) {
unordered_map<string, int> counts;
for (string word : words)
counts[word]++;
int n = s.length(), num = words.size(), len = words[0].length();
vector<int> indexes;
for (int i = 0; i < n - num * len + 1; i++) {
unordered_map<string, int> seen;
int j = 0;
for (; j < num; j++) {
string word = s.substr(i + j * len, len);
if (counts.find(word) != counts.end()) {
seen[word]++;
if (seen[word] > counts[word])
break;
}
else break;
}
if (j == num) indexes.push_back(i);
}
return indexes;
}
};
- Java
class Solution {
public List<Integer> findSubstring(String s, String[] words) {
final Map<String, Integer> counts = new HashMap<>();
for (final String word : words) {
counts.put(word, counts.getOrDefault(word, 0) + 1);
}
final List<Integer> indexes = new ArrayList<>();
final int n = s.length(), num = words.length, len = words[0].length();
for (int i = 0; i < n - num * len + 1; i++) {
final Map<String, Integer> seen = new HashMap<>();
int j = 0;
while (j < num) {
final String word = s.substring(i + j * len, i + (j + 1) * len);
if (counts.containsKey(word)) {
seen.put(word, seen.getOrDefault(word, 0) + 1);
if (seen.get(word) > counts.getOrDefault(word, 0)) {
break;
}
} else {
break;
}
j++;
}
if (j == num) {
indexes.add(i);
}
}
return indexes;
}
}
看懂了才发现一个问题, 这个算法并不是O(N)的sliding window啊, 这个就是从每一个位置开始扫, 一旦失败就拉倒;
不过代码倒是很干净的, 尤其是seen.get(word) > counts.getOrDefault(word, 0)
这里, 很有灵性, 我以前没有想到过把getOrDefault
放到右边来用;
不过总体来说这个应该还是一个O(N^2)的算法, 这样的算法最后居然也是AC了;
不过他自己认为其实是KN:
@jianchao.li.fighter said in Easy Two-Map Solution (C++/Java):
Hi, @rabeeh. The space complexity is easy to conclude: the major additional space usage is
counts
andseen
, both of which consumesO(words.size())
space.For the time complexity, it depends on the two
for
loops. The outer loop will loop forO(n)
times and the inner loop will loop forO(num)
times. So the time complexity is at mostO(n * num)
.Correct me if you have different ideas.
好像也有道理啊, 只要inner loop用num来控制, 而不是用s本身的长度来控制, 最后是不会到N^2的; 不过他这里的这个num*N, 实际上还是比wl*N要严重的; 而且他这里其实应该也还要多乘一个wl, 凭什么别人的string都不当成constant操作你就可以幸免呢;
@shaka.shadows said in Accepted Java solution 12ms with explanation:
It's not too hard to find some resemblance between this problem and [minimum-window-substring][1]. Actually the main difference is the fact that we are interested at some interval length: we want intervals with fixed length K * M, where K is the number of strings in the "words" array and M the length of each target string. In order to apply the same idea we used for that problem, all we need to do is to map each string from the "words" array to something we are able to index (I prefer to use hashing for this). Also, in order to speed up the algorithm, we can find all occurrences of those strings in S (which is equivalent to do it on demand, but we will potentially do the same matching twice). Notice that, we can simply apply these occurrences as they appear because we are assured that no word is contained by some other. Finally, we use all this information to process each possibility. Notice here that, the fact that all strings has the same length, implies that we have just M (being M the length of each target string) possible starting points, hence we end up performing M linear scans over array with length O(N/M) (being N the length of S) and that makes the scanning stage of the algorithm to be linear on the length of S.
其实并没有解释为什么平行处理就可以了;
public List<Integer> findSubstring(String s, String[] words) {
int N = s.length();
List<Integer> indexes = new ArrayList<Integer>(s.length());
if (words.length == 0) {
return indexes;
}
int M = words[0].length();
if (N < M * words.length) {
return indexes;
}
int last = N - M + 1;
//map each string in words array to some index and compute target counters
Map<String, Integer> mapping = new HashMap<String, Integer>(words.length);
int [][] table = new int[2][words.length];
int failures = 0, index = 0;
for (int i = 0; i < words.length; ++i) {
Integer mapped = mapping.get(words[i]);
if (mapped == null) {
++failures;
mapping.put(words[i], index);
mapped = index++;
}
++table[0][mapped];
}
//find all occurrences at string S and map them to their current integer, -1 means no such string is in words array
int [] smapping = new int[last];
for (int i = 0; i < last; ++i) {
String section = s.substring(i, i + M);
Integer mapped = mapping.get(section);
if (mapped == null) {
smapping[i] = -1;
} else {
smapping[i] = mapped;
}
}
//fix the number of linear scans
for (int i = 0; i < M; ++i) {
//reset scan variables
int currentFailures = failures; //number of current mismatches
int left = i, right = i;
Arrays.fill(table[1], 0);
//here, simple solve the minimum-window-substring problem
while (right < last) {
while (currentFailures > 0 && right < last) {
int target = smapping[right];
if (target != -1 && ++table[1][target] == table[0][target]) {
--currentFailures;
}
right += M;
}
while (currentFailures == 0 && left < right) {
int target = smapping[left];
if (target != -1 && --table[1][target] == table[0][target] - 1) {
int length = right - left;
//instead of checking every window, we know exactly the length we want
if ((length / M) == words.length) {
indexes.add(left);
}
++currentFailures;
}
left += M;
}
}
}
return indexes;
}
[1]: https://leetcode.com/problems/minimum-window-substring/
代码又臭又长, 真的不想看;
@zhidu said in Accepted Java solution 12ms with explanation:
I doubt if u would have enough time to do this much coding during an interview
感觉我其实还是这道题没有快速想到思路; 这个题目思路其实也不是特别奇葩, 只能说还是怪自己. 写完一个主要部分能够AC的程序其实我是有把握30分钟内搞定的, 但是这个题目的各种Corner Case特别多, debug的时间算进去, 估计就没有把握搞定了;
@shaka.shadows said in Accepted Java solution 12ms with explanation:
@zhidu you're absolutely right. Usually in a real interview, you don't have enough time to write large codes but, the good news are... you are rarely required to write large codes too. In my experience (took an Amazon SDE in-person interview round just for preparation, took a Google SRE interview seriously and got a job offer), I was not required to write such large codes but, instead I was required to write the crucial part of potentially large solutions and simply explain (very clearly) the remaining non-written code. Thing is here you cannot explain something to the compiler, you have to write it, and that's why some very efficient solutions have relatively large codes here.
也不知道是不是真的. 不过其实这个题目下面的tag是没有company的, 估计真的就是LeetCode自己哪里看来搞人的;
这个人应该是相当有水平的, 这个代码看起来很费劲;
Map<String, Integer> mapping = new HashMap<String, Integer>(words.length);
int [][] table = new int[2][words.length];
int failures = 0, index = 0;
for (int i = 0; i < words.length; ++i) {
Integer mapped = mapping.get(words[i]);
if (mapped == null) {
++failures;
mapping.put(words[i], index);
mapped = index++;
}
++table[0][mapped];
}
这个自己手动算了一个简单case之后, 发现这个其实就是一个interning的过程: mapping是用来保存反向查询的; 然后这个table是用来保存每一个string(现在可以用一个integer来代表了)的count; 然后failures其实就是一个count of novelty types. 讲道理, 这个东西还是值得学习一下的; 毕竟以前其实是没有专门练过这个东西, 也就是NLP的时候Jason稍微提了一下;
他最后滑动窗口的处理跟我们其他的人也都不一样, 他这个是正儿八经的, 当碰到invalid的之后, 是不Reset的; 具体怎么做到的, 完全依靠的是代码的细腻之处; 这个人水平真的是很高的;
@FreeTymeKiyan said in Simple Java Solution with Two Pointers and Map:
My idea is pretty simple. Just build a map for the words and their relative count in L. Then we traverse through S to check whether there is a match.
public static List<Integer> findSubstring(String S, String[] L) {
List<Integer> res = new ArrayList<Integer>();
if (S == null || L == null || L.length == 0) return res;
int len = L[0].length(); // length of each word
Map<String, Integer> map = new HashMap<String, Integer>(); // map for L
for (String w : L) map.put(w, map.containsKey(w) ? map.get(w) + 1 : 1);
for (int i = 0; i <= S.length() - len * L.length; i++) {
Map<String, Integer> copy = new HashMap<String, Integer>(map);
for (int j = 0; j < L.length; j++) { // checkc if match
String str = S.substring(i + j*len, i + j*len + len); // next word
if (copy.containsKey(str)) { // is in remaining words
int count = copy.get(str);
if (count == 1) copy.remove(str);
else copy.put(str, count - 1);
if (copy.isEmpty()) { // matches
res.add(i);
break;
}
} else break; // not in L
}
}
return res;
}
At first I was gonna to use a set for words. Owing to the fact that duplicate is allowed in L, we need to use map instead.
总体来说思路很简单, 直接从每一个位置开始, 然后搜索L * word_len长度, 进行一个匹配, 注意, 这个代码好像现在TLE了:
@hizhaojun said in Simple Java Solution with Two Pointers and Map:
@jocelynayoga I think the copy.remove() is time-consuming, and it's called whenever a substring matches the strings of words. The method should be replaced be checking if the string's count is less than 1. my code with a similar logic got AC with this improvement.
但是有人说就算把这个remove
给换掉还是TLE; 另外, 你自己注意尽量不要滥用这个remove, 就按照平常的正常的get/put就行了;
这个是下面改了的一个AC的版本:
public class Solution {
public List<Integer> findSubstring(String s, String[] words) {
int n = words.length, m = words[0].length();
List<Integer> res = new ArrayList();
// Store string array with hashtable.
HashMap<String, Integer> map = new HashMap();
for (String str : words) {
if (map.containsKey(str)) map.put(str, map.get(str)+1);
else map.put(str, 1);
}
// m is the length of each word in array words, each time get a substring of length m to check if it exits in words
for (int i = 0; i <= s.length()-n*m; i++) {
HashMap<String, Integer> copy = new HashMap(map);
// if it exits, use another hashset to avoid duplicate and count the number to reach n, the number of words in array words
int k = n, j = i;
while (k > 0) {
String str = s.substring(j, j+m);
if (!copy.containsKey(str) || copy.get(str) < 1) break;
copy.put(str, copy.get(str)-1);
k--; j+=m;
}
if (k == 0) res.add(i);
}
return res;
}
}
submission最优解: 27(90):
class Solution {
public List<Integer> findSubstring(String s, String[] words) {
// Let n=s.length, k=words[0].length traverse s with indices i, i+k,
// i+2k, ... for 0<=i<k, so that the time complexity is O(n).
List<Integer> res = new ArrayList<Integer>();
int len = s.length(), m = words.length, n;
if (len == 0 || m == 0 || (n = words[0].length()) == 0) return res;
HashMap<String, Integer> wDict = new HashMap<String, Integer>();
for (String word : words) wDict.put(word, wDict.getOrDefault(word, 0) + 1);
int i, j, start, x, wordsLen = m * n;
HashMap<String, Integer> curDict = new HashMap<String, Integer>();
String test, temp;
for (i = 0; i < n; i++) {
curDict.clear();
start = i;
if (start + wordsLen > len) return res;
for (j = i; j <= len - n; j += n) {
test = s.substring(j, j + n);
if (wDict.containsKey(test)) {
if (!curDict.containsKey(test)) {
curDict.put(test, 1);
start = checkFound(res, start, wordsLen, j, n, curDict, s);
} else {
x = curDict.get(test);
if (x < wDict.get(test)) {
curDict.put(test, x + 1);
start = checkFound(res, start, wordsLen, j, n, curDict, s);
continue;
}
while (!(temp = s.substring(start, start + n)).equals(test)) {
decreaseCount(curDict, temp);
start += n;
}
start += n;
}
} else {
// totally failed up to index j+k, slide start and reset all
start = j + n;
curDict.clear();
}
if (start + wordsLen > len) break;
}
}
return res;
}
public int checkFound(List<Integer> res, int start, int wordsLen, int j, int k,
HashMap<String, Integer> curDict, String s) {
if (start + wordsLen == j + k) {
res.add(start);
// slide start to the next word
decreaseCount(curDict, s.substring(start, start + k));
return start + k;
}
return start;
}
public void decreaseCount(HashMap<String, Integer> curDict, String key) {
// remove key if curDict.get(key)==1, otherwise decrease it by 1
int x = curDict.get(key);
if (x == 1)
curDict.remove(key);
else
curDict.put(key, x - 1);
}
}
看完了感觉这个人的算法跟我的算法几乎是别无二致的, 不知道为什么我的算法最后比他的慢这么多? 是因为我用recursion写的时候中间藏了一些逻辑冗余? 看不出来;
TODO: 为什么我的慢这么多?
另外, 他这里也用了remove
操作, 然后要分开判断wDict.containsKey
和curDict.containsKey
, 我感觉这样带来的后果是他这个代码其实逻辑没有我的清晰;
其他的真的看不出来太多亮点了; 他这个checkFound
的函数思路, 其实我也是有过的, 不过说实话, 花拳绣腿, 并不是什么实质性的提高;
Problem Description
You are given a string, s, and a list of words, words, that are all of the same length. Find all starting indices of substring(s) in s that is a concatenation of each word in words exactly once and without any intervening characters.
For example, given:
s: "barfoothefoobarman"
words: ["foo", "bar"]
You should return the indices: [0,9].
(order does not matter).
Difficulty:Hard
Total Accepted:92.2K
Total Submissions:414.9K
Contributor:LeetCode
Related Topics
hash tabletwo pointersstring
Similar Questions
Minimum Window Substring