CrackingTheSafe [source code]


public class CrackingTheSafe {
static
/******************************************************************************/
class Solution {
    public String crackSafe(int n, int k) {
        if (n == 1 && k == 1)
            return "0";
        StringBuilder sb = new StringBuilder ();
        for (int i = 0; i < n - 1; i++) {
            sb.append ("0");
        }
        String start = sb.toString ();
        StringBuilder ans = new StringBuilder ();
        dfs (start, ans, new HashSet<> (), start, k);
        return ans.toString ();
    }

    void dfs (String root, StringBuilder ans, Set<String> seen, String link, int k) {
        for (int x = 0; x < k; x++) {
            String nei = root + x;
            if (!seen.contains (nei)) {
                seen.add (nei);
                dfs (nei.substring (1), ans, seen, x + "", k);
            }
        }
        ans.append (link);
    }
}
/******************************************************************************/

    public static void main(String[] args) {
        CrackingTheSafe.Solution tester = new CrackingTheSafe.Solution();
        String[] inputs = {
            "3", "2", "0001110100",
            "2", "2", "00110",
            "1", "2", "01",
            "4", "8", "0007777676776676666777567757675667575776567657665666575656577556755765566557555655557774677457747674667456747574657455747477646764576476646664566475646564556474646477546754575476546654565475546554555474546454547744674457447644664456447544654455447444644454444777367735773477376736673567346737573657355734573747364735473447373776367635763476376636663566346637563656355634563746364635463446373636377536753575347537653665356534653755365535553455374536453545344537353635353774367435743474376436643564346437543654355434543744364435443444373436343534343773367335733473376336633563346337533653355334533743364335433443373336333533343333777267725772477237727672667256724672367275726572557245723572747264725472447234727372637253724372337272776267625762476237627662666256624662366275626562556245623562746264625462446234627362636253624362336272626277526752575247523752765266525652465236527552655255524552355274526452545244523452735263525352435233527252625252774267425742474237427642664256424642364275426542554245423542744264425442444234427342634253424342334272426242524242773267325732473237327632663256324632363275326532553245323532743264325432443234327332633253324332333272326232523242323277226722572247223722762266225622462236227522652255224522352274226422542244223422732263225322432233227222622252224222322227771677157714771377127717671667156714671367126717571657155714571357125717471647154714471347124717371637153714371337123717271627152714271327122717177616761576147613761276176616661566146613661266175616561556145613561256174616461546144613461246173616361536143613361236172616261526142613261226171616177516751575147513751275176516651565146513651265175516551555145513551255174516451545144513451245173516351535143513351235172516251525142513251225171516151517741674157414741374127417641664156414641364126417541654155414541354125417441644154414441344124417341634153414341334123417241624152414241324122417141614151414177316731573147313731273176316631563146313631263175316531553145313531253174316431543144313431243173316331533143313331233172316231523142313231223171316131513141313177216721572147213721272176216621562146213621262175216521552145213521252174216421542144213421242173216321532143213321232172216221522142213221222171216121512141213121217711671157114711371127117611661156114611361126117511651155114511351125117411641154114411341124117311631153114311331123117211621152114211321122117111611151114111311121111777067705770477037702770177076706670567046703670267016707570657055704570357025701570747064705470447034702470147073706370537043703370237013707270627052704270327022701270717061705170417031702170117070776067605760476037602760176076606660566046603660266016607560656055604560356025601560746064605460446034602460146073606360536043603360236013607260626052604260326022601260716061605160416031602160116070606077506750575047503750275017507650665056504650365026501650755065505550455035502550155074506450545044503450245014507350635053504350335023501350725062505250425032502250125071506150515041503150215011507050605050774067405740474037402740174076406640564046403640264016407540654055404540354025401540744064405440444034402440144073406340534043403340234013407240624052404240324022401240714061405140414031402140114070406040504040773067305730473037302730173076306630563046303630263016307530653055304530353025301530743064305430443034302430143073306330533043303330233013307230623052304230323022301230713061305130413031302130113070306030503040303077206720572047203720272017207620662056204620362026201620752065205520452035202520152074206420542044203420242014207320632053204320332023201320722062205220422032202220122071206120512041203120212011207020602050204020302020771067105710471037102710171076106610561046103610261016107510651055104510351025101510741064105410441034102410141073106310531043103310231013107210621052104210321022101210711061105110411031102110111070106010501040103010201010770067005700470037002700170076006600560046003600260016007500650055004500350025001500740064005400440034002400140073006300530043003300230013007200620052004200320022001200710061005100410031002100110070006000500040003000200010000",
        };
        for (int i = 0; i < inputs.length / 3; i++) {
            System.out.println (Printer.separator ());
            int n = Integer.parseInt (inputs[3 * i]), k = Integer.parseInt (inputs[3 * i + 1]);
            String ans = inputs[3 * i + 2], output = tester.crackSafe (n, k);
            System.out.printf ("%d, %d\n%s\n%s\n",
                n, k, Printer.wrap (output, check (output, ans) ? 92 : 91), ans
            );
        }
    }

    static boolean check (String a, String b) {
        return a.length () == b.length () && (a + a).contains (b);
    }
}

等于说这题最后求的还是个完整版的;

给的tag有DFS, 这个应该是更好的做法; 写写看? 数学解法之前三分地里面看过了;

虽然看起来是完整的自由度, 不过下面还是给了缩小的范围; 估计比较大的case应该是2^12或者10^3这种的;

没什么好想法, 难道把所有的node都定义出来? 那么如果是B(k, n) = B(2, 4)这种, 最后就要有2^4个node, 这个指数级别的node, 能维护的了吗? 而且就算是定义graph, 还要考虑edge, edge是什么呢?

看完editorial回来, editorial2实在是懒得看了, 尝试实现一下editorial1, 这个感觉如果时间充足, 还是可以在面试当中讲清楚的;

最后把awice那个写法稍微改了一下, 因为他那个写法, 实际上是在一个node的parent那里完成自己的append操作, 我不太喜欢这个, 比如我退出下面图上的B的时候, 我希望自己就把自己的parent link对应的x给append上去; 稍微修改一下就行了; 最后的速度是24(39). 速度比较快的好像都是用editorial2那个lyndon word的东西做的; 暂时没有时间, 不看了, 反正也是纯数学解法;

这个自己在debug的时候有一个大坑; 这里最后一个test, 8^4这个, 是比较大的, 当时刚开始debug的时候, 有留下我平时的indent和DEBUG组合的东西, 结果就爆栈了; 当时想了半天也想不通, 后来把这些log的东西都删了之后, 结果就不爆栈了; 这个真的是有点不小心; 因为indent好像其实占用的地方也很大? 其实什么原因我也不知道, 反正以后要是碰到这种, 尝试先把log的功能删了(查找indent, 然后comment就行了)试试看;


editorial

Approach #1: Hierholzer's Algorithm [Accepted]

We can think of this problem as the problem of finding an Euler path (a path visiting every edge exactly once) on the following graph:...

For example, when k = 4, n = 3, the nodes are '00', '01', '02', ..., '32', '33' and each node has 4 edges '0', '1', '2', '3'. A node plus edge represents a complete edge and viewing that substring in our answer.

这个概念搞清楚, 这个是理解下面的核心;

Any connected directed graph where all nodes have equal in-degree and out-degree has an Euler circuit (an Euler path ending where it started.) Because our graph is highly connected and symmetric, we should expect intuitively that taking any path greedily in some order will probably result in an Euler path.

An Euler path is a path that uses every edge of a graph exactly once. An Euler circuit is a circuit that uses every edge of a graph exactly once. ▶ An Euler path starts and ends at different vertices. ▶ An Euler circuit starts and ends at the same vertex.

This intuition is called Hierholzer's algorithm: whenever there is an Euler cycle, we can construct it greedily. The algorithm goes as follows:

  • Starting from a vertex u, we walk through (unwalked) edges until we get stuck. Because the in-degrees and out-degrees of each node are equal, we can only get stuck at u, which forms a cycle.
  • Now, for any node v we had visited that has unwalked edges, we start a new cycle from v with the same procedure as above, and then merge the cycles together to form a new cycle: u -> ... -> v -> ... -> v -> ... -> u.

这个解释还是可以的, 不过看到这里其实有点还不是很懂;

Algorithm

We will modify our standard depth-first search: instead of keeping track of nodes, we keep track of (complete) edges: seen records if an edge has been visited.

Also, we'll need to visit in a sort of "post-order", recording the answer after visiting the edge. This is to prevent getting stuck. For example, with k = 2, n = 2, we have the nodes '0', '1'. If we greedily visit complete edges '00', '01', '10', we will be stuck at the node '0' prematurely.

这里的greedily实际上就是每当在一个node, 有限选最小的一个unseen edge, 比如在1的时候, 有限选edge0(对应complete edge是10), 然后选edge1 (complete edge 11). 这个是写代码的时候反正总归是要有一个iterate edges的顺序的; 一般也就是从小到大;

However, if we visit in post-order, we'll end up visiting '00', '01', '11', '10' correctly.

不知道他这里想要表达的是什么观点; 不过看他这个意思, 好像是alphabet里面0..k-1个node就够了? 而不是指数级别的所有组合都表达出来; 然后用edge来表示一个window; 这个还是一个不错的想法; n=2的时候可能看不太出来, 如果是n比如说是4的时候, 一个edge对应的应该不是一个window(毕竟两头只有两个数字啊), 应该是一个append操作, 类似一个滑动操作, 比如你window本来是000, 然后你在node0, 你走向node1, 对应的就是000滑动到了001;

暂时是这么理解的, 不一定对; 不过如果是这个想法, 这题的一个难点其实在于这个graph的表示怎么构建; node和edge的概念都不是那么显而易见;

In general, during our Hierholzer walk, we will record the results of other subcycles first, before recording the main cycle we started from, just as in our first description of the algorithm. Technically, we are recording backwards, as we exit the nodes.

exit的时候record; 如果你看代码的话, 实际上是在pre的位置来完成这个record的操作; 严格来说这个不是PostOrder;

For example, we will walk (in the "original cycle") until we get stuck, then record the node as we exit. (Every edge walked is always marked immediately so that it can no longer be used.) Then in the penultimate node of our original cycle, we will do a Hierholzer walk and then record this node; then in the third-last node of our original cycle we will do a Hierholzer walk and then record this node, and so on.

这一段没怎么看懂, 为什么不来两张图呢;

class Solution {  
    Set<String> seen;  
    StringBuilder ans;  

    public String crackSafe(int n, int k) {  
        if (n == 1 && k == 1) return "0";  
        seen = new HashSet();  
        ans = new StringBuilder();  

        StringBuilder sb = new StringBuilder();  
        for (int i = 0; i < n-1; ++i)  
            sb.append("0");  
        String start = sb.toString();  

        dfs(start, k);  
        ans.append(start);  
        return new String(ans);  
    }  

    public void dfs(String node, int k) {  
        for (int x = 0; x < k; ++x) {  
            String nei = node + x;  
            if (!seen.contains(nei)) {  
                seen.add(nei);  
                dfs(nei.substring(1), k);  
                ans.append(x);  
            }  
        }  
    }  
}

代码倒是出奇的简单;

主函数开始看, edgecase和basecase先不看;

然后我们先尝试一下2^2的例子;

最后node其实还是word啊, 我还以为是单独的digit呢; 应该说是短word, 最后window是放在complete edge里面的; 比如2^3这种, 那么node就是00, 01, 10这种, 但是window长度实际上是3;

用这个代码trace了一下:

class Solution {  
    Set<String> seen;  
    StringBuilder ans;  
    boolean DEBUG = true;  
    String indent = "";  

    public String crackSafe(int n, int k) {  
        if (n == 1 && k == 1) return "0";  
        seen = new HashSet<> ();  
        ans = new StringBuilder();  

        StringBuilder sb = new StringBuilder();  
        for (int i = 0; i < n-1; ++i)  
            sb.append("0");  
        String start = sb.toString();  

        dfs(start, k);  
        ans.append(start);  
        return new String(ans);  
    }  

    public void dfs(String node, int k) {  
        String old_idt = indent;  
        if (DEBUG) indent += "    ";  
        if (DEBUG) System.out.printf ("%snode: %s, ans: %s, seen: %s\n", indent, Printer.wrap (node, 95), ans, seen);  
        for (int x = 0; x < k; ++x) {  
            String nei = node + x;  
            if (!seen.contains(nei)) {  
                if (DEBUG) System.out.printf ("%s>>> nei:%s\n", indent, nei);  
                seen.add(nei);  
                dfs(nei.substring(1), k);  
                ans.append(x);  
            }  
        }  
        if (DEBUG) System.out.printf ("%snode: %s, ans: %s, seen: %s RETURNS\n", indent, Printer.wrap (node, 95), ans, seen);  
        indent = old_idt;  
    }  
}

你从下往上看, 就看到node是00110, 也就是最后记录的这个path的reverse; 这个order, 还有记录的时机, 还是不太理解到底什么原理;

画了这个环形图, 感觉对于理解没有太大帮助; 干脆还是把recursion tree画画看;

这个图就比较能反映他最后的这个add的时间顺序, 也就是他前面说的倒着来; 那么为什么这样做呢?

你看这个tree上面, 跟上面的环形graph还是差别比较大的, 你要记住一点, 这个图问题虽然有node, 但是node本身并不是一个主要的state, node只是一个过度两个complete edge的中间点而已;
所以在tree上面, 我根本不去强调, 比如这几个0之间的关联性: 完全无所谓; 这个tree你看下来, 比较需要注意的是, 看到几个complete edge确实是完整把所有的window exactly once的走完整一遍;

剩下一个问题, 就是这个order, 为什么这些node这样add一下最后就能得到一个把所有complete edge平滑衔接的过程呢;

这个仔细看还真的是一个PostOrder的过程; 这个图不太好, 这个图看起来好像add的是node, 实际上add的是x; 也就是edge;

2^3的trace:

然后对应的tree:

这个图形应该是没问题的; 然后每一个edge我们换成了正常的edge, 而不是complete edge; 然后complete edge和seen的操作都是implicit的;
好, 尝试理解这个trace;

这里就要注意把一个node和自己的parent link联合起来看; 因为最后你这个node退出的时候, 加入能够add, 最后add的就是你的parent link对应的字母x;

我好像大概知道什么意思了; 首先, 这种复杂的DFS的题目, 我们还是类似以前写排列组合的时候一样的思路, 先把完整的tree给画出来, 然后考虑我们想要怎么调整这个tree, 或者, 跟这题一样, 调整我们利用这个tree的方式;

先澄清一点, 这题这个tree的结构是根据什么来决定的? 就是根据seen来决定的; complete edge本身从来没有explicit的出现在这个tree当中, 但是呢, 他决定了这个tree最后的结构: 哪里可以branch, 最后能走多深, 每一个root2leaf的path最终能走到哪里; 也就是上面那个tree蓝色的部分, 这个其实你是知道怎么做出来的, 就是一个保证complete tree不重复(用set来做的很trivial的去重)的情况下, 构建出来的;

这个答案是awice写的, 看懂之后不得不说他的很多思路真的有帮助;

理解这个算法的一个核心, 是理解他所谓的合并cycle的过程; 比如有一个path, 我们从u开始, u->...->v->..->v->..->u, 这里, 最后是不是到u其实无所谓, 严格来说并不是停在u, 这个停止就是用seen来控制的; 这个是awice讲的不太好的一点; 我们就写成... 好像这个也没什么用;

上面描述intuition的第一点的时候, 提到一个get stuck, 其实就是这里描述的, 不完全就是因为回到起点而stuck, 而也有可能是因为比如这里seen的限制, 导致stuck, 也就是走到头了;

他后面说什么we can only get stuck at u, 这个看上面的图就知道了, 是不对的;

他上面对于PostOrder的原因的解释也太简单了; 他的意思就是纯粹是手段上避免stuck, 但是实际上我认为是一个merge cycle的需要; 具体怎么merge我暂时还没想透;

大概想出来一点, 没有完整的证明, 但是好过没有; 首先, 看这个图(无效的leaf都删了);

我首先声明一个重要性质, 所有node的right successor, 或者说ceiling, 都跟他相同; 这个你可以验证, 比如A和E相同, 然后B和第二level的那个00相同(root没有right successor, 所以不管); 这个是一个很重要的性质, 我暂时不知道怎么证明, 不过面试的时候, 拿着这个tree, 能够观察出来这一点, 应该还算可以了;

TODO: 证明这个性质;

然后后面的就不难理解了; 首先, 我们不考虑怎么把所有的edge串起来(这个是我们的目标); 我们只考虑我们有多少合理的root to leaf的path; 很直接, 就是用从root to B, root to E, root to F这三条path;

当我们从root -> A -> B这一条走到头的时候, 实际上我们这时候这个path是合理的, 但是还不够, 意思是呢, 我们所有的length3的window还没有探索完毕; 所以在A, 我们需要继续branch, 向右branch; 然后A走到了E, 这里呢, 又是一个到头了, 但是这个时候, 我们的window还是不够; 所以在D, 继续branch, 到了F, 最后就是有三条root2leaf的path; 有了这三条, 全都有, 才能保证所有的window都被我们这个tree涵盖了;
当然了, 现在这三条path是有overlap也有disjoint, 还无法表达成为一个stream;

但是我们目标很明确, 如果我们能够把这三条path组合到一个stream里面去, 任务就完成了; 那么怎么组合, 或者说, 组合的限制是什么? 随便连接起来行吗? 当然了, 如果不是最优解, 你就三条path都连接起来也可以; 但是我们要求最优解, 所以重叠的部分, 要处理好;

这里上面那条性质就有用了; 我们从简单的的开始, 按照上面的性质, 我们知道D肯定和F相等; D能够走到E, 也能够走到F, 我们怎么把DE和DF合并呢? 很简单, 因为F和D相等, 所以我们D到F, 然后让F(而不是D自己)走到E, 就行了; 这个相当于一个把DE这条path, 展开, 成为了DFE这个path的过程; 这样, 我们DFE这个path, 就把DE和DF都包含进去了;

那么ACDFE和AB怎么合并呢, 一样的道理, 就是把绿色的这个path, 展开成为, 要包含A右边的subtree的内容; 方法还是一样的; 因为我们知道A的ceiling肯定等于A自己; 也就是说, 如果是一个PostOrder, 那么[[B][C-subtree-serialization] A]这样一个, 我们知道C-subtree-serialization的第一个, 肯定是A; 所以这样摆起来之后, 最后得到的就是一个A->C-subtree-serialization->B这样的一个path, 而且还是城里的, 因为C-subtree-serialization的第一个等于A, 所以能够平滑的吃一个x(B对应的那个x, 也额就是这里的0, 这个0让A变成了B, 也自然能够让ceil(A)变成B), 就变成B;
这样, 两个path又合并起来了;

按照这个程度, 最后算法实现的时候注意细节, 这个算法应该就是理解了; 当然, 那个性质的证明, 还有待商榷;

真的难好吧;

时间复杂度: n * (k^n): We visit every edge once in our depth-first search, and nodes take O(n) space.

一个简单的思路就是看到上面最后成功被走的edge, 只有2^3个; 这个n不太理解是怎么来的;

Approach #2: Inverse Burrows-Wheeler Transform [Accepted]

Explanation

If we are familiar with the theory of combinatorics on words, recall that a Lyndon Word L is a word that is the unique minimum of it's rotations.

One important mathematical result (due to Fredericksen and Maiorana), is that the concatenation in lexicographic order of Lyndon words with length dividing n, forms a de Bruijin sequence: a sequence where every every word (from the k^n available) appears as a substring of length n (where we are allowed to wrap around.)

这个就是math解法了; 上面那个欧拉环的解法虽然难, 但是实际上还是有一定的可能性能够做出来的; 这种要提前知道一个数学定理的, 就真的有点凉凉;

这个描述有点看不懂, 但是意思好像是我们要始终选择最小rotation?

For example, when n = 6, k = 2, all the Lyndon words with length dividing n in lexicographic order are (spaces for convenience): 0 000001 000011 000101 000111 001 001011 001101 001111 01 010111 011 011111 1. It turns out this is the smallest de Bruijin sequence.

...

完全看不懂, 后面有空再看吧;

class Solution {  
    public String crackSafe(int n, int k) {  
        int M = (int) Math.pow(k, n-1);  
        int[] P = new int[M * k];  
        for (int i = 0; i < k; ++i)  
            for (int q = 0; q < M; ++q)  
                P[i*M + q] = q*k + i;  

        StringBuilder ans = new StringBuilder();  
        for (int i = 0; i < M*k; ++i) {  
            int j = i;  
            while (P[j] >= 0) {  
                ans.append(String.valueOf(j / M));  
                int v = P[j];  
                P[j] = -1;  
                j = v;  
            }  
        }  

        for (int i = 0; i < n-1; ++i)  
            ans.append("0");  
        return new String(ans);  
    }  
}

UNFINISHED


Problem Description

There is a box protected by a password. The password is n digits, where each letter can be one of the first k digits 0, 1, ..., k-1.

You can keep inputting the password, the password will automatically be matched against the last n digits entered.

For example, assuming the password is "345", I can open it when I type "012345", but I enter a total of 6 digits.

Please return any string of minimum length that is guaranteed to open the box after the entire string is inputted.

Example 1:

Input: n = 1, k = 2  
Output: "01"  
Note: "10" will be accepted too.

Example 2:

Input: n = 2, k = 2  
Output: "00110"  
Note: "01100", "10011", "11001" will be accepted too.

Note:

  • n will be in the range [1, 4].
  • k will be in the range [1, 10].
  • k^n will be at most 4096.

Difficulty:Hard
Total Accepted:3.6K
Total Submissions:9.1K
Contributor:1337c0d3r
Companies
google
Related Topics
mathdepth-first search

results matching ""

    No results matching ""