return -ENOTSUPP;
 }
 
+/* Return a pointer to the insn_aux_data entry located at env->insn_idx. */
+static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
+{
+       return env->insn_aux_data + env->insn_idx;
+}
+
+/* Check whether R4 in the current register state is reported as null by
+ * register_is_null(). Judging by the function name, R4 carries the flags
+ * argument of bpf_loop().
+ *
+ * When R4 is null, mark_chain_precision() is invoked for it (presumably
+ * so that the zero value is tracked precisely by the verifier - confirm
+ * against the definition of mark_chain_precision()). Its return value
+ * is not inspected here.
+ *
+ * Returns true iff register_is_null() reported R4 as null.
+ */
+static bool loop_flag_is_zero(struct bpf_verifier_env *env)
+{
+       struct bpf_reg_state *regs = cur_regs(env);
+       struct bpf_reg_state *reg = &regs[BPF_REG_4];
+       bool reg_is_null = register_is_null(reg);
+
+       if (reg_is_null)
+               mark_chain_precision(env, BPF_REG_4);
+
+       return reg_is_null;
+}
+
+/* Record, in the loop_inline_state of the instruction at env->insn_idx,
+ * whether the call being processed may still be inlined.
+ *
+ * On the first visit the state is initialized: fit_for_inline is taken
+ * from loop_flag_is_zero() and @subprogno is remembered as the callback.
+ * On later visits fit_for_inline can only be cleared: it stays set only
+ * while the flag is still zero and @subprogno matches the remembered one.
+ */
+static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
+{
+       struct bpf_loop_inline_state *st = &cur_aux(env)->loop_inline_state;
+
+       if (st->initialized) {
+               /* once inlining was ruled out it is never re-enabled */
+               if (!st->fit_for_inline)
+                       return;
+
+               /* the flag is checked first, the callback is compared
+                * only when the flag is zero
+                */
+               if (!loop_flag_is_zero(env))
+                       st->fit_for_inline = 0;
+               else
+                       st->fit_for_inline = (st->callback_subprogno == subprogno);
+               return;
+       }
+
+       st->initialized = 1;
+       st->fit_for_inline = loop_flag_is_zero(env);
+       st->callback_subprogno = subprogno;
+}
+
 static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
                             int *insn_idx_p)
 {
                err = check_bpf_snprintf_call(env, regs);
                break;
        case BPF_FUNC_loop:
+               update_loop_inline_state(env, meta.subprogno);
                err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
                                        set_loop_callback_state);
                break;
        return true;
 }
 
-static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
-{
-       return &env->insn_aux_data[env->insn_idx];
-}
-
 enum {
        REASON_BOUNDS   = -1,
        REASON_TYPE     = -2,
        return 0;
 }
 
+/* Replace the instruction at @position with an open-coded loop that
+ * calls the callback subprogram directly.
+ *
+ * @env:                verifier environment owning the program
+ * @position:           index of the instruction to be replaced
+ * @stack_base:         offset from R10 of a stack area holding three
+ *                      BPF_REG_SIZE slots, used to spill R6, R7 and R8
+ * @callback_subprogno: index into env->subprog_info of the callback
+ * @cnt:                out parameter, set to the number of instructions
+ *                      in the patch
+ *
+ * Returns the program produced by bpf_patch_insn_data(), or NULL if
+ * patching failed. The caller is responsible for storing the returned
+ * program in env->prog.
+ */
+static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
+                                       int position,
+                                       s32 stack_base,
+                                       u32 callback_subprogno,
+                                       u32 *cnt)
+{
+       s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
+       s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
+       s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
+       int reg_loop_max = BPF_REG_6;
+       int reg_loop_cnt = BPF_REG_7;
+       int reg_loop_ctx = BPF_REG_8;
+
+       struct bpf_prog *new_prog;
+       u32 callback_start;
+       u32 call_insn_offset;
+       s32 callback_offset;
+
+       /* This represents an inlined version of bpf_iter.c:bpf_loop,
+        * be careful to modify this code in sync.
+        *
+        * Jump offsets below are relative to the instruction following
+        * the jump; the patch has 19 instructions (indices 0..18).
+        */
+       struct bpf_insn insn_buf[] = {
+               /* Return error and jump to the end of the patch if
+                * expected number of iterations is too big.
+                */
+               BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2),
+               BPF_MOV32_IMM(BPF_REG_0, -E2BIG),
+               /* index 2: +16 lands right after the last patch insn */
+               BPF_JMP_IMM(BPF_JA, 0, 0, 16),
+               /* spill R6, R7, R8 to use these as loop vars */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset),
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset),
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset),
+               /* initialize loop vars */
+               BPF_MOV64_REG(reg_loop_max, BPF_REG_1),
+               BPF_MOV32_IMM(reg_loop_cnt, 0),
+               BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3),
+               /* loop header (index 9),
+                * if reg_loop_cnt >= reg_loop_max skip the loop body
+                * and continue at index 15
+                */
+               BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max, 5),
+               /* callback call,
+                * correct callback offset would be set after patching
+                */
+               BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt),
+               BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx),
+               BPF_CALL_REL(0),
+               /* increment loop counter */
+               BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1),
+               /* jump to loop header (index 9) if callback returned 0 */
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, -6),
+               /* return value of bpf_loop,
+                * set R0 to the number of iterations
+                */
+               BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt),
+               /* restore original values of R6, R7, R8 */
+               BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset),
+               BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset),
+               BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset),
+       };
+
+       *cnt = ARRAY_SIZE(insn_buf);
+       new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
+       if (!new_prog)
+               return new_prog;
+
+       /* callback start is known only after patching */
+       callback_start = env->subprog_info[callback_subprogno].start;
+       /* Note: insn_buf[12] is the BPF_CALL_REL instruction */
+       call_insn_offset = position + 12;
+       /* the trailing -1 makes the offset relative to the instruction
+        * that follows the call
+        */
+       callback_offset = callback_start - call_insn_offset - 1;
+       /* Write through new_prog, not env->prog: env->prog is updated by
+        * the caller only after this function returns, so at this point
+        * it may still refer to the pre-patch program, which the patching
+        * routine may have replaced.
+        */
+       new_prog->insnsi[call_insn_offset].imm = callback_offset;
+
+       return new_prog;
+}
+
+/* Tell whether @insn is a BPF_JMP | BPF_CALL instruction whose src_reg
+ * is 0 and whose imm equals BPF_FUNC_loop.
+ */
+static bool is_bpf_loop_call(struct bpf_insn *insn)
+{
+       if (insn->code != (BPF_JMP | BPF_CALL))
+               return false;
+       if (insn->src_reg != 0)
+               return false;
+
+       return insn->imm == BPF_FUNC_loop;
+}
+
+/* For all sub-programs in the program (including main) check
+ * insn_aux_data to see if there are bpf_loop calls that require
+ * inlining. If such calls are found the calls are replaced with a
+ * sequence of instructions produced by `inline_bpf_loop` function and
+ * subprog stack_depth is increased by the size of 3 registers.
+ * This stack space is used to spill values of the R6, R7, R8.  These
+ * registers are used to store the loop bound, counter and context
+ * variables.
+ *
+ * Returns 0 on success or -ENOMEM when `inline_bpf_loop` fails to
+ * produce a patched program.
+ */
+static int optimize_bpf_loop(struct bpf_verifier_env *env)
+{
+       struct bpf_subprog_info *subprogs = env->subprog_info;
+       int i, cur_subprog = 0, cnt, delta = 0;
+       struct bpf_insn *insn = env->prog->insnsi;
+       int insn_cnt = env->prog->len;
+       u16 stack_depth = subprogs[cur_subprog].stack_depth;
+       /* padding that brings stack_depth up to a multiple of 8 */
+       u16 stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+       u16 stack_depth_extra = 0;
+
+       /* i walks the instructions of the program as it was on entry;
+        * i + delta is the matching index in the (possibly patched)
+        * current program.
+        */
+       for (i = 0; i < insn_cnt; i++, insn++) {
+               struct bpf_loop_inline_state *inline_state =
+                       &env->insn_aux_data[i + delta].loop_inline_state;
+
+               if (is_bpf_loop_call(insn) && inline_state->fit_for_inline) {
+                       struct bpf_prog *new_prog;
+
+                       /* Assigned rather than accumulated: every inlined
+                        * call within one subprog gets the same stack_base,
+                        * so the spill area is shared between them.
+                        */
+                       stack_depth_extra = BPF_REG_SIZE * 3 + stack_depth_roundup;
+                       new_prog = inline_bpf_loop(env,
+                                                  i + delta,
+                                                  -(stack_depth + stack_depth_extra),
+                                                  inline_state->callback_subprogno,
+                                                  &cnt);
+                       if (!new_prog)
+                               return -ENOMEM;
+
+                       /* one instruction was replaced by cnt instructions;
+                        * re-point insn at the last instruction of the patch
+                        * so that the loop's insn++ moves past it
+                        */
+                       delta     += cnt - 1;
+                       env->prog  = new_prog;
+                       insn       = new_prog->insnsi + i + delta;
+               }
+
+               /* The next instruction starts the next subprog: commit the
+                * extra stack to the subprog just finished and reload the
+                * per-subprog values for the one about to be scanned.
+                */
+               if (subprogs[cur_subprog + 1].start == i + delta + 1) {
+                       subprogs[cur_subprog].stack_depth += stack_depth_extra;
+                       cur_subprog++;
+                       stack_depth = subprogs[cur_subprog].stack_depth;
+                       stack_depth_roundup = round_up(stack_depth, 8) - stack_depth;
+                       stack_depth_extra = 0;
+               }
+       }
+
+       /* propagate the (possibly increased) depth of subprog 0 */
+       env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
+
+       return 0;
+}
+
 static void free_states(struct bpf_verifier_env *env)
 {
        struct bpf_verifier_state_list *sl, *sln;
                ret = check_max_stack_depth(env);
 
        /* instruction rewrites happen after this point */
+       if (ret == 0)
+               ret = optimize_bpf_loop(env);
+
        if (is_priv) {
                if (ret == 0)
                        opt_hard_wire_dead_code_branches(env);