(* File: Data_Structures/Selection.thy Author: Manuel Eberl, TU München
*)
section \<open>The Median-of-Medians Selection Algorithm\<close> theory Selection imports Complex_Main "HOL-Library.Time_Functions" Sorting begin
text\<open> Note that there is significant overlap between this theory (which is intended mostly for the
Functional Data Structures book) and the Median-of-Medians AFP entry. \<close>
subsection \<open>Auxiliary material\<close>
lemma replicate_numeral: "replicate (numeral n) x = x # replicate (pred_numeral n) x" by (simp add: numeral_eq_Suc)
lemma insort_correct: "insort xs = sort xs" using sorted_insort mset_insort by (metis properties_for_sort)
lemma sum_list_replicate [simp]: "sum_list (replicate n x) = n * x" by (induction n) auto
lemma set_mset_sum_list [simp]: "set_mset (sum_list xs) = (\x\set xs. set_mset x)" by (induction xs) auto
lemma filter_mset_image_mset: "filter_mset P (image_mset f A) = image_mset f (filter_mset (\x. P (f x)) A)" by (induction A) auto
lemma filter_mset_sum_list: "filter_mset P (sum_list xs) = sum_list (map (filter_mset P) xs)" by (induction xs) simp_all
lemma sum_mset_mset_mono: assumes"(\x. x \# A \ f x \# g x)" shows"(\x\#A. f x) \# (\x\#A. g x)" using assms by (induction A) (auto intro!: subset_mset.add_mono)
lemma mset_filter_mono: assumes"A \# B" "\x. x \# A \ P x \ Q x" shows"filter_mset P A \# filter_mset Q B" by (rule mset_subset_eqI) (insert assms, auto simp: mset_subset_eq_count count_eq_zero_iff)
lemma size_mset_sum_mset_distrib: "size (sum_mset A :: 'a multiset) = sum_mset (image_mset size A)" by (induction A) auto
lemma sum_mset_mono: assumes"\x. x \# A \ f x \ (g x :: 'a :: {ordered_ab_semigroup_add,comm_monoid_add})" shows"(\x\#A. f x) \ (\x\#A. g x)" using assms by (induction A) (auto intro!: add_mono)
lemma filter_mset_is_empty_iff: "filter_mset P A = {#} \ (\x. x \# A \ \P x)" by (auto simp: multiset_eq_iff count_eq_zero_iff)
lemma Min_set_sorted: "sorted xs \ xs \ [] \ Min (set xs) = hd xs" by (cases xs; force intro: Min_insert2)
lemma hd_sort: fixes xs :: "'a :: linorder list" shows"xs \ [] \ hd (sort xs) = Min (set xs)" by (subst Min_set_sorted [symmetric]) auto
lemma length_filter_conv_size_filter_mset: "length (filter P xs) = size (filter_mset P (mset xs))" by (induction xs) auto
lemma sorted_filter_less_subset_take: assumes"sorted xs"and"i < length xs" shows"{#x \# mset xs. x < xs ! i#} \# mset (take i xs)" using assms proof (induction xs arbitrary: i rule: list.induct) case (Cons x xs i) show ?case proof (cases i) case 0 thus ?thesis using Cons.prems by (auto simp: filter_mset_is_empty_iff) next case (Suc i') have"{#y \# mset (x # xs). y < (x # xs) ! i#} \# add_mset x {#y \# mset xs. y < xs ! i'#}" using Suc Cons.prems by (auto) alsohave"\ \# add_mset x (mset (take i' xs))" unfolding mset_subset_eq_add_mset_cancel using Cons.prems Suc by (intro Cons.IH) (auto) alsohave"\ = mset (take i (x # xs))" by (simp add: Suc) finallyshow ?thesis . qed qed auto
lemma sorted_filter_greater_subset_drop: assumes"sorted xs"and"i < length xs" shows"{#x \# mset xs. x > xs ! i#} \# mset (drop (Suc i) xs)" using assms proof (induction xs arbitrary: i rule: list.induct) case (Cons x xs i) show ?case proof (cases i) case 0 thus ?thesis by (auto simp: sorted_append filter_mset_is_empty_iff) next case (Suc i') have"{#y \# mset (x # xs). y > (x # xs) ! i#} \# {#y \# mset xs. y > xs ! i'#}" using Suc Cons.prems by (auto simp: set_conv_nth) alsohave"\ \# mset (drop (Suc i') xs)" using Cons.prems Suc by (intro Cons.IH) (auto) alsohave"\ = mset (drop (Suc i) (x # xs))" by (simp add: Suc) finallyshow ?thesis . qed qed auto
subsection \<open>Chopping a list into equally-sized bits\<close>
fun chop :: "nat \ 'a list \ 'a list list" where "chop 0 _ = []"
| "chop _ [] = []"
| "chop n xs = take n xs # chop n (drop n xs)"
text\<open>
This is an alternative induction rule for\<^const>\<open>chop\<close>, which is often nicer to use. \<close> lemma chop_induct' [case_names trivial reduce]: assumes"\n xs. n = 0 \ xs = [] \ P n xs" assumes"\n xs. n > 0 \ xs \ [] \ P n (drop n xs) \ P n xs" shows"P n xs" using assms proof induction_schema show"wf (measure (length \ snd))" by auto qed (blast | simp)+
lemma chop_eq_Nil_iff [simp]: "chop n xs = [] \ n = 0 \ xs = []" by (induction n xs rule: chop.induct; subst chop.simps) auto
lemma chop_Nil [simp]: "chop n [] = []" by (cases n) auto
lemma chop_reduce: "n > 0 \ xs \ [] \ chop n xs = take n xs # chop n (drop n xs)" by (cases n; cases xs) (auto simp: chop.simps)
lemma concat_chop [simp]: "n > 0 \ concat (chop n xs) = xs" by (induction n xs rule: chop.induct; subst chop.simps) auto
lemma chop_elem_not_Nil [dest]: "ys \ set (chop n xs) \ ys \ []" by (induction n xs rule: chop.induct; subst (asm) chop.simps)
(auto simp: eq_commute[of "[]"] split: if_splits)
lemma length_chop_part_le: "ys \ set (chop n xs) \ length ys \ n" by (induction n xs rule: chop.induct; subst (asm) chop.simps) (auto split: if_splits)
lemma length_chop: assumes"n > 0" shows"length (chop n xs) = nat \length xs / n\" proof - from\<open>n > 0\<close> have "real n * length (chop n xs) \<ge> length xs" by (induction n xs rule: chop.induct; subst chop.simps) (auto simp: field_simps) moreoverfrom\<open>n > 0\<close> have "real n * length (chop n xs) < length xs + n" by (induction n xs rule: chop.induct; subst chop.simps)
(auto simp: field_simps split: nat_diff_split_asm)+ ultimatelyhave"length (chop n xs) \ length xs / n" and "length (chop n xs) < length xs / n + 1" using assms by (auto simp: field_simps) thus ?thesis by linarith qed
lemma sum_msets_chop: "n > 0 \ (\ys\chop n xs. mset ys) = mset xs" by (subst mset_concat [symmetric]) simp_all
lemma UN_sets_chop: "n > 0 \ (\ys\set (chop n xs). set ys) = set xs" by (simp only: set_concat [symmetric] concat_chop)
lemma chop_append: "d dvd length xs \ chop d (xs @ ys) = chop d xs @ chop d ys" by (induction d xs rule: chop_induct') (auto simp: chop_reduce dvd_imp_le)
lemma chop_replicate [simp]: "d > 0 \ chop d (replicate d xs) = [replicate d xs]" by (subst chop_reduce) auto
lemma chop_replicate_dvd [simp]: assumes"d dvd n" shows"chop d (replicate n x) = replicate (n div d) (replicate d x)" proof (cases "d = 0") case False from assms obtain k where k: "n = d * k" by blast have"chop d (replicate (d * k) x) = replicate k (replicate d x)" using False by (induction k) (auto simp: replicate_add chop_append) thus ?thesis using False by (simp add: k) qed auto
lemma chop_concat: assumes"\xs\set xss. length xs = d" and "d > 0" shows"chop d (concat xss) = xss" using assms proof (induction xss) case (Cons xs xss) have"chop d (concat (xs # xss)) = chop d (xs @ concat xss)" by simp alsohave"\ = chop d xs @ chop d (concat xss)" using Cons.prems by (intro chop_append) auto alsohave"chop d xs = [xs]" using Cons.prems by (subst chop_reduce) auto alsohave"chop d (concat xss) = xss" using Cons.prems by (intro Cons.IH) auto finallyshow ?caseby simp qed auto
subsection \<open>Selection\<close>
definition select :: "nat \ ('a :: linorder) list \ 'a" where "select k xs = sort xs ! k"
lemma select_0: "xs \ [] \ select 0 xs = Min (set xs)" by (simp add: hd_sort select_def flip: hd_conv_nth)
lemma select_mset_cong: "mset xs = mset ys \ select k xs = select k ys" using sort_mset_cong[of xs ys] unfolding select_def by auto
lemma select_in_set [intro,simp]: assumes"k < length xs" shows"select k xs \ set xs" proof - from assms have"sort xs ! k \ set (sort xs)" by (intro nth_mem) auto alsohave"set (sort xs) = set xs"by simp finallyshow ?thesis by (simp add: select_def) qed
lemma assumes"n < length xs" shows size_less_than_select: "size {#y \# mset xs. y < select n xs#} \ n" and size_greater_than_select: "size {#y \# mset xs. y > select n xs#} < length xs - n" proof - have"size {#y \# mset (sort xs). y < select n xs#} \ size (mset (take n (sort xs)))" unfolding select_def using assms by (intro size_mset_mono sorted_filter_less_subset_take) auto thus"size {#y \# mset xs. y < select n xs#} \ n" by simp have"size {#y \# mset (sort xs). y > select n xs#} \ size (mset (drop (Suc n) (sort xs)))" unfolding select_def using assms by (intro size_mset_mono sorted_filter_greater_subset_drop) auto thus"size {#y \# mset xs. y > select n xs#} < length xs - n" using assms by simp qed
subsection \<open>The designated median of a list\<close>
definition median where"median xs = select ((length xs - 1) div 2) xs"
lemma median_in_set [intro, simp]: assumes"xs \ []" shows"median xs \ set xs" proof - from assms have"length xs > 0"by auto hence"(length xs - 1) div 2 < length xs"by linarith thus ?thesis by (simp add: median_def) qed
lemma size_less_than_median: "size {#y \# mset xs. y < median xs#} \ (length xs - 1) div 2" proof (cases "xs = []") case False hence"length xs > 0" by auto hence less: "(length xs - 1) div 2 < length xs" by linarith show"size {#y \# mset xs. y < median xs#} \ (length xs - 1) div 2" using size_less_than_select[OF less] by (simp add: median_def) qed auto
lemma size_greater_than_median: "size {#y \# mset xs. y > median xs#} \ length xs div 2" proof (cases "xs = []") case False hence"length xs > 0" by auto hence less: "(length xs - 1) div 2 < length xs" by linarith have"size {#y \# mset xs. y > median xs#} < length xs - (length xs - 1) div 2" using size_greater_than_select[OF less] by (simp add: median_def) alsohave"\ = length xs div 2 + 1" using\<open>length xs > 0\<close> by linarith finallyshow"size {#y \# mset xs. y > median xs#} \ length xs div 2" by simp qed auto
subsection \<open>A recurrence for selection\<close>
definition partition3 :: "'a \ 'a :: linorder list \ 'a list \ 'a list \ 'a list" where "partition3 x xs = (filter (\y. y < x) xs, filter (\y. y = x) xs, filter (\y. y > x) xs)"
lemma partition3_code [code]: "partition3 x [] = ([], [], [])" "partition3 x (y # ys) =
(case partition3 x ys of (ls, es, gs) \<Rightarrow> if y < x then (y # ls, es, gs) else if x = y then (ls, y # es, gs) else (ls, es, y # gs))" by (auto simp: partition3_def)
lemma length_partition3: assumes"partition3 x xs = (ls, es, gs)" shows"length xs = length ls + length es + length gs" using assms by (induction xs arbitrary: ls es gs)
(auto simp: partition3_code split: if_splits prod.splits)
lemma sort_append: assumes"\x\set xs. \y\set ys. x \ y" shows"sort (xs @ ys) = sort xs @ sort ys" using assms by (intro properties_for_sort) (auto simp: sorted_append)
lemma select_append: assumes"\y\set ys. \z\set zs. y \ z" shows"k < length ys \ select k (ys @ zs) = select k ys" and"k \ {length ys..
select k (ys @ zs) = select (k - length ys) zs" using assms by (simp_all add: select_def sort_append nth_append)
lemma select_append': assumes"\y\set ys. \z\set zs. y \ z" and "k < length ys + length zs" shows"select k (ys @ zs) = (if k < length ys then select k ys else select (k - length ys) zs)" using assms by (auto intro!: select_append)
theorem select_rec_partition: assumes"k < length xs" shows"select k xs = ( let (ls, es, gs) = partition3 x xs in if k < length ls then select k ls
else if k < length ls + length es then x
else select (k - length ls - length es) gs
)" (is "_ = ?rhs") proof -
define ls es gs where"ls = filter (\y. y < x) xs" and "es = filter (\y. y = x) xs" and"gs = filter (\y. y > x) xs"
define nl ne where [simp]: "nl = length ls""ne = length es" have mset_eq: "mset xs = mset ls + mset es + mset gs" unfolding ls_def es_def gs_def by (induction xs) auto have length_eq: "length xs = length ls + length es + length gs" unfolding ls_def es_def gs_def using [[simp_depth_limit = 1]] by (induction xs) auto have [simp]: "select i es = x"if"i < length es"for i proof - have"select i es \ set (sort es)" unfolding select_def using that by (intro nth_mem) auto thus ?thesis by (auto simp: es_def) qed
have"select k xs = select k (ls @ (es @ gs))" by (intro select_mset_cong) (simp_all add: mset_eq) alsohave"\ = (if k < nl then select k ls else select (k - nl) (es @ gs))" unfolding nl_ne_def using assms by (intro select_append') (auto simp: ls_def es_def gs_def length_eq) alsohave"\ = (if k < nl then select k ls else if k < nl + ne then x
else select (k - nl - ne) gs)" proof (rule if_cong) assume"\k < nl" have"select (k - nl) (es @ gs) =
(if k - nl < ne then select (k - nl) es else select (k - nl - ne) gs)" unfolding nl_ne_def using assms \<open>\<not>k < nl\<close> by (intro select_append') (auto simp: ls_def es_def gs_def length_eq) alsohave"\ = (if k < nl + ne then x else select (k - nl - ne) gs)" using\<open>\<not>k < nl\<close> by auto finallyshow"select (k - nl) (es @ gs) = \" . qed simp_all alsohave"\ = ?rhs" by (simp add: partition3_def ls_def es_def gs_def) finallyshow ?thesis . qed
subsection \<open>The size of the lists in the recursive calls\<close>
text\<open>
We now derive an upper bound for the number of elements of a list that are smaller
(resp. bigger) than the median of medians with chopping size 5. To avoid having to do the
same proof twice, we do it generically for an operation \<open>\<prec>\<close> that we will later instantiate with either \<open><\<close> or \<open>>\<close>. \<close>
context fixes xs :: "'a :: linorder list" fixes M defines"M \ median (map median (chop 5 xs))" begin
lemma size_median_of_medians_aux: fixes R :: "'a :: linorder \ 'a \ bool" (infix \\\ 50) assumes R: "R \ {(<), (>)}" shows"size {#y \# mset xs. y \ M#} \ nat \0.7 * length xs + 3\" proof -
define n and m where [simp]: "n = length xs"and"m = length (chop 5 xs)" text\<open>We define an abbreviation for the multiset of all the chopped-up groups:\<close>
text\<open>We then split that multiset into those groups whose medians is less than @{term M} and the rest.\<close>
define Y_small (\<open>Y\<^sub>\<prec>\<close>) where "Y\<^sub>\<prec> = filter_mset (\<lambda>ys. median ys \<prec> M) (mset (chop 5 xs))"
define Y_big (\<open>Y\<^sub>\<succeq>\<close>) where "Y\<^sub>\<succeq> = filter_mset (\<lambda>ys. \<not>(median ys \<prec> M)) (mset (chop 5 xs))" have"m = size (mset (chop 5 xs))"by (simp add: m_def) alsohave"mset (chop 5 xs) = Y\<^sub>\ + Y\<^sub>\" unfolding Y_small_def Y_big_def by (rule multiset_partition) finallyhave m_eq: "m = size Y\<^sub>\ + size Y\<^sub>\" by simp
text\<open>At most half of the lists have a median that is smaller than the median of medians:\<close> have"size Y\<^sub>\ = size (image_mset median Y\<^sub>\)" by simp alsohave"image_mset median Y\<^sub>\ = {#y \# mset (map median (chop 5 xs)). y \ M#}" unfolding Y_small_def by (subst filter_mset_image_mset [symmetric]) simp_all alsohave"size \ \ (length (map median (chop 5 xs))) div 2" unfolding M_def using median_props[of "map median (chop 5 xs)"] R by auto alsohave"\ = m div 2" by (simp add: m_def) finallyhave size_Y\<^sub>_small: "size Y\<^sub>\<prec> \<le> m div 2" .
text\<open>We estimate the number of elements less than @{term M} by grouping them into elements
coming from @{term"Y\<^sub>\"} and elements coming from @{term "Y\<^sub>\"}:\ have"{#y \# mset xs. y \ M#} = {#y \# (\ys\chop 5 xs. mset ys). y \ M#}" by (subst sum_msets_chop) simp_all alsohave"\ = (\ys\chop 5 xs. {#y \# mset ys. y \ M#})" by (subst filter_mset_sum_list) (simp add: o_def) alsohave"\ = (\ys\#mset (chop 5 xs). {#y \# mset ys. y \ M#})" by (subst sum_mset_sum_list [symmetric]) simp_all alsohave"mset (chop 5 xs) = Y\<^sub>\ + Y\<^sub>\" by (simp add: Y_small_def Y_big_def not_le) alsohave"(\ys\#\. {#y \# mset ys. y \ M#}) =
(\<Sum>ys\<in>#Y\<^sub>\<prec>. {#y \<in># mset ys. y \<prec> M#}) + (\<Sum>ys\<in>#Y\<^sub>\<succeq>. {#y \<in># mset ys. y \<prec> M#})" by simp
text\<open>Next, we overapproximate the elements contributed by @{term "Y\<^sub>\<prec>"}: instead of those elements
that are smaller than the median, we take \<^emph>\<open>all\<close> the elements of each group. For the elements contributed by @{term"Y\<^sub>\"}, we overapproximate by taking all those that
are less than their median instead of only those that are less than @{term M}.\<close> alsohave"\ \# (\ys\#Y\<^sub>\. mset ys) + (\ys\#Y\<^sub>\. {#y \# mset ys. y \ median ys#})" using R by (intro subset_mset.add_mono sum_mset_mset_mono mset_filter_mono) (auto simp: Y_big_def) finallyhave"size {# y \# mset xs. y \ M#} \ size \" by (rule size_mset_mono) hence"size {# y \# mset xs. y \ M#} \
(\<Sum>ys\<in>#Y\<^sub>\<prec>. length ys) + (\<Sum>ys\<in>#Y\<^sub>\<succeq>. size {#y \<in># mset ys. y \<prec> median ys#})" by (simp add: size_mset_sum_mset_distrib multiset.map_comp o_def)
text\<open>Next, we further overapproximate the first sum by noting that each group has
at most size 5.\<close> alsohave"(\ys\#Y\<^sub>\. length ys) \ (\ys\#Y\<^sub>\. 5)" by (intro sum_mset_mono) (auto simp: Y_small_def length_chop_part_le) alsohave"\ = 5 * size Y\<^sub>\" by simp
text\<open>Next, we note that each group in @{term "Y\<^sub>\<succeq>"} can have at most 2 elements that are
smaller than its median.\<close> alsohave"(\ys\#Y\<^sub>\. size {#y \# mset ys. y \ median ys#}) \
(\<Sum>ys\<in>#Y\<^sub>\<succeq>. length ys div 2)" proof (intro sum_mset_mono, goal_cases) fix ys assume"ys \# Y\<^sub>\" hence"ys \ []" by (auto simp: Y_big_def) thus"size {#y \# mset ys. y \ median ys#} \ length ys div 2" using R median_props[of ys] by auto qed alsohave"\ \ (\ys\#Y\<^sub>\. 2)" by (intro sum_mset_mono div_le_mono diff_le_mono)
(auto simp: Y_big_def dest: length_chop_part_le) alsohave"\ = 2 * size Y\<^sub>\" by simp
text\<open>Simplifying gives us the main result.\<close> alsohave"5 * size Y\<^sub>\ + 2 * size Y\<^sub>\ = 2 * m + 3 * size Y\<^sub>\" by (simp add: m_eq) alsohave"\ \ 3.5 * m" using\<open>size Y\<^sub>\<prec> \<le> m div 2\<close> by linarith alsohave"\ = 3.5 * \n / 5\" by (simp add: m_def length_chop) alsohave"\ \ 0.7 * n + 3.5" by linarith finallyhave"size {#y \# mset xs. y \ M#} \ 0.7 * n + 3.5" by simp thus"size {#y \# mset xs. y \ M#} \ nat \0.7 * n + 3\" by linarith qed
lemma size_less_than_median_of_medians: "size {#y \# mset xs. y < M#} \ nat \0.7 * length xs + 3\" using size_median_of_medians_aux[of "(<)"] by simp
lemma size_greater_than_median_of_medians: "size {#y \# mset xs. y > M#} \ nat \0.7 * length xs + 3\" using size_median_of_medians_aux[of "(>)"] by simp
end
subsection \<open>Efficient algorithm\<close>
text\<open>
We handle the base cases and computing the median for the chopped-up sublists of size 5 using the naive selection algorithm where we sort the list using insertion sort. \<close> definition slow_select where "slow_select k xs = insort xs ! k"
definition slow_median where "slow_median xs = slow_select ((length xs - 1) div 2) xs"
lemma slow_select_correct: "slow_select k xs = select k xs" by (simp add: slow_select_def select_def insort_correct)
lemma slow_median_correct: "slow_median xs = median xs" by (simp add: median_def slow_median_def slow_select_correct)
text\<open>
The definition of the selection algorithm is complicated somewhat by the fact that its terminationis contingent on its correctness: if the first recursive call were to return an
element for\<open>x\<close> that is e.g. smaller than all list elements, the algorithm would not terminate.
Therefore, we first prove partial correctness, thentermination, andthen combine the two toobtain total correctness. \<close> function mom_select where "mom_select k xs = ( let n = length xs inif n \<le> 20 then
slow_select k xs
else let M = mom_select (((n + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs));
(ls, es, gs) = partition3 M xs;
nl = length ls in if k < nl then mom_select k ls
else let ne = length es inif k < nl + ne then M
else mom_select (k - nl - ne) gs
)" by auto
text\<open> If @{const "mom_select"} terminates, it agrees with @{const select}: \<close> lemma mom_select_correct_aux: assumes"mom_select_dom (k, xs)"and"k < length xs" shows"mom_select k xs = select k xs" using assms proof (induction rule: mom_select.pinduct) case (1 k xs) show"mom_select k xs = select k xs" proof (cases "length xs \ 20") case True thus"mom_select k xs = select k xs"using"1.prems""1.hyps" by (subst mom_select.psimps) (auto simp: select_def slow_select_correct) next case False
define x where "x = mom_select (((length xs + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))"
define ls es gs where"ls = filter (\y. y < x) xs" and "es = filter (\y. y = x) xs" and"gs = filter (\y. y > x) xs"
define nl ne where"nl = length ls"and"ne = length es" notedefs = nl_def ne_def x_def ls_def es_def gs_def have tw: "(ls, es, gs) = partition3 x xs" unfolding partition3_def defs One_nat_def .. have length_eq: "length xs = nl + ne + length gs" unfolding nl_def ne_def ls_def es_def gs_def using [[simp_depth_limit = 1]] by (induction xs) auto note IH = "1.IH"(2)[OF refl False x_def tw refl refl refl] "1.IH"(3)[OF refl False x_def tw refl refl refl _ refl]
have"mom_select k xs = (if k < nl then mom_select k ls else if k < nl + ne then x
else mom_select (k - nl - ne) gs)" using "1.hyps" False by (subst mom_select.psimps) (simp_all add: partition3_def flip: defs One_nat_def) alsohave"\ = (if k < nl then select k ls else if k < nl + ne then x
else select (k - nl - ne) gs)" using IH length_eq "1.prems"by (simp add: ls_def es_def gs_def nl_def ne_def)
try0 alsohave"\ = select k xs" using \k < length xs\ by (subst (3) select_rec_partition[of _ _ x]) (simp_all add: nl_def ne_def flip: tw) finallyshow"mom_select k xs = select k xs" . qed qed
text\<open>
@{const mom_select} indeed terminates for all inputs: \<close> lemma mom_select_termination: "All mom_select_dom" proof (relation "measure (length \ snd)"; (safe)?) fix k :: nat and xs :: "'a list" assume"\ length xs \ 20" thus"((((length xs + 4) div 5 - 1) div 2, map slow_median (chop 5 xs)), k, xs) \<in> measure (length \<circ> snd)" by (auto simp: length_chop nat_less_iff ceiling_less_iff) next fix k :: nat and xs ls es gs :: "'a list"
define x where"x = mom_select (((length xs + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))" assume A: "\ length xs \ 20" "(ls, es, gs) = partition3 x xs" "mom_select_dom (((length xs + 4) div 5 - 1) div 2,
map slow_median (chop 5 xs))" have less: "((length xs + 4) div 5 - 1) div 2 < nat \length xs / 5\" using A(1) by linarith
text\<open>For termination, it suffices to prove that @{term x} is in the list.\<close> have"x = select (((length xs + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))" using less unfolding x_def by (intro mom_select_correct_aux A) (auto simp: length_chop) alsohave"\ \ set (map slow_median (chop 5 xs))" using less by (intro select_in_set) (simp_all add: length_chop) alsohave"\ \ set xs" unfolding set_map proof safe fix ys assume ys: "ys \ set (chop 5 xs)" hence"median ys \ set ys" by auto alsohave"set ys \ \(set ` set (chop 5 xs))" using ys by blast alsohave"\ = set xs" by (rule UN_sets_chop) simp_all finallyshow"slow_median ys \ set xs" by (simp add: slow_median_correct) qed finallyhave"x \ set xs" . thus"((k, ls), k, xs) \ measure (length \ snd)" and"((k - length ls - length es, gs), k, xs) \ measure (length \ snd)" using A(1,2) by (auto simp: partition3_def intro!: length_filter_less[of x]) qed
termination mom_select by (rule mom_select_termination)
lemmas [simp del] = mom_select.simps
lemma mom_select_correct: "k < length xs \ mom_select k xs = select k xs" using mom_select_correct_aux and mom_select_termination by blast
subsection \<open>Running time analysis\<close>
time_fun partition3 equations partition3_code
lemma T_partition3: "T_partition3 x xs = length xs + 1" by (induction x xs rule: T_partition3.induct) auto
lemma T_chop_reduce: "n > 0 \ xs \ [] \ T_chop n xs = T_take n xs + T_drop n xs + T_chop n (drop n xs) + 1" by (cases n; cases xs) (auto simp: T_chop.simps)
lemma T_chop_le: "T_chop d xs \ 5 * length xs + 1" by (induction d xs rule: T_chop.induct) (auto simp: T_chop_reduce T_take T_drop)
time_fun mom_select
lemmas [simp del] = T_mom_select.simps
lemma T_mom_select_simps: "length xs \ 20 \ T_mom_select k xs = T_slow_select k xs + T_length xs + 1" "length xs > 20 \ T_mom_select k xs = ( let xss = chop 5 xs;
ms = map slow_median xss;
idx = (((length xs + 4) div 5 - 1) div 2);
x = mom_select idx ms;
(ls, es, gs) = partition3 x xs;
nl = length ls;
ne = length es in
(if k < nl then T_mom_select k ls
else T_length es + (if k < nl + ne then 0 else T_mom_select (k - nl - ne) gs)) +
T_mom_select idx ms + T_chop 5 xs + T_map T_slow_median xss +
T_partition3 x xs + T_length ls + T_length xs + 1
)" by (subst T_mom_select.simps; simp add: Let_def case_prod_unfold)+
function T'_mom_select :: "nat \ nat" where "T'_mom_select n =
(if n \<le> 20 then
483
else
T'_mom_select (nat \0.2*n\) + T'_mom_select (nat \0.7*n+3\) + 19 * n + 54)" by force+ terminationby (relation "measure id"; simp; linarith)
lemmas [simp del] = T'_mom_select.simps
lemma T'_mom_select_ge: "T'_mom_select n \<ge> 483" by (induction n rule: T'_mom_select.induct; subst T'_mom_select.simps) auto
lemma T'_mom_select_mono: "m \ n \ T'_mom_select m \ T'_mom_select n" proof (induction n arbitrary: m rule: less_induct) case (less n m) show ?case proof (cases "m \ 20") case True hence"T'_mom_select m = 483" by (subst T'_mom_select.simps) auto alsohave"\ \ T'_mom_select n" by (rule T'_mom_select_ge) finallyshow ?thesis . next case False hence"T'_mom_select m =
T'_mom_select (nat \0.2*m\) + T'_mom_select (nat \0.7*m + 3\) + 19 * m + 54" by (subst T'_mom_select.simps) auto alsohave"\ \ T'_mom_select (nat \0.2*n\) + T'_mom_select (nat \0.7*n + 3\) + 19 * n + 54" using\<open>m \<le> n\<close> and False by (intro add_mono less.IH; linarith) alsohave"\ = T'_mom_select n" using\<open>m \<le> n\<close> and False by (subst T'_mom_select.simps) auto finallyshow ?thesis . qed qed
lemma T_mom_select_le_aux: assumes"k < length xs" shows"T_mom_select k xs \ T'_mom_select (length xs)" using assms proof (induction k xs rule: T_mom_select.induct) case (1 k xs)
define n where [simp]: "n = length xs"
define x where "x = mom_select (((n + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))"
define ls es gs where"ls = filter (\y. y < x) xs" and "es = filter (\y. y = x) xs" and"gs = filter (\y. y > x) xs"
define nl ne where"nl = length ls"and"ne = length es" notedefs = nl_def ne_def x_def ls_def es_def gs_def have tw: "(ls, es, gs) = partition3 x xs" unfolding partition3_def defs One_nat_def .. note IH = "1.IH"(1)[OF n_def] "1.IH"(2)[OF n_def _ x_def tw refl refl nl_def] "1.IH"(3)[OF n_def _ x_def tw refl refl nl_def _ ne_def]
show ?case proof (cases "length xs \ 20") case True \<comment> \<open>base case\<close> hence"T_mom_select k xs \ (length xs)\<^sup>2 + 4 * length xs + 3" using T_slow_select_le[of k xs] \<open>k < length xs\<close> by (subst T_mom_select_simps(1)) (auto simp: T_length) alsohave"\ \ 20\<^sup>2 + 4 * 20 + 3" using True by (intro add_mono power_mono) auto alsohave"\ = 483" by simp alsohave"\ = T'_mom_select (length xs)" using True by (simp add: T'_mom_select.simps) finallyshow ?thesis by simp next case False \<comment> \<open>recursive case\<close> have"((n + 4) div 5 - 1) div 2 < nat \n / 5\" using False unfolding n_def by linarith hence"x = select (((n + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))" unfolding x_def n_def by (intro mom_select_correct) (auto simp: length_chop) alsohave"((n + 4) div 5 - 1) div 2 = (nat \n / 5\ - 1) div 2" by linarith alsohave"select \ (map slow_median (chop 5 xs)) = median (map slow_median (chop 5 xs))" by (auto simp: median_def length_chop) finallyhave x_eq: "x = median (map slow_median (chop 5 xs))" .
text\<open>The cost of computing the medians of all the subgroups:\<close>
define T_ms where"T_ms = T_map T_slow_median (chop 5 xs)" have"T_ms \ 10 * n + 48" proof - have"T_ms = (\ys\chop 5 xs. T_slow_median ys) + length (chop 5 xs) + 1" by (simp add: T_ms_def T_map) alsohave"(\ys\chop 5 xs. T_slow_median ys) \ (\ys\chop 5 xs. 47)" proof (intro sum_list_mono) fix ys assume"ys \ set (chop 5 xs)" hence"length ys \ 5" "ys \ []" using length_chop_part_le[of ys 5 xs] by auto from\<open>ys \<noteq> []\<close> have "T_slow_median ys \<le> (length ys) ^ 2 + 4 * length ys + 2" by (rule T_slow_median_le) alsohave"\ \ 5 ^ 2 + 4 * 5 + 2" using\<open>length ys \<le> 5\<close> by (intro add_mono power_mono) auto finallyshow"T_slow_median ys \ 47" by simp qed alsohave"(\ys\chop 5 xs. 47) + length (chop 5 xs) + 1 =
48 * nat \<lceil>real n / 5\<rceil> + 1" by (simp add: map_replicate_const length_chop) alsohave"\ \ 10 * n + 48" by linarith finallyshow"T_ms \ 10 * n + 48" by simp qed
text\<open>The cost of the first recursive call (to compute the median of medians):\<close>
define T_rec1 where "T_rec1 = T_mom_select (((n + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs))" from False have"((length xs + 4) div 5 - Suc 0) div 2 < nat \real (length xs) / 5\" by linarith hence"T_rec1 \ T'_mom_select (length (map slow_median (chop 5 xs)))" using False unfolding T_rec1_def by (intro IH(1)) (auto simp: length_chop) hence"T_rec1 \ T'_mom_select (nat \0.2 * n\)" by (simp add: length_chop)
text\<open>The cost of the second recursive call (to compute the final result):\<close>
define T_rec2 where"T_rec2 = (if k < nl then T_mom_select k ls
else if k < nl + ne then 0
else T_mom_select (k - nl - ne) gs)"
consider "k < nl" | "k \ {nl.. nl+ne" by force hence"T_rec2 \ T'_mom_select (nat \0.7 * n + 3\)" proof cases assume"k < nl" hence"T_rec2 = T_mom_select k ls" by (simp add: T_rec2_def) alsohave"\ \ T'_mom_select (length ls)" by (rule IH(2)) (use\<open>k < nl\<close> False in \<open>auto simp: defs\<close>) alsohave"length ls \ nat \0.7 * n + 3\" unfolding ls_def using size_less_than_median_of_medians[of xs] by (auto simp: length_filter_conv_size_filter_mset slow_median_correct[abs_def] x_eq) hence"T'_mom_select (length ls) \ T'_mom_select (nat \0.7 * n + 3\)" by (rule T'_mom_select_mono) finallyshow ?thesis . next assume"k \ {nl.. hence"T_rec2 = 0" by (simp add: T_rec2_def) thus ?thesis using T'_mom_select_ge[of "nat \0.7 * n + 3\"] by simp next assume"k \ nl + ne" hence"T_rec2 = T_mom_select (k - nl - ne) gs" by (simp add: T_rec2_def) alsohave"\ \ T'_mom_select (length gs)" proof (rule IH(3)) show"\n \ 20" using False by auto show"\ k < nl" "\k < nl + ne" using\<open>k \<ge> nl + ne\<close> by (auto simp: nl_def ne_def) have"length xs = nl + ne + length gs" unfoldingdefsby (rule length_partition3) (simp_all add: partition3_def) thus"k - nl - ne < length gs" using\<open>k \<ge> nl + ne\<close> \<open>k < length xs\<close> by (auto simp: nl_def ne_def) qed alsohave"length gs \ nat \0.7 * n + 3\" unfolding gs_def using size_greater_than_median_of_medians[of xs] by (auto simp: length_filter_conv_size_filter_mset slow_median_correct[abs_def] x_eq) hence"T'_mom_select (length gs) \ T'_mom_select (nat \0.7 * n + 3\)" by (rule T'_mom_select_mono) finallyshow ?thesis . qed
text\<open>Now for the final inequality chain:\<close> have"T_mom_select k xs =
(if k < nl then T_mom_select k ls
else T_length es +
(if k < nl + ne then 0 else T_mom_select (k - nl - ne) gs)) +
T_mom_select (((n + 4) div 5 - 1) div 2) (map slow_median (chop 5 xs)) +
T_chop 5 xs + T_map T_slow_median (chop 5 xs) + T_partition3 x xs +
T_length ls + T_length xs + 1" using False by (subst T_mom_select_simps;
unfold Let_def n_def [symmetric] x_def [symmetric] nl_def [symmetric]
ne_def [symmetric] prod.case tw [symmetric]) simp_all alsohave"\ \ T_rec2 + T_rec1 + T_ms + 2 * n + nl + ne + T_chop 5 xs + 5" using False by (auto simp add: T_rec1_def T_rec2_def T_partition3
T_length T_ms_def nl_def ne_def) alsohave"nl \ n" by (simp add: nl_def ls_def) alsohave"ne \ n" by (simp add: ne_def es_def) alsonote\<open>T_ms \<le> 10 * n + 48\<close> alsohave"T_chop 5 xs \ 5 * n + 1" using T_chop_le[of 5 xs] by simp alsonote\<open>T_rec1 \<le> T'_mom_select (nat \<lceil>0.2*n\<rceil>)\<close> alsonote\<open>T_rec2 \<le> T'_mom_select (nat \<lceil>0.7*n + 3\<rceil>)\<close> finallyhave"T_mom_select k xs \
T'_mom_select (nat \0.7*n + 3\) + T'_mom_select (nat \0.2*n\) + 19 * n + 54" by simp alsohave"\ = T'_mom_select n" using False by (subst T'_mom_select.simps) auto finallyshow ?thesis by simp qed qed
subsection \<open>Akra--Bazzi Light\<close>
lemma akra_bazzi_light_aux1: fixes a b :: real and n n0 :: nat assumes ab: "a > 0""a < 1""n > n0" assumes"n0 \ (max 0 b + 1) / (1 - a)" shows"nat \a*n+b\ < n" proof - have"a * real n + max 0 b \ 0" using ab by simp hence"real (nat \a*n+b\) \ a * n + max 0 b + 1" by linarith also { have"n0 \ (max 0 b + 1) / (1 - a)" by fact alsohave"\ < real n" using assms by simp finallyhave"a * real n + max 0 b + 1 < real n" using ab by (simp add: field_simps)
} finallyshow"nat \a*n+b\ < n" using\<open>n > n0\<close> by linarith qed
lemma akra_bazzi_light_aux2: fixes f :: "nat \ real" fixes n\<^sub>0 :: nat and a b c d :: real and C1 C2 C\<^sub>1 C\<^sub>2 :: real assumes bounds: "a > 0""c > 0""a + c < 1""C\<^sub>1 \ 0" assumes rec: "\n>n\<^sub>0. f n = f (nat \a*n+b\) + f (nat \c*n+d\) + C\<^sub>1 * n + C\<^sub>2" assumes ineqs: "n\<^sub>0 > (max 0 b + max 0 d + 2) / (1 - a - c)" "C\<^sub>3 \ C\<^sub>1 / (1 - a - c)" "C\<^sub>3 \ (C\<^sub>1 * n\<^sub>0 + C\<^sub>2 + C\<^sub>4) / ((1 - a - c) * n\<^sub>0 - max 0 b - max 0 d - 2)" "\n\n\<^sub>0. f n \ C\<^sub>4" shows"f n \ C\<^sub>3 * n + C\<^sub>4" proof (induction n rule: less_induct) case (less n) have"0 \ C\<^sub>1 / (1 - a - c)" using bounds by auto alsohave"\ \ C\<^sub>3" by fact finallyhave"C\<^sub>3 \ 0" .
show ?case proof (cases "n > n\<^sub>0") case False hence"f n \ C\<^sub>4" using ineqs(4) by auto alsohave"\ \ C\<^sub>3 * real n + C\<^sub>4" using bounds \<open>C\<^sub>3 \<ge> 0\<close> by auto finallyshow ?thesis . next case True have nonneg: "a * n \ 0" "c * n \ 0" using bounds by simp_all
have"(max 0 b + 1) / (1 - a) \ (max 0 b + max 0 d + 2) / (1 - a - c)" using bounds by (intro frac_le) auto hence"n\<^sub>0 \ (max 0 b + 1) / (1 - a)" using ineqs(1) by linarith hence rec_less1: "nat \a*n+b\ < n" using bounds \<open>n > n\<^sub>0\<close> by (intro akra_bazzi_light_aux1[of _ n\<^sub>0]) auto
have"(max 0 d + 1) / (1 - c) \ (max 0 b + max 0 d + 2) / (1 - a - c)" using bounds by (intro frac_le) auto hence"n\<^sub>0 \ (max 0 d + 1) / (1 - c)" using ineqs(1) by linarith hence rec_less2: "nat \c*n+d\ < n" using bounds \<open>n > n\<^sub>0\<close> by (intro akra_bazzi_light_aux1[of _ n\<^sub>0]) auto
have"f n = f (nat \a*n+b\) + f (nat \c*n+d\) + C\<^sub>1 * n + C\<^sub>2" using\<open>n > n\<^sub>0\<close> by (subst rec) auto alsohave"\ \ (C\<^sub>3 * nat \a*n+b\ + C\<^sub>4) + (C\<^sub>3 * nat \c*n+d\ + C\<^sub>4) + C\<^sub>1 * n + C\<^sub>2" using rec_less1 rec_less2 by (intro add_mono less.IH) auto alsohave"\ \ (C\<^sub>3 * (a*n+max 0 b+1) + C\<^sub>4) + (C\<^sub>3 * (c*n+max 0 d+1) + C\<^sub>4) + C\<^sub>1 * n + C\<^sub>2" using bounds \<open>C\<^sub>3 \<ge> 0\<close> nonneg by (intro add_mono mult_left_mono order.refl; linarith) alsohave"\ = C\<^sub>3 * n + ((C\<^sub>3 * (max 0 b + max 0 d + 2) + 2 * C\<^sub>4 + C\<^sub>2) -
(C\<^sub>3 * (1 - a - c) - C\<^sub>1) * n)" by (simp add: algebra_simps) alsohave"\ \ C\<^sub>3 * n + ((C\<^sub>3 * (max 0 b + max 0 d + 2) + 2 * C\<^sub>4 + C\<^sub>2) -
(C\<^sub>3 * (1 - a - c) - C\<^sub>1) * n\<^sub>0)" using\<open>n > n\<^sub>0\<close> ineqs(2) bounds by (intro add_mono diff_mono order.refl mult_left_mono) (auto simp: field_simps) alsohave"(C\<^sub>3 * (max 0 b + max 0 d + 2) + 2 * C\<^sub>4 + C\<^sub>2) - (C\<^sub>3 * (1 - a - c) - C\<^sub>1) * n\<^sub>0 \ C\<^sub>4" using ineqs bounds by (simp add: field_simps) finallyshow"f n \ C\<^sub>3 * real n + C\<^sub>4" by (simp add: mult_right_mono) qed qed
lemma akra_bazzi_light: fixes f :: "nat \ real" fixes n\<^sub>0 :: nat and a b c d C\<^sub>1 C\<^sub>2 :: real assumes bounds: "a > 0""c > 0""a + c < 1""C\<^sub>1 \ 0" assumes rec: "\n>n\<^sub>0. f n = f (nat \a*n+b\) + f (nat \c*n+d\) + C\<^sub>1 * n + C\<^sub>2" shows"\C\<^sub>3 C\<^sub>4. \n. f n \ C\<^sub>3 * real n + C\<^sub>4" proof -
define n\<^sub>0' where "n\<^sub>0' = max n\<^sub>0 (nat \<lceil>(max 0 b + max 0 d + 2) / (1 - a - c) + 1\<rceil>)"
define C\<^sub>4 where "C\<^sub>4 = Max (f ` {..n\<^sub>0'})"
define C\<^sub>3 where "C\<^sub>3 = max (C\<^sub>1 / (1 - a - c))
((C\<^sub>1 * n\<^sub>0' + C\<^sub>2 + C\<^sub>4) / ((1 - a - c) * n\<^sub>0' - max 0 b - max 0 d - 2))"
have"f n \ C\<^sub>3 * n + C\<^sub>4" for n proof (rule akra_bazzi_light_aux2[OF bounds _]) show"\n>n\<^sub>0'. f n = f (nat \a*n+b\) + f (nat \c*n+d\) + C\<^sub>1 * n + C\<^sub>2" using rec by (auto simp: n\<^sub>0'_def) next show"C\<^sub>3 \ C\<^sub>1 / (1 - a - c)" and"C\<^sub>3 \ (C\<^sub>1 * n\<^sub>0' + C\<^sub>2 + C\<^sub>4) / ((1 - a - c) * n\<^sub>0' - max 0 b - max 0 d - 2)" by (simp_all add: C\<^sub>3_def) next have"(max 0 b + max 0 d + 2) / (1 - a - c) < nat \(max 0 b + max 0 d + 2) / (1 - a - c) + 1\" by linarith alsohave"\ \ n\<^sub>0'" by (simp add: n\<^sub>0'_def) finallyshow"(max 0 b + max 0 d + 2) / (1 - a - c) < real n\<^sub>0'" . next show"\n\n\<^sub>0'. f n \ C\<^sub>4" by (auto simp: C\<^sub>4_def) qed thus ?thesis by blast qed
lemma akra_bazzi_light_nat: fixes f :: "nat \ nat" fixes n\<^sub>0 :: nat and a b c d :: real and C\<^sub>1 C\<^sub>2 :: nat assumes bounds: "a > 0""c > 0""a + c < 1""C\<^sub>1 \ 0" assumes rec: "\n>n\<^sub>0. f n = f (nat \a*n+b\) + f (nat \c*n+d\) + C\<^sub>1 * n + C\<^sub>2" shows"\C\<^sub>3 C\<^sub>4. \n. f n \ C\<^sub>3 * n + C\<^sub>4" proof - have"\C\<^sub>3 C\<^sub>4. \n. real (f n) \ C\<^sub>3 * real n + C\<^sub>4" using assms by (intro akra_bazzi_light[of a c C\<^sub>1 n\<^sub>0 f b d C\<^sub>2]) auto thenobtain C\<^sub>3 C\<^sub>4 where le: "\<forall>n. real (f n) \<le> C\<^sub>3 * real n + C\<^sub>4" by blast have"f n \ nat \C\<^sub>3\ * n + nat \C\<^sub>4\" for n proof - have"real (f n) \ C\<^sub>3 * real n + C\<^sub>4" using le by blast alsohave"\ \ real (nat \C\<^sub>3\) * real n + real (nat \C\<^sub>4\)" by (intro add_mono mult_right_mono; linarith) alsohave"\ = real (nat \C\<^sub>3\ * n + nat \C\<^sub>4\)" by simp finallyshow ?thesis by linarith qed thus ?thesis by blast qed
lemma T'_mom_select_le': "\C\<^sub>1 C\<^sub>2. \n. T'_mom_select n \ C\<^sub>1 * n + C\<^sub>2" proof (rule akra_bazzi_light_nat) show"\n>20. T'_mom_select n = T'_mom_select (nat \0.2 * n + 0\) +
T'_mom_select (nat \0.7 * n + 3\) + 19 * n + 54" using T'_mom_select.simps by auto qed auto
end
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.0.24Bemerkung:
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.