(* Title: HOL/Probability/Information.thy Author: Johannes Hölzl, TU München Author: Armin Heller, TU München
*)
section \<open>Information theory\<close>
theory Information imports
Independent_Family begin
subsection "Information theory"
locale information_space = prob_space + fixes b :: real assumes b_gt_1: "1 < b"
text\<open>Introduce some simplification rules for logarithm of base \<^term>\<open>b\<close>.\<close> lemmas log_simps = log_mult log_inverse log_divide
subsection "Kullback$-$Leibler divergence"
text\<open>The Kullback$-$Leibler divergence is also known as relative entropy or
Kullback$-$Leibler distance.\<close>
definition "entropy_density b M N = log b \ enn2real \ RN_deriv M N"
definition "KL_divergence b M N = integral\<^sup>L N (entropy_density b M N)"
lemma measurable_entropy_density[measurable]: "entropy_density b M N \ borel_measurable M" unfolding entropy_density_def by auto
lemma (in sigma_finite_measure) KL_density: fixes f :: "'a \ real" assumes"1 < b" assumes f[measurable]: "f \ borel_measurable M" and nn: "AE x in M. 0 \ f x" shows"KL_divergence b M (density M f) = (\x. f x * log b (f x) \M)" unfolding KL_divergence_def proof (subst integral_real_density) show [measurable]: "entropy_density b M (density M (\x. ennreal (f x))) \ borel_measurable M" using f by (auto simp: comp_def entropy_density_def) have"density M (RN_deriv M (density M f)) = density M f" using f nn by (intro density_RN_deriv_density) auto thenhave eq: "AE x in M. RN_deriv M (density M f) x = f x" using f nn by (intro density_unique) auto have"AE x in M. f x * entropy_density b M (density M (\x. ennreal (f x))) x =
f x * log b (f x)" using eq nn by (auto simp: entropy_density_def) thenshow"(\x. f x * entropy_density b M (density M (\x. ennreal (f x))) x \M) = (\x. f x * log b (f x) \M)" by (intro integral_cong_AE) measurable qed fact+
lemma (in sigma_finite_measure) KL_density_density: fixes f g :: "'a \ real" assumes"1 < b" assumes f: "f \ borel_measurable M" "AE x in M. 0 \ f x" assumes g: "g \ borel_measurable M" "AE x in M. 0 \ g x" assumes ac: "AE x in M. f x = 0 \ g x = 0" shows"KL_divergence b (density M f) (density M g) = (\x. g x * log b (g x / f x) \M)" proof - interpret Mf: sigma_finite_measure "density M f" using f by (subst sigma_finite_iff_density_finite) auto have"KL_divergence b (density M f) (density M g) =
KL_divergence b (density M f) (density (density M f) (\<lambda>x. g x / f x))" using f g ac by (subst density_density_divide) simp_all alsohave"\ = (\x. (g x / f x) * log b (g x / f x) \density M f)" using f g \<open>1 < b\<close> by (intro Mf.KL_density) (auto simp: AE_density) alsohave"\ = (\x. g x * log b (g x / f x) \M)" using ac f g \<open>1 < b\<close> by (subst integral_density) (auto intro!: integral_cong_AE) finallyshow ?thesis . qed
lemma (in information_space) KL_gt_0: fixes D :: "'a \ real" assumes"prob_space (density M D)" assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" assumes int: "integrable M (\x. D x * log b (D x))" assumes A: "density M D \ M" shows"0 < KL_divergence b M (density M D)" proof - interpret N: prob_space "density M D"by fact
obtain A where"A \ sets M" "emeasure (density M D) A \ emeasure M A" using measure_eqI[of "density M D" M] \<open>density M D \<noteq> M\<close> by auto
let ?D_set = "{x\space M. D x \ 0}" have [simp, intro]: "?D_set \ sets M" using D by auto
have D_neg: "(\\<^sup>+ x. ennreal (- D x) \M) = 0" using D by (subst nn_integral_0_iff_AE) (auto simp: ennreal_neg)
have"(\\<^sup>+ x. ennreal (D x) \M) = emeasure (density M D) (space M)" using D by (simp add: emeasure_density cong: nn_integral_cong) thenhave D_pos: "(\\<^sup>+ x. ennreal (D x) \M) = 1" using N.emeasure_space_1 by simp
have"integrable M D" using D D_pos D_neg unfolding real_integrable_def real_lebesgue_integral_def by simp_all thenhave"integral\<^sup>L M D = 1" using D D_pos D_neg by (simp add: real_lebesgue_integral_def)
have"0 \ 1 - measure M ?D_set" using prob_le_1 by (auto simp: field_simps) alsohave"\ = (\ x. D x - indicator ?D_set x \M)" using\<open>integrable M D\<close> \<open>integral\<^sup>L M D = 1\<close> by (simp add: emeasure_eq_measure) alsohave"\ < (\ x. D x * (ln b * log b (D x)) \M)" proof (rule integral_less_AE) show"integrable M (\x. D x - indicator ?D_set x)" using\<open>integrable M D\<close> by (auto simp: less_top[symmetric]) next from integrable_mult_left(1)[OF int, of "ln b"] show"integrable M (\x. D x * (ln b * log b (D x)))" by (simp add: ac_simps) next show"emeasure M {x\space M. D x \ 1 \ D x \ 0} \ 0" proof assume eq_0: "emeasure M {x\space M. D x \ 1 \ D x \ 0} = 0" thenhave disj: "AE x in M. D x = 1 \ D x = 0" using D(1) by (auto intro!: AE_I[OF subset_refl] sets.sets_Collect)
have"emeasure M {x\space M. D x = 1} = (\\<^sup>+ x. indicator {x\space M. D x = 1} x \M)" using D(1) by auto alsohave"\ = (\\<^sup>+ x. ennreal (D x) \M)" using disj by (auto intro!: nn_integral_cong_AE simp: indicator_def one_ennreal_def) finallyhave"AE x in M. D x = 1" using D D_pos by (intro AE_I_eq_1) auto thenhave"(\\<^sup>+x. indicator A x\M) = (\\<^sup>+x. ennreal (D x) * indicator A x\M)" by (intro nn_integral_cong_AE) (auto simp: one_ennreal_def[symmetric]) alsohave"\ = density M D A" using\<open>A \<in> sets M\<close> D by (simp add: emeasure_density) finallyshow False using\<open>A \<in> sets M\<close> \<open>emeasure (density M D) A \<noteq> emeasure M A\<close> by simp qed show"{x\space M. D x \ 1 \ D x \ 0} \ sets M" using D(1) by (auto intro: sets.sets_Collect_conj)
have False if Dt: "t \ space M" "D t \ 1" "D t \ 0" "0 \ D t" and eq: "D t - indicator ?D_set t = D t * (ln b * log b (D t))"for t proof - have"D t - 1 = D t - indicator ?D_set t" using Dt by simp alsonote eq alsohave"D t * (ln b * log b (D t)) = - D t * ln (1 / D t)" using b_gt_1 \<open>D t \<noteq> 0\<close> \<open>0 \<le> D t\<close> by (simp add: log_def ln_div less_le) finallyhave"ln (1 / D t) = 1 / D t - 1" using\<open>D t \<noteq> 0\<close> by (auto simp: field_simps) from ln_eq_minus_one[OF _ this] \<open>D t \<noteq> 0\<close> \<open>0 \<le> D t\<close> \<open>D t \<noteq> 1\<close> show False by auto qed with D(2) show"AE t in M. t \ {x\space M. D x \ 1 \ D x \ 0} \
D t - indicator ?D_set t \<noteq> D t * (ln b * log b (D t))" by fastforce
show"AE t in M. D t - indicator ?D_set t \ D t * (ln b * log b (D t))" using D(2) AE_space proof eventually_elim fix t assume"t \ space M" "0 \ D t" show"D t - indicator ?D_set t \ D t * (ln b * log b (D t))" proof cases assume asm: "D t \ 0" thenhave"0 < D t"using\<open>0 \<le> D t\<close> by auto thenhave"0 < 1 / D t"by auto have"D t - indicator ?D_set t \ - D t * (1 / D t - 1)" using asm \<open>t \<in> space M\<close> by (simp add: field_simps) alsohave"- D t * (1 / D t - 1) \ - D t * ln (1 / D t)" using ln_le_minus_one \<open>0 < 1 / D t\<close> by (intro mult_left_mono_neg) auto alsohave"\ = D t * (ln b * log b (D t))" using\<open>0 < D t\<close> b_gt_1 by (simp_all add: log_def ln_div) finallyshow ?thesis by simp qed simp qed qed alsohave"\ = (\ x. ln b * (D x * log b (D x)) \M)" by (simp add: ac_simps) alsohave"\ = ln b * (\ x. D x * log b (D x) \M)" using int by simp finallyshow ?thesis using b_gt_1 D by (subst KL_density) (auto simp: zero_less_mult_iff) qed
lemma (in sigma_finite_measure) KL_same_eq_0: "KL_divergence b M M = 0" proof - have"AE x in M. 1 = RN_deriv M M x" proof (rule RN_deriv_unique) show"density M (\x. 1) = M" by (simp add: density_1) qed auto thenhave"AE x in M. log b (enn2real (RN_deriv M M x)) = 0" by (elim AE_mp) simp from integral_cong_AE[OF _ _ this] have"integral\<^sup>L M (entropy_density b M M) = 0" by (simp add: entropy_density_def comp_def) thenshow"KL_divergence b M M = 0" unfolding KL_divergence_def by auto qed
lemma (in information_space) KL_eq_0_iff_eq: fixes D :: "'a \ real" assumes"prob_space (density M D)" assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" assumes int: "integrable M (\x. D x * log b (D x))" shows"KL_divergence b M (density M D) = 0 \ density M D = M" using KL_same_eq_0[of b] KL_gt_0[OF assms] by (auto simp: less_le)
lemma (in information_space) KL_eq_0_iff_eq_ac: fixes D :: "'a \ real" assumes"prob_space N" assumes ac: "absolutely_continuous M N""sets N = sets M" assumes int: "integrable N (entropy_density b M N)" shows"KL_divergence b M N = 0 \ N = M" proof - interpret N: prob_space N by fact have"finite_measure N"by unfold_locales from real_RN_deriv[OF this ac] obtain D where D: "random_variable borel D" "AE x in M. RN_deriv M N x = ennreal (D x)" "AE x in N. 0 < D x" "\x. 0 \ D x" by this auto
have"N = density M (RN_deriv M N)" using ac by (rule density_RN_deriv[symmetric]) alsohave"\ = density M D" using D by (auto intro!: density_cong) finallyhave N: "N = density M D" .
from absolutely_continuous_AE[OF ac(2,1) D(2)] D b_gt_1 ac measurable_entropy_density have"integrable N (\x. log b (D x))" by (intro integrable_cong_AE[THEN iffD2, OF _ _ _ int])
(auto simp: N entropy_density_def) with D b_gt_1 have"integrable M (\x. D x * log b (D x))" by (subst integrable_real_density[symmetric]) (auto simp: N[symmetric] comp_def) with\<open>prob_space N\<close> D show ?thesis unfolding N by (intro KL_eq_0_iff_eq) auto qed
lemma (in information_space) KL_nonneg: assumes"prob_space (density M D)" assumes D: "D \ borel_measurable M" "AE x in M. 0 \ D x" assumes int: "integrable M (\x. D x * log b (D x))" shows"0 \ KL_divergence b M (density M D)" using KL_gt_0[OF assms] by (cases "density M D = M") (auto simp: KL_same_eq_0)
lemma (in sigma_finite_measure) KL_density_density_nonneg: fixes f g :: "'a \ real" assumes"1 < b" assumes f: "f \ borel_measurable M" "AE x in M. 0 \ f x" "prob_space (density M f)" assumes g: "g \ borel_measurable M" "AE x in M. 0 \ g x" "prob_space (density M g)" assumes ac: "AE x in M. f x = 0 \ g x = 0" assumes int: "integrable M (\x. g x * log b (g x / f x))" shows"0 \ KL_divergence b (density M f) (density M g)" proof - interpret Mf: prob_space "density M f"by fact interpret Mf: information_space "density M f" b by standard fact have eq: "density (density M f) (\x. g x / f x) = density M g" (is "?DD = _") using f g ac by (subst density_density_divide) simp_all
have"0 \ KL_divergence b (density M f) (density (density M f) (\x. g x / f x))" proof (rule Mf.KL_nonneg) show"prob_space ?DD"unfolding eq by fact from f g show"(\x. g x / f x) \ borel_measurable (density M f)" by auto show"AE x in density M f. 0 \ g x / f x" using f g by (auto simp: AE_density) show"integrable (density M f) (\x. g x / f x * log b (g x / f x))" using\<open>1 < b\<close> f g ac by (subst integrable_density)
(auto intro!: integrable_cong_AE[THEN iffD2, OF _ _ _ int] measurable_If) qed alsohave"\ = KL_divergence b (density M f) (density M g)" using f g ac by (subst density_density_divide) simp_all finallyshow ?thesis . qed
subsection \<open>Finite Entropy\<close>
definition (in information_space) finite_entropy :: "'b measure \ ('a \ 'b) \ ('b \ real) \ bool" where "finite_entropy S X f \
distributed M S X f \<and>
integrable S (\<lambda>x. f x * log b (f x)) \<and>
(\<forall>x\<in>space S. 0 \<le> f x)"
lemma (in information_space) finite_entropy_simple_function: assumes X: "simple_function M X" shows"finite_entropy (count_space (X`space M)) X (\a. measure M {x \ space M. X x = a})" unfolding finite_entropy_def proof safe have [simp]: "finite (X ` space M)" using X by (auto simp: simple_function_def) thenshow"integrable (count_space (X ` space M))
(\<lambda>x. prob {xa \<in> space M. X xa = x} * log b (prob {xa \<in> space M. X xa = x}))" by (rule integrable_count_space) have d: "distributed M (count_space (X ` space M)) X (\x. ennreal (if x \ X`space M then prob {xa \ space M. X xa = x} else 0))" by (rule distributed_simple_function_superset[OF X]) (auto intro!: arg_cong[where f=prob]) show"distributed M (count_space (X ` space M)) X (\x. ennreal (prob {xa \ space M. X xa = x}))" by (rule distributed_cong_density[THEN iffD1, OF _ _ _ d]) auto qed (rule measure_nonneg)
lemma ac_fst: assumes"sigma_finite_measure T" shows"absolutely_continuous S (distr (S \\<^sub>M T) S fst)" proof - interpret sigma_finite_measure T by fact
{ fix A assume A: "A \ sets S" "emeasure S A = 0" thenhave"fst -` A \ space (S \\<^sub>M T) = A \ space T" by (auto simp: space_pair_measure dest!: sets.sets_into_space) with A have"emeasure (S \\<^sub>M T) (fst -` A \ space (S \\<^sub>M T)) = 0" by (simp add: emeasure_pair_measure_Times) } thenshow ?thesis unfolding absolutely_continuous_def by (metis emeasure_distr measurable_fst null_setsD1 null_setsD2 null_setsI sets_distr subsetI) qed
lemma ac_snd: assumes"sigma_finite_measure T" shows"absolutely_continuous T (distr (S \\<^sub>M T) T snd)" proof - interpret sigma_finite_measure T by fact
{ fix A assume A: "A \ sets T" "emeasure T A = 0" thenhave"snd -` A \ space (S \\<^sub>M T) = space S \ A" by (auto simp: space_pair_measure dest!: sets.sets_into_space) with A have"emeasure (S \\<^sub>M T) (snd -` A \ space (S \\<^sub>M T)) = 0" by (simp add: emeasure_pair_measure_Times) } thenshow ?thesis unfolding absolutely_continuous_def by (metis emeasure_distr measurable_snd null_setsD1 null_setsD2 null_setsI sets_distr subsetI) qed
lemma (in information_space) finite_entropy_integrable: "finite_entropy S X Px \ integrable S (\x. Px x * log b (Px x))" unfolding finite_entropy_def by auto
lemma (in information_space) finite_entropy_distributed: "finite_entropy S X Px \ distributed M S X Px" unfolding finite_entropy_def by auto
lemma (in information_space) finite_entropy_nn: "finite_entropy S X Px \ x \ space S \ 0 \ Px x" by (auto simp: finite_entropy_def)
lemma (in information_space) finite_entropy_measurable: "finite_entropy S X Px \ Px \ S \\<^sub>M borel" using distributed_real_measurable[of S Px M X]
finite_entropy_nn[of S X Px] finite_entropy_distributed[of S X Px] by auto
lemma (in information_space) subdensity_finite_entropy: fixes g :: "'b \ real" and f :: "'c \ real" assumes T: "T \ measurable P Q" assumes f: "finite_entropy P X f" assumes g: "finite_entropy Q Y g" assumes Y: "Y = T \ X" shows"AE x in P. g (T x) = 0 \ f x = 0" using subdensity[OF T, of M X "\x. ennreal (f x)" Y "\x. ennreal (g x)"]
finite_entropy_distributed[OF f] finite_entropy_distributed[OF g]
finite_entropy_nn[OF f] finite_entropy_nn[OF g]
assms by auto
lemma (in information_space) finite_entropy_integrable_transform: "finite_entropy S X Px \ distributed M T Y Py \ (\x. x \ space T \ 0 \ Py x) \
X = (\<lambda>x. f (Y x)) \<Longrightarrow> f \<in> measurable T S \<Longrightarrow> integrable T (\<lambda>x. Py x * log b (Px (f x)))" using distributed_transform_integrable[of M T Y Py S X Px f "\x. log b (Px x)"] using distributed_real_measurable[of S Px M X] by (auto simp: finite_entropy_def)
subsection \<open>Mutual Information\<close>
definition (in prob_space) "mutual_information b S T X Y =
KL_divergence b (distr M S X \<Otimes>\<^sub>M distr M T Y) (distr M (S \<Otimes>\<^sub>M T) (\<lambda>x. (X x, Y x)))"
lemma (in information_space) mutual_information_indep_vars: fixes S T X Y defines"P \ distr M S X \\<^sub>M distr M T Y" defines"Q \ distr M (S \\<^sub>M T) (\x. (X x, Y x))" shows"indep_var S X T Y \
(random_variable S X \<and> random_variable T Y \<and>
absolutely_continuous P Q \<and> integrable Q (entropy_density b P Q) \<and>
mutual_information b S T X Y = 0)" unfolding indep_var_distribution_eq proof safe assume rv[measurable]: "random_variable S X""random_variable T Y"
interpret X: prob_space "distr M S X" by (rule prob_space_distr) fact interpret Y: prob_space "distr M T Y" by (rule prob_space_distr) fact interpret XY: pair_prob_space "distr M S X""distr M T Y"by standard interpret P: information_space P b unfolding P_def by standard (rule b_gt_1)
interpret Q: prob_space Q unfolding Q_def by (rule prob_space_distr) simp
{ assume"distr M S X \\<^sub>M distr M T Y = distr M (S \\<^sub>M T) (\x. (X x, Y x))" thenhave [simp]: "Q = P"unfolding Q_def P_def by simp
show ac: "absolutely_continuous P Q"by (simp add: absolutely_continuous_def) thenhave ed: "entropy_density b P Q \ borel_measurable P" by simp
have"AE x in P. 1 = RN_deriv P Q x" proof (rule P.RN_deriv_unique) show"density P (\x. 1) = Q" unfolding\<open>Q = P\<close> by (intro measure_eqI) (auto simp: emeasure_density) qed auto thenhave ae_0: "AE x in P. entropy_density b P Q x = 0" by (auto simp: entropy_density_def) thenhave"integrable P (entropy_density b P Q) \ integrable Q (\x. 0::real)" using ed unfolding\<open>Q = P\<close> by (intro integrable_cong_AE) auto thenshow"integrable Q (entropy_density b P Q)"by simp
from ae_0 have"mutual_information b S T X Y = (\x. 0 \P)" unfolding mutual_information_def KL_divergence_def P_def[symmetric] Q_def[symmetric] \<open>Q = P\<close> by (intro integral_cong_AE) auto thenshow"mutual_information b S T X Y = 0" by simp }
{ assume ac: "absolutely_continuous P Q" assume int: "integrable Q (entropy_density b P Q)" assume I_eq_0: "mutual_information b S T X Y = 0"
have eq: "Q = P" proof (rule P.KL_eq_0_iff_eq_ac[THEN iffD1]) show"prob_space Q"by unfold_locales show"absolutely_continuous P Q"by fact show"integrable Q (entropy_density b P Q)"by fact show"sets Q = sets P"by (simp add: P_def Q_def sets_pair_measure) show"KL_divergence b P Q = 0" using I_eq_0 unfolding mutual_information_def by (simp add: P_def Q_def) qed thenshow"distr M S X \\<^sub>M distr M T Y = distr M (S \\<^sub>M T) (\x. (X x, Y x))" unfolding P_def Q_def .. } qed
abbreviation (in information_space)
mutual_information_Pow (\<open>\<I>'(_ ; _')\<close>) where "\(X ; Y) \ mutual_information b (count_space (X`space M)) (count_space (Y`space M)) X Y"
lemma (in information_space) fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T" assumes Fx: "finite_entropy S X Px"and Fy: "finite_entropy T Y Py" assumes Fxy: "finite_entropy (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" defines"f \ \x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" shows mutual_information_distr': "mutual_information b S T X Y = integral\<^sup>L (S \\<^sub>M T) f" (is "?M = ?R") and mutual_information_nonneg': "0 \ mutual_information b S T X Y" proof - have Px: "distributed M S X Px"and Px_nn: "\x. x \ space S \ 0 \ Px x" using Fx by (auto simp: finite_entropy_def) have Py: "distributed M T Y Py"and Py_nn: "\x. x \ space T \ 0 \ Py x" using Fy by (auto simp: finite_entropy_def) have Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" and Pxy_nn: "\x. x \ space (S \\<^sub>M T) \ 0 \ Pxy x" "\x y. x \ space S \ y \ space T \ 0 \ Pxy (x, y)" using Fxy by (auto simp: finite_entropy_def space_pair_measure)
have [measurable]: "Px \ S \\<^sub>M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Py \ T \\<^sub>M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy \ (S \\<^sub>M T) \\<^sub>M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
have X[measurable]: "random_variable S X" using Px by auto have Y[measurable]: "random_variable T Y" using Py by auto interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. interpret X: prob_space "distr M S X"using X by (rule prob_space_distr) interpret Y: prob_space "distr M T Y"using Y by (rule prob_space_distr) interpret XY: pair_prob_space "distr M S X""distr M T Y" .. let ?P = "S \\<^sub>M T" let ?D = "distr M ?P (\x. (X x, Y x))"
{ fix A assume"A \ sets S" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M S X) A = emeasure ?D (A \ space T)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq1 = this
{ fix A assume"A \ sets T" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M T Y) A = emeasure ?D (space S \ A)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq2 = this
have distr_eq: "distr M S X \\<^sub>M distr M T Y = density ?P (\x. ennreal (Px (fst x) * Py (snd x)))" unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] proof (subst pair_measure_density) show"(\x. ennreal (Px x)) \ borel_measurable S" "(\y. ennreal (Py y)) \ borel_measurable T" using Px Py by (auto simp: distributed_def) show"sigma_finite_measure (density T Py)"unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. show"density (S \\<^sub>M T) (\(x, y). ennreal (Px x) * ennreal (Py y)) =
density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) qed fact
from Px Py have f: "(\x. Px (fst x) * Py (snd x)) \ borel_measurable ?P" by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') have PxPy_nonneg: "AE x in ?P. 0 \ Px (fst x) * Py (snd x)" using Px_nn Py_nn by (auto simp: space_pair_measure)
have A: "(AE x in ?P. Px (fst x) = 0 \ Pxy x = 0)" by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) moreover have B: "(AE x in ?P. Py (snd x) = 0 \ Pxy x = 0)" by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) ultimatelyhave ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \ Pxy x = 0" by auto
show"?M = ?R" unfolding M f_def using Pxy_nn Px_nn Py_nn by (intro ST.KL_density_density b_gt_1 f PxPy_nonneg ac) (auto simp: space_pair_measure)
have X: "X = fst \ (\x. (X x, Y x))" and Y: "Y = snd \ (\x. (X x, Y x))" by auto
have"integrable (S \\<^sub>M T) (\x. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x)))" using finite_entropy_integrable[OF Fxy] using finite_entropy_integrable_transform[OF Fx Pxy, of fst] using finite_entropy_integrable_transform[OF Fy Pxy, of snd] by (simp add: Pxy_nn) moreoverhave"f \ borel_measurable (S \\<^sub>M T)" unfolding f_def using Px Py Pxy by (auto intro: distributed_real_measurable measurable_fst'' measurable_snd''
intro!: borel_measurable_times borel_measurable_log borel_measurable_divide) ultimatelyhave int: "integrable (S \\<^sub>M T) f" proof (rule integrable_cong_AE_imp) show"AE x in S \\<^sub>M
T. Pxy x * log b (Pxy x) - Pxy x * log b (Px (fst x)) - Pxy x * log b (Py (snd x))
= f x" using A B by (auto simp: f_def field_simps space_pair_measure log_mult log_divide) qed
show"0 \ ?M" unfolding M proof (intro ST.KL_density_density_nonneg) show"prob_space (density (S \\<^sub>M T) (\x. ennreal (Pxy x))) " unfolding distributed_distr_eq_density[OF Pxy, symmetric] using distributed_measurable[OF Pxy] by (rule prob_space_distr) show"prob_space (density (S \\<^sub>M T) (\x. ennreal (Px (fst x) * Py (snd x))))" unfolding distr_eq[symmetric] by unfold_locales show"integrable (S \\<^sub>M T) (\x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" using int unfolding f_def . qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) qed
lemma (in information_space) fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" assumes"sigma_finite_measure S""sigma_finite_measure T" assumes Px: "distributed M S X Px"and Px_nn: "\x. x \ space S \ 0 \ Px x" and Py: "distributed M T Y Py"and Py_nn: "\y. y \ space T \ 0 \ Py y" and Pxy: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" and Pxy_nn: "\x y. x \ space S \ y \ space T \ 0 \ Pxy (x, y)" defines"f \ \x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x)))" shows mutual_information_distr: "mutual_information b S T X Y = integral\<^sup>L (S \\<^sub>M T) f" (is "?M = ?R") and mutual_information_nonneg: "integrable (S \\<^sub>M T) f \ 0 \ mutual_information b S T X Y" proof - have X[measurable]: "random_variable S X" using Px by (auto simp: distributed_def) have Y[measurable]: "random_variable T Y" using Py by (auto simp: distributed_def) have [measurable]: "Px \ S \\<^sub>M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Py \ T \\<^sub>M borel" using Py Py_nn by (intro distributed_real_measurable) have measurable_Pxy[measurable]: "Pxy \ (S \\<^sub>M T) \\<^sub>M borel" using Pxy Pxy_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. interpret X: prob_space "distr M S X"using X by (rule prob_space_distr) interpret Y: prob_space "distr M T Y"using Y by (rule prob_space_distr) interpret XY: pair_prob_space "distr M S X""distr M T Y" .. let ?P = "S \\<^sub>M T" let ?D = "distr M ?P (\x. (X x, Y x))"
{ fix A assume"A \ sets S" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M S X) A = emeasure ?D (A \ space T)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq1 = this
{ fix A assume"A \ sets T" with X[THEN measurable_space] Y[THEN measurable_space] have"emeasure (distr M T Y) A = emeasure ?D (space S \ A)" by (auto simp: emeasure_distr intro!: arg_cong[where f="emeasure M"]) } note marginal_eq2 = this
have distr_eq: "distr M S X \\<^sub>M distr M T Y = density ?P (\x. ennreal (Px (fst x) * Py (snd x)))" unfolding Px(1)[THEN distributed_distr_eq_density] Py(1)[THEN distributed_distr_eq_density] proof (subst pair_measure_density) show"(\x. ennreal (Px x)) \ borel_measurable S" "(\y. ennreal (Py y)) \ borel_measurable T" using Px Py by (auto simp: distributed_def) show"sigma_finite_measure (density T Py)"unfolding Py(1)[THEN distributed_distr_eq_density, symmetric] .. show"density (S \\<^sub>M T) (\(x, y). ennreal (Px x) * ennreal (Py y)) =
density (S \<Otimes>\<^sub>M T) (\<lambda>x. ennreal (Px (fst x) * Py (snd x)))" using Px_nn Py_nn by (auto intro!: density_cong simp: distributed_def ennreal_mult space_pair_measure) qed fact
from Px Py have f: "(\x. Px (fst x) * Py (snd x)) \ borel_measurable ?P" by (intro borel_measurable_times) (auto intro: distributed_real_measurable measurable_fst'' measurable_snd'') have PxPy_nonneg: "AE x in ?P. 0 \ Px (fst x) * Py (snd x)" using Px_nn Py_nn by (auto simp: space_pair_measure)
have"(AE x in ?P. Px (fst x) = 0 \ Pxy x = 0)" by (rule subdensity_real[OF measurable_fst Pxy Px]) (insert Px_nn Pxy_nn, auto simp: space_pair_measure) moreover have"(AE x in ?P. Py (snd x) = 0 \ Pxy x = 0)" by (rule subdensity_real[OF measurable_snd Pxy Py]) (insert Py_nn Pxy_nn, auto simp: space_pair_measure) ultimatelyhave ac: "AE x in ?P. Px (fst x) * Py (snd x) = 0 \ Pxy x = 0" by auto
show"?M = ?R" unfolding M f_def using b_gt_1 f PxPy_nonneg ac Pxy_nn by (intro ST.KL_density_density) (auto simp: space_pair_measure)
assume int: "integrable (S \\<^sub>M T) f" show"0 \ ?M" unfolding M proof (intro ST.KL_density_density_nonneg) show"prob_space (density (S \\<^sub>M T) (\x. ennreal (Pxy x))) " unfolding distributed_distr_eq_density[OF Pxy, symmetric] using distributed_measurable[OF Pxy] by (rule prob_space_distr) show"prob_space (density (S \\<^sub>M T) (\x. ennreal (Px (fst x) * Py (snd x))))" unfolding distr_eq[symmetric] by unfold_locales show"integrable (S \\<^sub>M T) (\x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))))" using int unfolding f_def . qed (insert ac, auto simp: b_gt_1 Px_nn Py_nn Pxy_nn space_pair_measure) qed
lemma (in information_space) fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" assumes"sigma_finite_measure S""sigma_finite_measure T" assumes Px[measurable]: "distributed M S X Px"and Px_nn: "\x. x \ space S \ 0 \ Px x" and Py[measurable]: "distributed M T Y Py"and Py_nn: "\x. x \ space T \ 0 \ Py x" and Pxy[measurable]: "distributed M (S \\<^sub>M T) (\x. (X x, Y x)) Pxy" and Pxy_nn: "\x. x \ space (S \\<^sub>M T) \ 0 \ Pxy x" assumes ae: "AE x in S. AE y in T. Pxy (x, y) = Px x * Py y" shows mutual_information_eq_0: "mutual_information b S T X Y = 0" proof - interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret ST: pair_sigma_finite S T .. note
distributed_real_measurable[OF Px_nn Px, measurable]
distributed_real_measurable[OF Py_nn Py, measurable]
distributed_real_measurable[OF Pxy_nn Pxy, measurable]
have"AE x in S \\<^sub>M T. Px (fst x) = 0 \ Pxy x = 0" by (rule subdensity_real[OF measurable_fst Pxy Px]) (auto simp: Px_nn Pxy_nn space_pair_measure) moreover have"AE x in S \\<^sub>M T. Py (snd x) = 0 \ Pxy x = 0" by (rule subdensity_real[OF measurable_snd Pxy Py]) (auto simp: Py_nn Pxy_nn space_pair_measure) moreover have"AE x in S \\<^sub>M T. Pxy x = Px (fst x) * Py (snd x)" by (intro ST.AE_pair_measure) (auto simp: ae intro!: measurable_snd'' measurable_fst'') ultimatelyhave"AE x in S \\<^sub>M T. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) = 0" by eventually_elim simp thenhave"(\x. Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) \(S \\<^sub>M T)) = (\x. 0 \(S \\<^sub>M T))" by (intro integral_cong_AE) auto thenshow ?thesis by (subst mutual_information_distr[OF assms(1-8)]) (auto simp add: space_pair_measure) qed
lemma (in information_space) mutual_information_simple_distributed: assumes X: "simple_distributed M X Px"and Y: "simple_distributed M Y Py" assumes XY: "simple_distributed M (\x. (X x, Y x)) Pxy" shows"\(X ; Y) = (\(x, y)\(\x. (X x, Y x))`space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" proof (subst mutual_information_distr[OF _ _ simple_distributed[OF X] _ simple_distributed[OF Y] _ simple_distributed_joint[OF XY]]) note fin = simple_distributed_joint_finite[OF XY, simp] show"sigma_finite_measure (count_space (X ` space M))" by (simp add: sigma_finite_measure_count_space_finite) show"sigma_finite_measure (count_space (Y ` space M))" by (simp add: sigma_finite_measure_count_space_finite) let ?Pxy = "\x. (if x \ (\x. (X x, Y x)) ` space M then Pxy x else 0)" let ?f = "\x. ?Pxy x * log b (?Pxy x / (Px (fst x) * Py (snd x)))" have"\x. ?f x = (if x \ (\x. (X x, Y x)) ` space M then Pxy x * log b (Pxy x / (Px (fst x) * Py (snd x))) else 0)" by auto with fin show"(\ x. ?f x \(count_space (X ` space M) \\<^sub>M count_space (Y ` space M))) =
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y)))" by (auto simp add: pair_measure_count_space lebesgue_integral_count_space_finite sum.If_cases split_beta'
intro!: sum.cong) qed (insert X Y XY, auto simp: simple_distributed_def)
lemma (in information_space) fixes Pxy :: "'b \ 'c \ real" and Px :: "'b \ real" and Py :: "'c \ real" assumes Px: "simple_distributed M X Px"and Py: "simple_distributed M Y Py" assumes Pxy: "simple_distributed M (\x. (X x, Y x)) Pxy" assumes ae: "\x\space M. Pxy (X x, Y x) = Px (X x) * Py (Y x)" shows mutual_information_eq_0_simple: "\(X ; Y) = 0" proof (subst mutual_information_simple_distributed[OF Px Py Pxy]) have"(\(x, y)\(\x. (X x, Y x)) ` space M. Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) =
(\<Sum>(x, y)\<in>(\<lambda>x. (X x, Y x)) ` space M. 0)" by (intro sum.cong) (auto simp: ae) thenshow"(\(x, y)\(\x. (X x, Y x)) ` space M.
Pxy (x, y) * log b (Pxy (x, y) / (Px x * Py y))) = 0" by simp qed
subsection \<open>Entropy\<close>
definition (in prob_space) entropy :: "real \ 'b measure \ ('a \ 'b) \ real" where "entropy b S X = - KL_divergence b S (distr M S X)"
abbreviation (in information_space)
entropy_Pow (\<open>\<H>'(_')\<close>) where "\(X) \ entropy b (count_space (X`space M)) X"
lemma (in prob_space) distributed_RN_deriv: assumes X: "distributed M S X Px" shows"AE x in S. RN_deriv S (density S Px) x = Px x" proof - have"distributed M S X (RN_deriv S (density S Px))" by (metis RN_derivI assms borel_measurable_RN_deriv distributed_def) thenshow ?thesis using assms distributed_unique by blast qed
lemma (in information_space) fixes X :: "'a \ 'b" assumes X[measurable]: "distributed M MX X f"and nn: "\x. x \ space MX \ 0 \ f x" shows entropy_distr: "entropy b MX X = - (\x. f x * log b (f x) \MX)" (is ?eq) proof - note D = distributed_measurable[OF X] distributed_borel_measurable[OF X] note ae = distributed_RN_deriv[OF X] note distributed_real_measurable[OF nn X, measurable]
have ae_eq: "AE x in distr M MX X. log b (enn2real (RN_deriv MX (distr M MX X) x)) = log b (f x)" unfolding distributed_distr_eq_density[OF X] using D ae by (auto simp: AE_density)
have int_eq: "(\ x. f x * log b (f x) \MX) = (\ x. log b (f x) \distr M MX X)" unfolding distributed_distr_eq_density[OF X] using D by (subst integral_density) (auto simp: nn)
show ?eq unfolding entropy_def KL_divergence_def entropy_density_def comp_def int_eq neg_equal_iff_equal using ae_eq by (intro integral_cong_AE) (auto simp: nn) qed
lemma (in information_space) entropy_le: fixes Px :: "'b \ real" and MX :: "'b measure" assumes X[measurable]: "distributed M MX X Px"and Px_nn[simp]: "\x. x \ space MX \ 0 \ Px x" and fin: "emeasure MX {x \ space MX. Px x \ 0} \ top" and int: "integrable MX (\x. - Px x * log b (Px x))" shows"entropy b MX X \ log b (measure MX {x \ space MX. Px x \ 0})" proof - note Px = distributed_borel_measurable[OF X] interpret X: prob_space "distr M MX X" using distributed_measurable[OF X] by (rule prob_space_distr)
have" - log b (measure MX {x \ space MX. Px x \ 0}) =
- log b (\<integral> x. indicator {x \<in> space MX. Px x \<noteq> 0} x \<partial>MX)" using Px Px_nn fin by (auto simp: measure_def) alsohave"- log b (\ x. indicator {x \ space MX. Px x \ 0} x \MX) = - log b (\ x. 1 / Px x \distr M MX X)" proof - have"integral\<^sup>L MX (indicator {x \ space MX. Px x \ 0}) = LINT x|MX. Px x *\<^sub>R (1 / Px x)" by (rule Bochner_Integration.integral_cong) auto alsohave"... = LINT x|density MX (\x. ennreal (Px x)). 1 / Px x" by (rule integral_density [symmetric]) (use Px Px_nn in auto) finallyshow ?thesis unfolding distributed_distr_eq_density[OF X] by simp qed alsohave"\ \ (\ x. - log b (1 / Px x) \distr M MX X)" proof (rule X.jensens_inequality[of "\x. 1 / Px x" "{0<..}" 0 1 "\x. - log b x"]) show"AE x in distr M MX X. 1 / Px x \ {0<..}" unfolding distributed_distr_eq_density[OF X] using Px by (auto simp: AE_density) have [simp]: "\x. x \ space MX \ ennreal (if Px x = 0 then 0 else 1) = indicator {x \ space MX. Px x \ 0} x" by (auto simp: one_ennreal_def) have"(\\<^sup>+ x. ennreal (- (if Px x = 0 then 0 else 1)) \MX) = (\\<^sup>+ x. 0 \MX)" by (intro nn_integral_cong) (auto simp: ennreal_neg) thenshow"integrable (distr M MX X) (\x. 1 / Px x)" unfolding distributed_distr_eq_density[OF X] using Px by (auto simp: nn_integral_density real_integrable_def fin ennreal_neg ennreal_mult[symmetric]
cong: nn_integral_cong) have"integrable MX (\x. Px x * log b (1 / Px x)) =
integrable MX (\<lambda>x. - Px x * log b (Px x))" using Px by (intro integrable_cong_AE) (auto simp: log_divide_pos log_recip) thenshow"integrable (distr M MX X) (\x. - log b (1 / Px x))" unfolding distributed_distr_eq_density[OF X] using Px int by (subst integrable_real_density) auto qed (auto simp: minus_log_convex[OF b_gt_1]) alsohave"\ = (\ x. log b (Px x) \distr M MX X)" unfolding distributed_distr_eq_density[OF X] using Px by (intro integral_cong_AE) (auto simp: AE_density log_divide_pos) alsohave"\ = - entropy b MX X" unfolding distributed_distr_eq_density[OF X] using Px by (subst entropy_distr[OF X]) (auto simp: integral_density) finallyshow ?thesis by simp qed
lemma (in information_space) entropy_le_space: fixes Px :: "'b \ real" and MX :: "'b measure" assumes X: "distributed M MX X Px"and Px_nn[simp]: "\x. x \ space MX \ 0 \ Px x" and fin: "finite_measure MX" and int: "integrable MX (\x. - Px x * log b (Px x))" shows"entropy b MX X \ log b (measure MX (space MX))" proof - note Px = distributed_borel_measurable[OF X] interpret finite_measure MX by fact have"entropy b MX X \ log b (measure MX {x \ space MX. Px x \ 0})" using int X by (intro entropy_le) auto alsohave"\ \ log b (measure MX (space MX))" using Px distributed_imp_emeasure_nonzero[OF X] by (intro Transcendental.log_mono)
(auto intro!: finite_measure_mono b_gt_1 less_le[THEN iffD2]
simp: emeasure_eq_measure cong: conj_cong) finallyshow ?thesis . qed
lemma (in information_space) entropy_uniform: assumes X: "distributed M MX X (\x. indicator A x / measure MX A)" (is "distributed _ _ _ ?f") shows"entropy b MX X = log b (measure MX A)" proof (subst entropy_distr[OF X]) have [simp]: "emeasure MX A \ \" using uniform_distributed_params[OF X] by (auto simp add: measure_def) have eq: "(\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) =
(\<integral> x. (- log b (measure MX A) / measure MX A) * indicator A x \<partial>MX)" using uniform_distributed_params[OF X] by (intro Bochner_Integration.integral_cong) (auto split: split_indicator simp: log_divide_pos zero_less_measure_iff) show"- (\ x. indicator A x / measure MX A * log b (indicator A x / measure MX A) \MX) =
log b (measure MX A)" unfolding eq using uniform_distributed_params[OF X] by (subst Bochner_Integration.integral_mult_right) (auto simp: measure_def less_top[symmetric] intro!: integrable_real_indicator) qed simp
lemma (in information_space) entropy_simple_distributed: "simple_distributed M X f \ \(X) = - (\x\X`space M. f x * log b (f x))" by (subst entropy_distr[OF simple_distributed])
(auto simp add: lebesgue_integral_count_space_finite)
lemma (in information_space) entropy_le_card_not_0: assumes X: "simple_distributed M X f" shows"\(X) \ log b (card (X ` space M \ {x. f x \ 0}))" proof - let ?X = "count_space (X`space M)" have"\(X) \ log b (measure ?X {x \ space ?X. f x \ 0})" by (rule entropy_le[OF simple_distributed[OF X]])
(insert X, auto simp: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space) alsohave"measure ?X {x \ space ?X. f x \ 0} = card (X ` space M \ {x. f x \ 0})" by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def Int_def) finallyshow ?thesis . qed
lemma (in information_space) entropy_le_card: assumes X: "simple_distributed M X f" shows"\(X) \ log b (real (card (X ` space M)))" proof - let ?X = "count_space (X`space M)" have"\(X) \ log b (measure ?X (space ?X))" by (rule entropy_le_space[OF simple_distributed[OF X]])
(insert X, auto simp: simple_distributed_finite[OF X] subset_eq integrable_count_space emeasure_count_space finite_measure_count_space) alsohave"measure ?X (space ?X) = card (X ` space M)" by (simp_all add: simple_distributed_finite[OF X] subset_eq emeasure_count_space measure_def) finallyshow ?thesis . qed
definition (in prob_space) "conditional_mutual_information b MX MY MZ X Y Z \
mutual_information b MX (MY \<Otimes>\<^sub>M MZ) X (\<lambda>x. (Y x, Z x)) -
mutual_information b MX MZ X Z"
abbreviation (in information_space)
conditional_mutual_information_Pow (\<open>\<I>'( _ ; _ | _ ')\<close>) where "\(X ; Y | Z) \ conditional_mutual_information b
(count_space (X ` space M)) (count_space (Y ` space M)) (count_space (Z ` space M)) X Y Z"
lemma (in information_space) assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T"and P: "sigma_finite_measure P" assumes Px[measurable]: "distributed M S X Px" and Px_nn[simp]: "\x. x \ space S \ 0 \ Px x" assumes Pz[measurable]: "distributed M P Z Pz" and Pz_nn[simp]: "\z. z \ space P \ 0 \ Pz z" assumes Pyz[measurable]: "distributed M (T \\<^sub>M P) (\x. (Y x, Z x)) Pyz" and Pyz_nn[simp]: "\y z. y \ space T \ z \ space P \ 0 \ Pyz (y, z)" assumes Pxz[measurable]: "distributed M (S \\<^sub>M P) (\x. (X x, Z x)) Pxz" and Pxz_nn[simp]: "\x z. x \ space S \ z \ space P \ 0 \ Pxz (x, z)" assumes Pxyz[measurable]: "distributed M (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x)) Pxyz" and Pxyz_nn[simp]: "\x y z. x \ space S \ y \ space T \ z \ space P \ 0 \ Pxyz (x, y, z)" assumes I1: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" assumes I2: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxz (x, z) / (Px x * Pz z)))" shows conditional_mutual_information_generic_eq: "conditional_mutual_information b S T P X Y Z
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" (is "?eq") and conditional_mutual_information_generic_nonneg: "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") proof - have [measurable]: "Px \ S \\<^sub>M borel" using Px Px_nn by (intro distributed_real_measurable) have [measurable]: "Pz \ P \\<^sub>M borel" using Pz Pz_nn by (intro distributed_real_measurable) have measurable_Pyz[measurable]: "Pyz \ (T \\<^sub>M P) \\<^sub>M borel" using Pyz Pyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) have measurable_Pxz[measurable]: "Pxz \ (S \\<^sub>M P) \\<^sub>M borel" using Pxz Pxz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure) have measurable_Pxyz[measurable]: "Pxyz \ (S \\<^sub>M T \\<^sub>M P) \\<^sub>M borel" using Pxyz Pxyz_nn by (intro distributed_real_measurable) (auto simp: space_pair_measure)
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret P: sigma_finite_measure P by fact interpret TP: pair_sigma_finite T P .. interpret SP: pair_sigma_finite S P .. interpret ST: pair_sigma_finite S T .. interpret SPT: pair_sigma_finite "S \\<^sub>M P" T .. interpret STP: pair_sigma_finite S "T \\<^sub>M P" .. interpret TPS: pair_sigma_finite "T \\<^sub>M P" S .. have TP: "sigma_finite_measure (T \\<^sub>M P)" .. have SP: "sigma_finite_measure (S \\<^sub>M P)" .. have YZ: "random_variable (T \\<^sub>M P) (\x. (Y x, Z x))" using Pyz by (simp add: distributed_measurable)
from Pxz Pxyz have distr_eq: "distr M (S \\<^sub>M P) (\x. (X x, Z x)) =
distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" by (simp add: comp_def distr_distr)
have"mutual_information b S P X Z =
(\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" by (rule mutual_information_distr[OF S P Px Px_nn Pz Pz_nn Pxz Pxz_nn]) alsohave"\ = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" using b_gt_1 Pxz Px Pz by (subst distributed_transform_integral[OF Pxyz _ Pxz _, where T="\(x, y, z). (x, z)"])
(auto simp: split_beta' space_pair_measure) finallyhave mi_eq: "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" .
have ae1: "AE x in S \\<^sub>M T \\<^sub>M P. Px (fst x) = 0 \ Pxyz x = 0" by (intro subdensity_real[of fst, OF _ Pxyz Px]) (auto simp: space_pair_measure) moreoverhave ae2: "AE x in S \\<^sub>M T \\<^sub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_real[of "\x. snd (snd x)", OF _ Pxyz Pz]) (auto simp: space_pair_measure) moreoverhave ae3: "AE x in S \\<^sub>M T \\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_real[of "\x. (fst x, snd (snd x))", OF _ Pxyz Pxz]) (auto simp: space_pair_measure) moreoverhave ae4: "AE x in S \\<^sub>M T \\<^sub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" by (intro subdensity_real[of snd, OF _ Pxyz Pyz]) (auto simp: space_pair_measure) ultimatelyhave ae: "AE x in S \\<^sub>M T \\<^sub>M P.
Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " using AE_space proof eventually_elim case (elim x) show ?case proof cases assume"Pxyz x \ 0" with elim have"0 < Px (fst x)""0 < Pz (snd (snd x))""0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)""0 < Pxyz x" by (auto simp: space_pair_measure less_le) thenshow ?thesis using b_gt_1 by (simp add: log_simps less_imp_le field_simps) qed simp qed with I1 I2 show ?eq unfolding conditional_mutual_information_def apply (subst mi_eq) apply (subst mutual_information_distr[OF S TP Px Px_nn Pyz _ Pxyz]) apply (auto simp: space_pair_measure) apply (subst Bochner_Integration.integral_diff[symmetric]) apply (auto intro!: integral_cong_AE simp: split_beta' simp del: Bochner_Integration.integral_diff) done
let ?P = "density (S \\<^sub>M T \\<^sub>M P) Pxyz" interpret P: prob_space ?P unfolding distributed_distr_eq_density[OF Pxyz, symmetric] by (rule prob_space_distr) simp
let ?Q = "density (T \\<^sub>M P) Pyz" interpret Q: prob_space ?Q unfolding distributed_distr_eq_density[OF Pyz, symmetric] by (rule prob_space_distr) simp
let ?f = "\(x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) / Pxyz (x, y, z)"
from subdensity_real[of snd, OF _ Pyz Pz _ AE_I2 AE_I2] have aeX1: "AE x in T \\<^sub>M P. Pz (snd x) = 0 \ Pyz x = 0" by (auto simp: comp_def space_pair_measure) have aeX2: "AE x in T \\<^sub>M P. 0 \ Pz (snd x)" using Pz by (intro TP.AE_pair_measure) (auto simp: comp_def)
have aeX3: "AE y in T \\<^sub>M P. (\\<^sup>+ x. ennreal (Pxz (x, snd y)) \S) = ennreal (Pz (snd y))" using Pz distributed_marginal_eq_joint2[OF P S Pz Pxz] by (intro TP.AE_pair_measure) auto
have"(\\<^sup>+ x. ?f x \?P) \ (\\<^sup>+ (x, y, z). Pxz (x, z) * (Pyz (y, z) / Pz z) \(S \\<^sub>M T \\<^sub>M P))" by (subst nn_integral_density)
(auto intro!: nn_integral_mono simp: space_pair_measure ennreal_mult[symmetric]) alsohave"\ = (\\<^sup>+(y, z). (\\<^sup>+ x. ennreal (Pxz (x, z)) * ennreal (Pyz (y, z) / Pz z) \S) \(T \\<^sub>M P))" by (subst STP.nn_integral_snd[symmetric])
(auto simp add: split_beta' ennreal_mult[symmetric] space_pair_measure intro!: nn_integral_cong) alsohave"\ = (\\<^sup>+x. ennreal (Pyz x) * 1 \T \\<^sub>M P)" proof - have D: "(\\<^sup>+ x. ennreal (Pxz (x, b)) * ennreal (Pyz (a, b) / Pz b) \S) = ennreal (Pyz (a, b))" if"Pz b = 0 \ Pyz (a, b) = 0" "a \ space T \ b \ space P" "(\\<^sup>+ x. ennreal (Pxz (x, b)) \S) = ennreal (Pz b)" for a b using that by (subst nn_integral_multc) (auto split: prod.split simp: ennreal_mult[symmetric]) show ?thesis apply (rule nn_integral_cong_AE) using aeX1 aeX3 by (force simp add: space_pair_measure D) qed alsohave"\ = 1" using Q.emeasure_space_1 distributed_distr_eq_density[OF Pyz] by (subst nn_integral_density[symmetric]) auto finallyhave le1: "(\\<^sup>+ x. ?f x \?P) \ 1" . alsohave"\ < \" by simp finallyhave fin: "(\\<^sup>+ x. ?f x \?P) \ \" by simp
have"(\\<^sup>+ x. ennreal (Pxyz x) *
ennreal (Pxz (fst x, snd (snd x)) * Pyz (snd x) / (Pz (snd (snd x)) * Pxyz x)) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P)) \<noteq> 0" proof let ?g = "\x. ennreal (Pxyz x) * (Pxz (fst x, snd (snd x)) * Pyz (snd x) / (Pz (snd (snd x)) * Pxyz x))" assume"(\\<^sup>+x. ?g x \(S \\<^sub>M T \\<^sub>M P)) = 0" thenhave"AE x in S \\<^sub>M T \\<^sub>M P. ?g x = 0" by (intro nn_integral_0_iff_AE[THEN iffD1]) auto thenhave"AE x in S \\<^sub>M T \\<^sub>M P. Pxyz x = 0" using ae2 ae3 ae4 by (auto split: if_split_asm simp: mult_le_0_iff divide_le_0_iff space_pair_measure) thenhave"(\\<^sup>+ x. ennreal (Pxyz x) \S \\<^sub>M T \\<^sub>M P) = 0" by (subst nn_integral_cong_AE[of _ "\x. 0"]) auto with P.emeasure_space_1 show False by (subst (asm) emeasure_density) (auto cong: nn_integral_cong) qed thenhave pos: "(\\<^sup>+x. ?f x \?P) \ 0" by (subst nn_integral_density) (simp_all add: split_beta')
have neg: "(\\<^sup>+ x. - ?f x \?P) = 0" by (subst nn_integral_0_iff_AE) (auto simp: space_pair_measure ennreal_neg)
have I3: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ Bochner_Integration.integrable_diff[OF I1 I2]]) using ae apply (auto simp: split_beta') done
have"- log b 1 \ - log b (integral\<^sup>L ?P ?f)" proof (intro le_imp_neg_le Transcendental.log_mono[OF b_gt_1]) haveIf: "integrable ?P ?f" unfolding real_integrable_def proof (intro conjI) from neg show"(\\<^sup>+ x. - ?f x \?P) \ \" by simp from fin show"(\\<^sup>+ x. ?f x \?P) \ \" by simp qed simp thenhave"(\\<^sup>+ x. ?f x \?P) = (\x. ?f x \?P)" apply (rule nn_integral_eq_integral) apply (auto simp: space_pair_measure ennreal_neg) done with pos le1 show"0 < (\x. ?f x \?P)" "(\x. ?f x \?P) \ 1" by (simp_all add: one_ennreal.rep_eq zero_less_iff_neq_zero[symmetric]) qed alsohave"- log b (integral\<^sup>L ?P ?f) \ (\ x. - log b (?f x) \?P)" proof (rule P.jensens_inequality[where a=0 and b=1 and I="{0<..}"]) show"AE x in ?P. ?f x \ {0<..}" unfolding AE_density[OF distributed_borel_measurable[OF Pxyz]] using ae1 ae2 ae3 ae4 by (auto simp: space_pair_measure less_le) show"integrable ?P ?f" unfolding real_integrable_def using fin neg by (auto simp: split_beta') have"integrable (S \\<^sub>M T \\<^sub>M P) (\x. Pxyz x * - log b (?f x))" apply (rule integrable_cong_AE[THEN iffD1, OF _ _ _ I3]) by (auto simp: log_mult log_divide field_simps) then show"integrable ?P (\x. - log b (?f x))" by (subst integrable_real_density) (auto simp: space_pair_measure) qed (auto simp: b_gt_1 minus_log_convex) alsohave"\ = conditional_mutual_information b S T P X Y Z" unfolding\<open>?eq\<close> apply (subst integral_real_density) apply simp apply (force simp: space_pair_measure) apply simp apply (intro integral_cong_AE) by (auto simp: field_simps log_divide) finallyshow ?nonneg by simp qed
lemma (in information_space) fixes Px :: "_ \ real" assumes S: "sigma_finite_measure S"and T: "sigma_finite_measure T"and P: "sigma_finite_measure P" assumes Fx: "finite_entropy S X Px" assumes Fz: "finite_entropy P Z Pz" assumes Fyz: "finite_entropy (T \\<^sub>M P) (\x. (Y x, Z x)) Pyz" assumes Fxz: "finite_entropy (S \\<^sub>M P) (\x. (X x, Z x)) Pxz" assumes Fxyz: "finite_entropy (S \\<^sub>M T \\<^sub>M P) (\x. (X x, Y x, Z x)) Pxyz" shows conditional_mutual_information_generic_eq': "conditional_mutual_information b S T P X Y Z
= (\<integral>(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Pxz (x, z) * (Pyz (y,z) / Pz z))) \<partial>(S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P))" (is "?eq") and conditional_mutual_information_generic_nonneg': "0 \ conditional_mutual_information b S T P X Y Z" (is "?nonneg") proof - note Px = Fx[THEN finite_entropy_distributed, measurable] note Pz = Fz[THEN finite_entropy_distributed, measurable] note Pyz = Fyz[THEN finite_entropy_distributed, measurable] note Pxz = Fxz[THEN finite_entropy_distributed, measurable] note Pxyz = Fxyz[THEN finite_entropy_distributed, measurable]
interpret S: sigma_finite_measure S by fact interpret T: sigma_finite_measure T by fact interpret P: sigma_finite_measure P by fact interpret TP: pair_sigma_finite T P .. interpret SP: pair_sigma_finite S P .. interpret ST: pair_sigma_finite S T .. interpret SPT: pair_sigma_finite "S \\<^sub>M P" T .. interpret STP: pair_sigma_finite S "T \\<^sub>M P" .. interpret TPS: pair_sigma_finite "T \\<^sub>M P" S .. have TP: "sigma_finite_measure (T \\<^sub>M P)" .. have SP: "sigma_finite_measure (S \\<^sub>M P)" ..
from Pxz Pxyz have distr_eq: "distr M (S \\<^sub>M P) (\x. (X x, Z x)) =
distr (distr M (S \<Otimes>\<^sub>M T \<Otimes>\<^sub>M P) (\<lambda>x. (X x, Y x, Z x))) (S \<Otimes>\<^sub>M P) (\<lambda>(x, y, z). (x, z))" by (simp add: distr_distr comp_def)
have"mutual_information b S P X Z =
(\<integral>x. Pxz x * log b (Pxz x / (Px (fst x) * Pz (snd x))) \<partial>(S \<Otimes>\<^sub>M P))" using Px Px_nn Pz Pz_nn Pxz Pxz_nn by (rule mutual_information_distr[OF S P]) (auto simp: space_pair_measure) alsohave"\ = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" using b_gt_1 Pxz Pxz_nn Pxyz Pxyz_nn by (subst distributed_transform_integral[OF Pxyz _ Pxz, where T="\(x, y, z). (x, z)"])
(auto simp: split_beta') finallyhave mi_eq: "mutual_information b S P X Z = (\(x,y,z). Pxyz (x,y,z) * log b (Pxz (x,z) / (Px x * Pz z)) \(S \\<^sub>M T \\<^sub>M P))" .
have ae1: "AE x in S \\<^sub>M T \\<^sub>M P. Px (fst x) = 0 \ Pxyz x = 0" by (intro subdensity_finite_entropy[of fst, OF _ Fxyz Fx]) auto moreoverhave ae2: "AE x in S \\<^sub>M T \\<^sub>M P. Pz (snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_finite_entropy[of "\x. snd (snd x)", OF _ Fxyz Fz]) auto moreoverhave ae3: "AE x in S \\<^sub>M T \\<^sub>M P. Pxz (fst x, snd (snd x)) = 0 \ Pxyz x = 0" by (intro subdensity_finite_entropy[of "\x. (fst x, snd (snd x))", OF _ Fxyz Fxz]) auto moreoverhave ae4: "AE x in S \\<^sub>M T \\<^sub>M P. Pyz (snd x) = 0 \ Pxyz x = 0" by (intro subdensity_finite_entropy[of snd, OF _ Fxyz Fyz]) auto ultimatelyhave ae: "AE x in S \\<^sub>M T \\<^sub>M P.
Pxyz x * log b (Pxyz x / (Px (fst x) * Pyz (snd x))) -
Pxyz x * log b (Pxz (fst x, snd (snd x)) / (Px (fst x) * Pz (snd (snd x)))) =
Pxyz x * log b (Pxyz x * Pz (snd (snd x)) / (Pxz (fst x, snd (snd x)) * Pyz (snd x))) " using AE_space proof eventually_elim case (elim x) show ?case proof cases assume"Pxyz x \ 0" with elim have"0 < Px (fst x)""0 < Pz (snd (snd x))""0 < Pxz (fst x, snd (snd x))" "0 < Pyz (snd x)""0 < Pxyz x" using Px_nn Pz_nn Pxz_nn Pyz_nn Pxyz_nn by (auto simp: space_pair_measure less_le) thenshow ?thesis using b_gt_1 by (simp add: log_simps less_imp_le field_simps) qed simp qed
have"integrable (S \\<^sub>M T \\<^sub>M P)
(\<lambda>x. Pxyz x * log b (Pxyz x) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pyz (snd x)))" using finite_entropy_integrable[OF Fxyz] using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] using finite_entropy_integrable_transform[OF Fyz Pxyz Pxyz_nn, of snd] by simp moreoverhave"(\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z)))) \ borel_measurable (S \\<^sub>M T \\<^sub>M P)" using Pxyz Px Pyz by simp ultimatelyhave I1: "integrable (S \\<^sub>M T \\<^sub>M P) (\(x, y, z). Pxyz (x, y, z) * log b (Pxyz (x, y, z) / (Px x * Pyz (y, z))))" apply (rule integrable_cong_AE_imp) using ae1 ae4 Px_nn Pyz_nn Pxyz_nn by (auto simp: log_divide log_mult field_simps) have"integrable (S \\<^sub>M T \\<^sub>M P)
(\<lambda>x. Pxyz x * log b (Pxz (fst x, snd (snd x))) - Pxyz x * log b (Px (fst x)) - Pxyz x * log b (Pz (snd (snd x))))" using finite_entropy_integrable_transform[OF Fxz Pxyz Pxyz_nn, of "\x. (fst x, snd (snd x))"] using finite_entropy_integrable_transform[OF Fx Pxyz Pxyz_nn, of fst] using finite_entropy_integrable_transform[OF Fz Pxyz Pxyz_nn, of "snd \ snd"] by simp
--> --------------------
--> maximum size reached
--> --------------------
¤ Dauer der Verarbeitung: 0.9 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung ist noch experimentell.