% Lossy fixed-length coding of the single characters of a text file.
%
% The script
%   1. reads CandideVoltaire.txt and estimates the probability of every
%      distinct character it contains,
%   2. plots H_delta = log2(number of symbols kept) against delta, the
%      probability mass of the (least probable) symbols that are dropped,
%      together with the entropy H of the character distribution,
%   3. for a target error probability P_error, keeps only the M most
%      probable characters and simulates a coder/decoder in which every
%      dropped character is decoded as a random kept character.
%
% Main workspace variables produced: A, Alphabet, Prob_symb, H, last, Y, I,
% delta, H_delta, P_error, i_N, M, N, SortedAlphabet, NewText.

clear variables
close all

%% Read the text
file_name = 'CandideVoltaire.txt';
[fid, open_msg] = fopen(file_name, 'r');
if fid == -1
    error('Cannot open %s: %s', file_name, open_msg);
end
A = fscanf(fid, '%c');   % read the whole file as characters
fclose(fid);             % release the handle as soon as the data is in memory
A = A(:).';              % force a row vector, whatever shape fscanf returned
if isempty(A)
    error('%s is empty: no symbol statistics can be computed.', file_name);
end

%% Symbol statistics
% Alphabet holds the distinct characters in order of first appearance and
% symbol_idx(n) is the position of A(n) inside Alphabet.
[Alphabet, ~, symbol_idx] = unique(A, 'stable');
Prob_symb = accumarray(symbol_idx(:), 1).' / length(A);   % relative frequencies

figure(1)
clf
bar(Prob_symb)
title('fréquence des letters dans le morceau de Candide')
% 'Interpreter','none': the label is made of raw file characters, which must
% not be parsed as TeX markup (e.g. '_', '^' or '\').
text(0, max(Prob_symb)*.9, Alphabet, 'Color', 'r', 'Interpreter', 'none')

%% Entropy of the character distribution (in bits)
% Computed inline instead of calling entropy(): the MathWorks function of
% that name (Image Processing Toolbox) is an image entropy, not the Shannon
% entropy of a probability vector. Every symbol occurs at least once, so
% there is no 0*log2(0) term.
H = -sum(Prob_symb .* log2(Prob_symb))
last = length(Prob_symb);

%% H_delta as a function of delta
% Sort the symbols by INCREASING probability: the least probable symbols are
% the first ones to be dropped.
[Y, I] = sort(Prob_symb);

% delta(j)   : probability mass lost when the j-1 least probable symbols are dropped
% H_delta(j) : log2 of the number of symbols that are still kept
% The curve stops with one symbol kept; dropping all of them would give
% log2(0) = -Inf and a code with no code word at all.
delta   = [0, cumsum(Y(1:last-1))];
H_delta = log2(last:-1:1);

figure(3)
clf;
hold on
plot(delta, H_delta)
title('H_\delta')
text(0.05, log2(last), 'log_2(|X|)', 'Color', 'b')
plot([0 1], [H H], 'c')
text(.95, 1.1*H, 'H(p)')

%% Code for a given probability of error
P_error = 0.035;
i_N = find(delta < P_error, 1, 'last')   % largest subset of dropped symbols allowed
% Number of symbols that keep a code word. Use exact integer arithmetic:
% 2^log2(n) is not guaranteed to give back exactly n in floating point, which
% would corrupt the "k > M" test below.
M = last - i_N + 1;
N = ceil(log2(M));                       % code length in bits (must be an integer)

% Symbols by decreasing probability: the first M entries are the coded ones.
SortedAlphabet = Alphabet(I(last:-1:1))

%% Simulate coding/decoding with the lossy coder
[~, k] = ismember(A, SortedAlphabet);    % rank of every character of the text
uncoded = k > M;                         % characters that have no code word
% A character without a code word is decoded as a uniformly drawn coded one.
k(uncoded) = fix(rand(1, nnz(uncoded)) * M) + 1;
NewText = SortedAlphabet(k);

% Exercise: check that the code is **incomplete** (N bits give 2^N code
% words, of which only M are used).
% Exercise: change the script to have a complete code.