;; @(#) czech-convert.el -- conversions between various Czech encodings

;; @(#) $Id: czech-convert.el,v 3.4 1997/02/26 21:12:49 pdm Exp $
;; @(#) $Keywords: i18n, Czech, encoding, conversion $
;; $KnownCompatibility: 19.34, XEmacs 19.14 $

;; This file is *NOT* part of GNU Emacs nor XEmacs.

;; Copyright (C) 1995, 1996, 1997 Milan Zamazal

;; Author:       Milan Zamazal <pdm@fi.muni.cz>
;; Maintainer:   Milan Zamazal <pdm@fi.muni.cz>
;; Requires:     czech.el
;; Remark:       Don't laugh too loudly while reading this file, please.

;; COPYRIGHT NOTICE
;;
;; This program is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by the Free
;; Software Foundation, version 2 of the License.
;;
;; This program is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;; or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
;; for more details.
;;
;; You should have received a copy of the GNU General Public License along
;; with GNU Emacs and/or this package.  If you did not, write to the
;; Free Software Foundation, Inc., 675 Mass Ave., Cambridge, MA 02139, USA.

;;; Commentary:

;; This file implements many conversion functions.  Many different character
;; sets are in use for Czech, which causes many problems (thank you, M$ Bill!).
;; This program tries to make working with them as simple as possible.

;;; History:

;; So long, so very long...

;;; Code:


;;; *** Start ***

(require 'czech)

;; RCS-style `$Id$' identification string for this file (file name,
;; revision, date, author and state).
(defconst cz-convert-version "$Id: czech-convert.el,v 3.4 1997/02/26 21:12:49 pdm Exp $"
  "Latest modification time and version number.")

(defun cz-convert-message-start (begin end)
  "Announce that a conversion of the region BEGIN..END is starting.
Nothing is printed unless the region spans at least
`cz-convert-verbose-limit' characters."
  (let ((region-size (- end begin)))
    (and (>= region-size cz-convert-verbose-limit)
         (cz-message 5 "Converting..."))))

(defun cz-convert-message-end (begin end)
  "Announce that the conversion of the region BEGIN..END has finished.
Nothing is printed unless the region spans at least
`cz-convert-verbose-limit' characters."
  (let ((region-size (- end begin)))
    (and (>= region-size cz-convert-verbose-limit)
         (cz-message 5 "Converting...done"))))


;;; *** cstocs functions ***

(defun cz-use-cstocs (begin end)
  "Return non-nil if the region BEGIN..END should go through the external program.
That is the case when `cz-use-cstocs-limit' is not the symbol `infinity'
and the region is at least that many characters long."
  (cond
   ;; `infinity' switches the external program off altogether.
   ((eq cz-use-cstocs-limit 'infinity) nil)
   (t (>= (- end begin) cz-use-cstocs-limit))))

(defun cz-convert-using-cstocs (begin end input-encoding output-encoding)
  "Filter the region BEGIN..END through the external conversion program.
INPUT-ENCODING and OUTPUT-ENCODING may each be a symbol or a string.
A symbol (one of `ascii', `il2', `kam', `koi8', `win', `dos') is looked up in
`cz-convert-symbol-strings'; a string is handed to the program verbatim, in
which case the caller is fully responsible for the existence of the
appropriate files."
  (let* ((in-name (if (stringp input-encoding)
                      input-encoding
                    (cdr (assoc input-encoding cz-convert-symbol-strings))))
         (out-name (if (stringp output-encoding)
                       output-encoding
                     (cdr (assoc output-encoding cz-convert-symbol-strings))))
         ;; Command line: PROGRAM INPUT OUTPUT
         (cmdline (concat cz-use-cstocs-program " " in-name " " out-name)))
    ;; The two trailing `t's are the OUTPUT-BUFFER and REPLACE arguments.
    (shell-command-on-region begin end cmdline t t)))


;;; *** Internal conversions ****

(defvar cz-convert-internal-tables
  '(((kam il2) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ϫ͵ɾܩػԹ.........................")
    ((koi8 il2) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ϫ͵ɾܩػԹ....a۷.ߴ..ťةݮ.")
    ((win il2) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'\"....<''\"\".--.>|........")
    ((dos il2) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ܫȺ..̪۷Խè")
    ((il2 ascii) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~܍.A.L.LS.\"SSTZ-ZZ.a.l'ls..sstz\"zzRAAAALCCCEEEEIIDDNNOOOO.RUUUUYT.raaaalccceeeeiiddnnoooo/ruuuuyt.")
    ((il2 kam) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~.A.L.S\"SZ-Za.l's..sz\"zAACCEEIDNO.UT.aacceeidnout.")
    ((il2 koi8) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~.A.L.S.\"SZ-Za.ls.sz\"zACCEEIDNO.UT.aacceeidno/ut.")
    ((il2 win) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~")
    ((il2 dos) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~.ϕ減-.筜赶Ǝӷ⊙ꠃǄء墓")
    ((win kam) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'\"....<SZ''\"\".--.>sz...L.A|\".S.-.Z.l'.as\"zAACCEEIDNO.UT.aacceeidnout.")
    ((win koi8) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'\"....<SZ''\"\".--.>sz..L.A|.\".S..-.Z..l...as.\"zACCEEIDNO.UT.aacceeidno/ut.")
    ((win dos) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'\"....<''\"\".--.>.Ϥ|..-.....赶Ǝӷ⊙ꠃǄء墓")
    ((win ascii) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~'\"....S<STZZ''\"\".--.s>stzz...L.A|.\".S..-.Z...l'.s..as.L\"lzRAAAALCCCEEEEIIDDNNOOOO.RUUUUYT.raaaalccceeeeiiddnnoooo/ruuuuyt.")
    ((kam win) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ύ;ɞ܊؝Ԛ.....赶Ǝӷ⊙..................." "\\|\\|")
    ((koi8 win) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ύ;ɞ܊؝Ԛ......a...ż؊ݎ.")
    ((dos win) " 	

 !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~܍Ⱥ..̪Խè"))
  "Translation tables for internal conversions.")

;; ISO 8859-2 to 7-bit ASCII (Czech to "csech")
;;;###autoload
(defun cz-convert-to-csech (begin end &optional append-message)
  "Convert the region from the native 8-bit encoding to 7-bit ASCII (\"csech\").
The input is taken to be CP 1250 when `cz-windoze' is non-nil and
ISO 8859-2 otherwise.  All 8-bit characters are converted; those that are
not accented letters become dots.
If APPEND-MESSAGE is non-nil or a prefix argument is given, a short notice
about the conversion is inserted at END.  The notice is only inserted when
the internal table is used, not when the external program does the work."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (let ((input (if cz-windoze 'win 'il2)))
    (if (cz-use-cstocs begin end)
        (cz-convert-using-cstocs begin end input 'ascii)
      (translate-region begin end
                        (car (cdr (assoc (list input 'ascii)
                                         cz-convert-internal-tables))))
      (if (or append-message current-prefix-arg)
          (progn
            (goto-char end)
            ;; The accented letters of the notice are spelled as octal
            ;; escapes (\310 = C-caron, \375 = y-acute, \341 = a-acute; the
            ;; same codes in ISO 8859-2 and CP 1250) so that the source file
            ;; stays 7-bit clean and survives re-encoding.
            (insert "\n\310esk\375 text konvertov\341n na cesky text.\n")))))
  (cz-convert-message-end begin end))

;; Kamenicky (KEYBCS2) to ISO 8859-2
(defun cz-convert-from-kamenik (begin end &optional leave-ctrl-m)
  "Convert the region from the Kamenicky brothers' encoding to the native one.
The Kamenicky encoding was the most widely used Czech encoding before the
Micro$oft attack.  The result is ISO 8859-2, or CP 1250 when `cz-windoze'
is non-nil.
If the optional argument LEAVE-CTRL-M is non-nil, do not remove ^M's."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (let ((output (if cz-windoze 'win 'il2)))
    (if (cz-use-cstocs begin end)
        ;; The encoding symbol is `kam' -- the name used by the internal
        ;; tables and documented by `cz-convert-using-cstocs'.
        (cz-convert-using-cstocs begin end 'kam output)
      (translate-region begin end
                        (car (cdr (assoc (list 'kam output)
                                         cz-convert-internal-tables))))))
  (if (and (not leave-ctrl-m) (not cz-windoze))
      (cz-convert-ctrlm-remove begin end))
  (cz-convert-message-end begin end))

;; KOI-8 to ISO 8859-2
(defun cz-convert-from-koi8 (begin end &optional leave-ctrl-m)
  "Convert the region from KOI-8 to the native encoding.
KOI-8 is a Soviet Union encoding still used in at least one place in the
Czech Republic.  The `ch' and `CH' characters are not converted properly,
and neither are a-grave and A-grave, which ISO 8859-2 does not contain.
The result is ISO 8859-2, or CP 1250 when `cz-windoze' is non-nil.
If the optional argument LEAVE-CTRL-M is non-nil, do not remove ^M's."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (let ((target (if cz-windoze 'win 'il2)))
    (cond
     ;; Large regions go through the external program.
     ((cz-use-cstocs begin end)
      (cz-convert-using-cstocs begin end 'koi8 target))
     ;; Everything else is translated with the built-in table.
     (t
      (let ((table (nth 1 (assoc (list 'koi8 target)
                                 cz-convert-internal-tables))))
        (translate-region begin end table)))))
  (or leave-ctrl-m
      cz-windoze
      (cz-convert-ctrlm-remove begin end))
  (cz-convert-message-end begin end))

;; Windoze to ISO 8859-2
(defun cz-convert-from-windoze (begin end &optional leave-ctrl-m)
  "Convert the region from Czech M$-Windoze encoding to ISO 8859-2.
The Czech M$-Windoze encoding (\"code page 1250\") is a mutation of
ISO 8859-2 used by Micro$oft in their Windoze interface.
If the optional argument LEAVE-CTRL-M is non-nil, do not remove ^M's."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (cond
   ;; Large regions go through the external program.
   ((cz-use-cstocs begin end)
    (cz-convert-using-cstocs begin end 'win 'il2))
   ;; Everything else is translated with the built-in table.
   (t
    (let ((table (nth 1 (assoc (list 'win 'il2)
                               cz-convert-internal-tables))))
      (translate-region begin end table))))
  (or leave-ctrl-m
      cz-windoze
      (cz-convert-ctrlm-remove begin end))
  (cz-convert-message-end begin end))

;; M$-Latin to ISO 8859-2
(defun cz-convert-from-do$ (begin end &optional leave-ctrl-m)
  "Convert the region from \"Code page 852\" to the native encoding.
\"Code page 852\" is a very stupid encoding introduced by Micro$oft.
The result is ISO 8859-2, or CP 1250 when `cz-windoze' is non-nil.
If the optional argument LEAVE-CTRL-M is non-nil, do not remove ^M's."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (let ((target (if cz-windoze 'win 'il2)))
    (cond
     ;; Large regions go through the external program.
     ((cz-use-cstocs begin end)
      (cz-convert-using-cstocs begin end 'dos target))
     ;; Everything else is translated with the built-in table.
     (t
      (let ((table (nth 1 (assoc (list 'dos target)
                                 cz-convert-internal-tables))))
        (translate-region begin end table)))))
  (or leave-ctrl-m
      cz-windoze
      (cz-convert-ctrlm-remove begin end))
  (cz-convert-message-end begin end))

;; ISO 8859-2 to Windoze
(defun cz-convert-from-iso (begin end &optional dummy)
  "Convert the region from ISO 8859-2 to the native encoding.
When `cz-windoze' is non-nil the native encoding is CP 1250 and the region
is converted to it.  Otherwise the native encoding is ISO 8859-2 itself and
the region is left untouched.
The optional argument DUMMY is provided only for compatibility with other
similar functions and has no effect."
  (interactive "*r")
  (cz-convert-message-start begin end)
  (let ((output (if cz-windoze 'win 'il2)))
    (cond
     ;; ISO 8859-2 to ISO 8859-2 is the identity.  No `(il2 il2)' table is
     ;; looked up, because a missing entry would hand nil to
     ;; `translate-region'.
     ((eq output 'il2))
     ((cz-use-cstocs begin end)
      (cz-convert-using-cstocs begin end 'il2 output))
     (t
      (translate-region begin end
                        (car (cdr (assoc (list 'il2 output)
                                         cz-convert-internal-tables)))))))
  (cz-convert-message-end begin end))


;;; *** Magical conversions to ISO 8859-2 ***

;; Appears as the history argument of the `completing-read' call in
;; `cz-convert-to-native'; `cz-convert-to-iso' is an alias of that command.
(defvar cz-convert-history nil
  "History for `cz-convert-to-iso'.")

;;;###autoload
(defun cz-convert-to-native (begin end &optional prefix)
  "Try to identify the encoding of the region and convert it to the native one.
Known input encodings are: Kamenicky (`kam'), KOI-8 (`koi8'),
CP 1250 (`win'), CP 852 (`dos').
The output encoding is ISO 8859-2; in Windoze it is one of Micro$oft's
encodings (\"CP 1250\").
If PREFIX is non-nil (interactively: a prefix argument), the user is asked
for the input encoding; an empty answer falls back to automatic
identification.
Signals an error if the encoding is not one of the known ones."
  (interactive "*r\nP")
  (let (input)
    (if prefix
        ;; The history argument must be the quoted variable NAME; passing
        ;; its value would disable the history.
        (let ((answer (completing-read
                       "Input encoding: "
                       '(("kam" 1) ("koi8" 2) ("dos" 3) ("win" 4))
                       nil t nil 'cz-convert-history)))
          ;; Do not intern an empty answer -- treat it as "please guess".
          (if (not (string= answer ""))
              (setq input (intern answer)))))
    (or input
        (setq input (cz-convert-identify-encoding begin end)))
    (cond
     ((eq input 'kam)
      (cz-convert-from-kamenik begin end))
     ((eq input 'koi8)
      (cz-convert-from-koi8 begin end))
     ((eq input 'dos)
      (cz-convert-from-do$ begin end))
     ((eq input 'win)
      (if cz-windoze
          (cz-message 2 "It seems to be in Windoze encoding already!")
        (cz-convert-from-windoze begin end)))
     ((eq input 'il2)
      (if cz-windoze
          (cz-convert-from-iso begin end)
        (cz-message 2 "It seems to be in ISO 8859-2 already!")))
     ((eq input 'binary)
      (cz-message 2 "I think those are binary data!"))
     (t (error "Sorry, unknown encoding.")))))

;; `cz-convert-to-iso' is a second name for `cz-convert-to-native'.
;;;###autoload
(defalias 'cz-convert-to-iso 'cz-convert-to-native)

(defun cz-convert-identify-encoding (begin end)
  "Guess the encoding of the text between BEGIN and END.
The text is scored as it stands (i.e. assuming the native encoding) and then
once more after each candidate conversion; the assumption with the highest
score from `cz-convert-evaluate-ratios' wins.
Known encodings are: ISO 8859-2 (`il2'), Kamenicky (`kam'), KOI-8 (`koi8'),
CP 1250 (`win'), CP 852 (`dos').
Returns the symbol of the identified encoding, or `binary' when the winning
ratio pair has a cdr above `cz-is-nonczech-max', meaning the region
probably contains binary data."
  (let* ((scratch (get-buffer-create cz-convert-aux-buffer))
         ;; Baseline: the text is already in the native encoding.
         (best-encoding (if cz-windoze 'win 'il2))
         (best-pair (cz-convert-test-region scratch begin end))
         (best-score (cz-convert-evaluate-ratios best-pair))
         ;; (ENCODING . CONVERTER) candidates; the last entry is whichever
         ;; of ISO 8859-2 / CP 1250 is not the native encoding.
         (candidates (list '(kam . cz-convert-from-kamenik)
                           '(koi8 . cz-convert-from-koi8)
                           '(dos . cz-convert-from-do$)
                           (if cz-windoze
                               '(il2 . cz-convert-from-iso)
                             '(win . cz-convert-from-windoze))))
         candidate
         pair
         score)
    (while candidates
      (setq candidate (car candidates)
            candidates (cdr candidates))
      (setq pair (cz-convert-test-region scratch begin end (cdr candidate)))
      (setq score (cz-convert-evaluate-ratios pair))
      ;; Strictly greater: on a tie the earlier assumption is kept.
      (if (> score best-score)
          (setq best-score score
                best-pair pair
                best-encoding (car candidate))))
    (kill-buffer scratch)
    (cond
     ;; Too high a cdr even for the best guess: call it binary data.
     ((> (cdr best-pair) cz-is-nonczech-max) 'binary)
     (t best-encoding))))

;; Helper of `cz-convert-identify-encoding'.  Returns whatever
;; `cz-is-cz-text' returns when called in TARGET-BUFFER (the callers in this
;; file treat that value as a cons cell of two numbers).
(defun cz-convert-test-region (target-buffer begin end &optional enc-function)
  "Copies region to auxiliary buffer and tests it for given encoding.
Tests only the part specified by user variables used in `cz-is-cz-text'.
If ENC-FUNCTION is `nil', no conversion is performed before testing."
  ;; Empty TARGET-BUFFER, then copy the region BEGIN..END of the current
  ;; buffer into it.
  (save-excursion
    (set-buffer target-buffer)
    (delete-region (point-min) (point-max)))
  (append-to-buffer target-buffer begin end)
  (save-excursion
    (set-buffer target-buffer)
    ;; From here on `beg', `end' and `len' describe a window inside
    ;; TARGET-BUFFER; the local `end' shadows the END parameter.
    (let* ((beg (min (+ (point-min) cz-is-skip-start) (point-max)))
	   (end (min (+ beg cz-is-test-size) (point-max)))
	   (len (- end beg)))
      ;; If the window is empty or too short, start at the beginning of the
      ;; buffer instead.  Note that only `beg' is reset; `end' keeps the
      ;; value computed above.
      (if (or (eq len 0)
	      (< len cz-is-buffer-empty)
	      (and (eq cz-is-buffer-empty 0) ; special heuristic
		   (< len cz-is-skip-start)))
	  (setq beg (point-min)))
      ;; ENC-FUNCTION is called as (ENC-FUNCTION BEG END t).  For the
      ;; `cz-convert-from-*' converters the third argument is LEAVE-CTRL-M
      ;; (or DUMMY for `cz-convert-from-iso').
      (if enc-function
	  (funcall enc-function beg end t))
      ;; NOTE(review): the meaning of the two `t' arguments is defined by
      ;; `cz-is-cz-text' in czech.el -- confirm there.
      (cz-is-cz-text t t))))

(defun cz-convert-evaluate-ratios (ratio-pair)
  "Collapse RATIO-PAIR into a single score.
RATIO-PAIR is the cons cell returned by the function `cz-is-cz-text'; the
score is its car minus its cdr.  The score should express how similar the
characters of the text are to Czech characters."
  (let ((plus (car ratio-pair))
        (minus (cdr ratio-pair)))
    (- plus minus)))


;;; *** Conversions based on *.enc and accent cstocs files ***

;; Minibuffer history lists for the two prompts of `cz-convert-from-to':
;; one for the input encoding and one for the output encoding.  Their most
;; recent entries are reused when that command gets a prefix argument.
(defvar cz-convert-inenc-history nil
  "History for `cz-convert-from-to'.")
(defvar cz-convert-outenc-history nil
  "History for `cz-convert-from-to'.")
;; Symbols of the conversion tables built by `cz-convert-create-data'.
;; `cz-convert-delete-tables' walks this list to get rid of them.
(defvar cz-convert-symbols nil
  "List of variable names of the variables defining conversion lists.")

;; User interaction
;;;###autoload
(defun cz-convert-from-to (begin end input-encoding output-encoding)
  "Convert the region BEGIN..END from INPUT-ENCODING to OUTPUT-ENCODING.
Both encodings must have definition files.  These files usually have the
extension `.enc' and their format is specified in the `cstools' package.
INPUT-ENCODING and OUTPUT-ENCODING are strings equal to the names of the
appropriate encoding files without the extension.
When called with a prefix argument, the most recently entered encodings are
reused (if there are any).
Large regions go through the external program, a pair of encodings covered
by `cz-convert-internal-tables' is converted with `cz-convert-fast', and
anything else uses a table built by `cz-convert-create-data'.
See also variables `cz-encoding-files-dir', `cz-convert-onebymore', and
`cz-convert-cache-file'."
  (interactive
   (progn
     (barf-if-buffer-read-only)
     (cond
      ;; Prefix argument and something in both histories: reuse it.
      ((and current-prefix-arg
            cz-convert-inenc-history
            cz-convert-outenc-history)
       (list (region-beginning) (region-end)
             (car cz-convert-inenc-history)
             (car cz-convert-outenc-history)))
      ;; Otherwise keep asking until both answers are non-empty.
      (t
       (let ((from "")
             (to ""))
         (while (string= from "")
           (setq from (completing-read "Input encoding: "
                                       cz-convert-encfiles
                                       nil nil nil
                                       'cz-convert-inenc-history)))
         (while (string= to "")
           (setq to (completing-read "Output encoding: "
                                     cz-convert-encfiles
                                     nil nil nil
                                     'cz-convert-outenc-history)))
         (list (region-beginning) (region-end) from to))))))
  (cond
   ;; External conversion, no problem.
   ((cz-use-cstocs begin end)
    (cz-convert-using-cstocs begin end input-encoding output-encoding))
   (t
    ;; Map the encoding names to the symbols used by the internal tables.
    ;; A name matches either as a value or (interned) as a key of
    ;; `cz-convert-symbol-strings'.
    (let ((from-symbol
           (or (car (rassoc input-encoding cz-convert-symbol-strings))
               (car (assoc (intern input-encoding)
                           cz-convert-symbol-strings))))
          (to-symbol
           (or (car (rassoc output-encoding cz-convert-symbol-strings))
               (car (assoc (intern output-encoding)
                           cz-convert-symbol-strings)))))
      (cond
       ;; An internal one-to-one table exists and is allowed.
       ((and (not cz-convert-onebymore)
             from-symbol
             to-symbol
             (assoc (list from-symbol to-symbol) cz-convert-internal-tables))
        (cz-convert-fast begin end from-symbol to-symbol))
       ;; Otherwise use a table named after the two encodings, building it
       ;; first if it does not exist yet.
       (t
        (let ((table-name
               (intern
                (concat "cz-conv-" input-encoding "-" output-encoding))))
          (or (boundp table-name)
              (cz-convert-create-data
               table-name input-encoding output-encoding))
          (cz-convert-convert begin end table-name))))))))

(defun cz-convert-convert (begin end conversion-name)
  "Convert the region BEGIN..END using the conversion list CONVERSION-NAME.
CONVERSION-NAME is a symbol whose value is a list
  (TRANSLATION-STRING SEARCH-FOR SUBSTITUTE-TO)
as built by `cz-convert-create-data'.
If `cz-convert-onebymore' is non-nil, characters matching the regexp
SEARCH-FOR are first replaced by their strings from the association list
SUBSTITUTE-TO; afterwards the whole region is translated with
TRANSLATION-STRING.  Unless `cz-windoze' is non-nil, ^M's are removed from
the converted text."
  (cz-convert-message-start begin end)
  (let* ((data (symbol-value conversion-name))
         (translation-string (car data)) ; standard conversion string
         (search-for (nth 1 data))       ; regexp matching the exceptions
         (substitute-to (nth 2 data))    ; alist (CHAR . STRING) for them
         (new-end end))
    (save-excursion
      (save-restriction
        (narrow-to-region begin end)
        ;; Conversion of exceptions.  An empty regexp means there are none;
        ;; searching for it would match the empty string without ever
        ;; moving point.
        (if (and cz-convert-onebymore
                 (stringp search-for)
                 (not (string= search-for "")))
            ;; `let' restores both variables even on an error or quit.
            (let ((case-fold-search nil)
                  (case-replace nil)
                  replacement)
              (goto-char (point-min))
              (while (re-search-forward search-for nil t)
                (setq replacement
                      (cdr (assoc (string-to-char (match-string 0))
                                  substitute-to)))
                ;; A character without a substitution is left for the
                ;; one-to-one translation below.
                (if replacement
                    (replace-match replacement t t)))))
        ;; It comes *after* exceptions
        (translate-region (point-min) (point-max) translation-string)
        ;; The replacements may have changed the size of the region.
        (setq new-end (point-max))))
    (if (not cz-windoze) (cz-convert-ctrlm-remove begin new-end)))
  (cz-convert-message-end begin end))

;;; Reading and transforming cstocs files

(defun cz-convert-create-data (name input-encoding output-encoding
                                    &optional nomessage)
  "Create data for conversion from INPUT-ENCODING to OUTPUT-ENCODING.
The data are stored as the value of the symbol `cz-conv-INPUT-OUTPUT' in a
list like
  (TRANSLATION-STRING SEARCH-FOR SUBSTITUTE-TO)
where
- the first element is the third argument of function `translate-region',
- the second element is a regexp of all characters needing more than a
  one-character substitution,
- and the third element is an association list (CHAR . STRING) of the
  substitutions for them.
NAME is accepted for compatibility only; the symbol is always derived from
the two encoding names.
If the value of variable `cz-convert-cache-file' is non-nil, first try to
load the data from the file specified by that variable.  Newly built data
are appended to this file.
If NOMESSAGE is non-nil, no messages are displayed during operation."
  (let ((conversion-name
         (intern
          (concat "cz-conv-" input-encoding "-" output-encoding)))
        ;; 1st element of the data list.  A fresh 256-element string is
        ;; allocated on every call; filling in a string literal with `aset'
        ;; would modify the function's own constant and leak one table into
        ;; the next.
        (trans-string (make-string 256 0))
        (search-string "")              ; 2nd element of the data list
        assoc-replace-list              ; 3rd element of the data list
        (code 0)
        enc-list-1                      ; input encoding
        enc-list-2                      ; output encoding
        accent-list                     ; how to handle undefined symbols
        aux
        aux-aux)
    ;; Start from the identity mapping of all 256 codes.
    ;; NOTE(review): the upper half of the former literal table was not
    ;; recoverable from the source; identity is assumed -- confirm.
    (while (< code 256)
      (aset trans-string code code)
      (setq code (1+ code)))
    ;; Firstly try to load from cache
    (if (and cz-convert-cache-file
             (load cz-convert-cache-file t)
             (boundp conversion-name))
        ()                              ; the cache provided the table
      ;; We didn't load it
      (or nomessage
          (cz-message 7 "Building conversion table..."))
      (setq enc-list-1 (cz-read-enc-file input-encoding))
      (setq enc-list-2 (cz-read-enc-file output-encoding))
      (setq accent-list (cz-read-accent-file cz-convert-accent-file))
      ;; Build it
      (while enc-list-1
        (setq aux (car enc-list-1))     ; (CODE . SYMBOL) of the input
        (if (setq aux-aux (rassoc (cdr aux) enc-list-2))
            ;; The output encoding knows the symbol: map code to code.
            (aset trans-string (car aux) (car aux-aux))
          ;; It does not: fall back to the accent file.
          (if cz-convert-unknown-char
              (aset trans-string (car aux) cz-convert-unknown-char))
          (setq aux-aux (assoc (cdr aux) accent-list))
          (cond
           ((null aux-aux)              ; image not found
            (cz-message 5
                        "Warning: Error in encoding/accent files (%S not found)."
                        (cdr aux)))
           ((eq (length (cdr aux-aux)) 1) ; image is a character
            (aset trans-string (car aux) (string-to-char (cdr aux-aux))))
           (t                           ; otherwise (image is string)
            (if (not (string= search-string ""))
                (setq search-string (concat search-string "\\|")))
            ;; Quote the character so that regexp specials match literally.
            (setq search-string
                  (concat search-string
                          (regexp-quote (char-to-string (car aux)))))
            (setq assoc-replace-list
                  (cons (cons (car aux) (cdr aux-aux))
                        assoc-replace-list)))))
        (setq enc-list-1 (cdr enc-list-1)))
      ;; OK, set conversion data
      (set conversion-name
           (list trans-string search-string assoc-replace-list))
      (setq cz-convert-symbols (cons conversion-name cz-convert-symbols))
      ;; If required, store them to cache file
      (if cz-convert-cache-file
          (save-excursion
            (set-buffer (get-buffer-create cz-convert-aux-buffer))
            (delete-region (point-min) (point-max))
            (insert "(setq " (symbol-name conversion-name) "\n'")
            (prin1 (symbol-value conversion-name) (current-buffer))
            (insert ")\n")
            (append-to-file (point-min) (point-max) cz-convert-cache-file)
            (kill-buffer cz-convert-aux-buffer)))
      ;; All done
      (or nomessage
          (cz-message 7 "Building conversion table...done.")))))

(defun cz-read-accent-file (file)
  "Read the accent file FILE.
Every line consisting of two whitespace-separated fields yields one item.
Returns an association list of items
  (SYMBOL-NAME . STRING)
in reverse file order.  FILE is located by `cz-find-enc-file'; the
auxiliary buffer is killed afterwards."
  (save-excursion
    (cz-find-enc-file file)
    (set-buffer cz-convert-aux-buffer)
    (let ((line-regexp "^[ \t]*\\([^ \t\n]+\\)[ \t]+\\([^ \t\n]+\\)[ \t]*$")
          entries)
      (while (re-search-forward line-regexp nil t)
        ;; Field 1 becomes the (interned) key, field 2 stays a string.
        (setq entries
              (cons (cons (intern (match-string 1)) (match-string 2))
                    entries)))
      (kill-buffer cz-convert-aux-buffer)
      entries)))

(defun cz-read-enc-file (file)
  "Read the encoding file FILE.
Every line consisting of two whitespace-separated fields yields one item.
Returns an association list of items
  (NUMERIC-VALUE . SYMBOL)
in reverse file order, where SYMBOL is the interned second field.  FILE is
located by `cz-find-enc-file'; the auxiliary buffer is killed afterwards."
  (save-excursion
    (cz-find-enc-file file)
    (set-buffer cz-convert-aux-buffer)
    (let ((line-regexp "^[ \t]*\\([^ \t\n]+\\)[ \t]+\\([^ \t\n]+\\)[ \t]*$")
          entries)
      (while (re-search-forward line-regexp nil t)
        ;; Field 1 is the character code, field 2 the symbolic name.
        (setq entries
              (cons (cons (string-to-number (match-string 1))
                          (intern (match-string 2)))
                    entries)))
      (kill-buffer cz-convert-aux-buffer)
      entries)))

(defun cz-find-enc-file (file &optional no-error)
  "Try to locate the encoding file FILE and return its name.
The file is searched for in the following order:
  1. File `FILE' in the current directory.
  2. File `FILE.enc' in the current directory.
  3. File `FILE' in the directory determined by the variable
     `cz-encoding-files-dir'.
  4. File `FILE.enc' in that directory.
If the file is found -- in any of the four places -- it is read into the
buffer `cz-convert-aux-buffer'; the previous contents of that buffer are
replaced and its point is set to the beginning.
If the file is not found, an error is signalled.  If the optional argument
NO-ERROR is non-nil, no error is signalled and nil is returned."
  (let* ((dir (and cz-encoding-files-dir
                   ;; Adds the trailing slash only when it is missing.
                   (file-name-as-directory cz-encoding-files-dir)))
         (in-dir (concat dir file))
         (candidates (list file
                           (concat file ".enc")
                           in-dir
                           (concat in-dir ".enc")))
         found)
    ;; Take the first readable candidate.
    (while (and candidates (not found))
      (if (file-readable-p (car candidates))
          (setq found (car candidates)))
      (setq candidates (cdr candidates)))
    (cond
     (found
      ;; Expand the name before switching buffers: a relative name would
      ;; otherwise be resolved against the auxiliary buffer's
      ;; `default-directory'.
      (let ((full-name (expand-file-name found)))
        (save-excursion
          (set-buffer (get-buffer-create cz-convert-aux-buffer))
          (insert-file-contents full-name nil nil nil t)
          (goto-char (point-min))))
      found)
     (no-error nil)
     (t
      (error "No file similar to `%s' found." in-dir)))))

;;; Destroying all conversion tables
(defun cz-convert-delete-tables ()
  "Forget all conversion tables built so far and delete the cache file.
Every symbol recorded in `cz-convert-symbols' is uninterned and the list is
emptied.  The file named by `cz-convert-cache-file' is deleted if that
variable is non-nil and the file exists."
  (interactive)
  (mapcar 'unintern cz-convert-symbols)
  (setq cz-convert-symbols nil)
  (and cz-convert-cache-file
       (file-exists-p cz-convert-cache-file)
       (delete-file cz-convert-cache-file)))


;;; *** On fly conversion ***

;;; Minor mode

;; All three variables below are automatically buffer-local.  The modeline
;; string and the conversion type are additionally marked `permanent-local',
;; so they are not reset by `kill-all-local-variables' (for example when the
;; major mode changes).
(defvar cz-convert-auto-mode nil
  "Control variable for `cz-convert-auto-mode' minor mode.")
(make-variable-buffer-local 'cz-convert-auto-mode)

(defvar cz-convert-auto-modeline ""
  "String inserted into modeline by `cz-convert-auto-mode' minor mode.")
(make-variable-buffer-local 'cz-convert-auto-modeline)
(put 'cz-convert-auto-modeline 'permanent-local t)

;; The type of autoconversion cannot be derived from
;; `cz-convert-auto-modeline' -- that would be too fragile and did not work
;; in some Emacsen (Windoze version, XEmacs).
(defvar cz-convert-auto-type nil
  "Type of format conversion.")
(make-variable-buffer-local 'cz-convert-auto-type)
(put 'cz-convert-auto-type 'permanent-local t)

;; The "mode function" only ever switches the mode variable on; its argument
;; is ignored.  Its symbol is used as the last element of the `format-alist'
;; entries built in `cz-convert-auto-enable'.
(defun cz-convert-auto-mode (&optional dummy)
  "Minor mode for Emacs format conversion.
It is NOT intended for regular or even interactive usage!"
  (setq cz-convert-auto-mode t))
(put 'cz-convert-auto-mode 'permanent-local t) ; needed only in Windoze!?
;; Register the mode variable together with its modeline string variable.
(cz-add-minor-mode 'cz-convert-auto-mode 'cz-convert-auto-modeline)

;;; Recoding functions

(defun cz-convert-decode (begin end)
  "Format conversion decoding function.
When `cz-convert-auto-flag' is non-nil, identify the encoding of the region
BEGIN..END, convert it to the native encoding (CP 1250 if `cz-windoze' is
non-nil, ISO 8859-2 otherwise) and remember the identified encoding for the
buffer unless one is already recorded.
Returns the (possibly new) end of the region."
  (cond
   ;; Autoconversion is switched off: just unhook ourselves.
   ((not cz-convert-auto-flag)
    (cz-convert-auto-disable-internal)
    end)
   (t
    (let* ((native-enc (if cz-windoze 'win 'il2))
           (detected (cz-convert-identify-encoding begin end))
           ;; Text that already is in the native encoding is tagged `native'.
           (encoding (if (eq detected native-enc) 'native detected))
           ;; Non-nil if a real conversion is called for.
           (foreign (not (memq encoding '(binary native)))))
      ;; Convert to native encoding
      (if foreign
          (cz-convert-fast begin end encoding native-enc))
      ;; Set encoding for further usage (only if it is not set yet)
      (if (and foreign (not cz-convert-auto-type))
          (setq cz-convert-auto-modeline
                (concat " A[" (symbol-name encoding) "]")
                cz-convert-auto-type encoding))
      ;; We have to avoid infinite loop
      (cz-convert-auto-disable-internal)
      ;; Remove those silly characters
      (cond
       (cz-windoze
        (if (eq encoding 'il2)
            (setq buffer-file-type t)))
       ((not (eq encoding 'binary))
        (setq end (cz-convert-ctrlm-remove begin end))))
      ;; Return (possibly new) end of region
      end))))

(defun cz-convert-encode (begin end &optional encoding)
  "Format conversion encoding function.
Convert the region BEGIN..END from the native encoding to ENCODING.
Nothing is done when ENCODING is nil, `native' or `binary'.  Unless
`cz-windoze' is non-nil, ^M's are added to the ends of lines after the
conversion.
Returns the (possibly new) end of the region."
  (let ((target (or encoding 'native)))
    (cond
     ;; Nothing to convert.
     ((memq target '(binary native)))
     (t
      (cz-convert-fast begin end (if cz-windoze 'win 'il2) target)
      (or cz-windoze
          (setq end (cz-convert-ctrlm-add begin end)))))
    end))
;; One fixed-target wrapper around `cz-convert-encode' per supported
;; encoding.  `cz-convert-auto-enable' installs them, by name, as the
;; encoding functions of the `cz-autoconv-*' entries in `format-alist'.
;; Each takes the region BEGIN..END and returns what `cz-convert-encode'
;; returns.
(defun cz-convert-encode-native (begin end)
  (cz-convert-encode begin end 'native))
(defun cz-convert-encode-il2 (begin end)
  (cz-convert-encode begin end 'il2))
(defun cz-convert-encode-kam (begin end)
  (cz-convert-encode begin end 'kam))
(defun cz-convert-encode-koi8 (begin end)
  (cz-convert-encode begin end 'koi8))
(defun cz-convert-encode-win (begin end)
  (cz-convert-encode begin end 'win))
(defun cz-convert-encode-dos (begin end)
  (cz-convert-encode begin end 'dos))

;;;###autoload
;; Whole-buffer front end to `cz-convert-ctrlm-remove'; it operates on the
;; accessible portion (`point-min' to `point-max').
(defun cz-convert-undo$ ()
  "Removes those silly chars (^M and ^Z) from end of lines in buffer."
  (interactive)
  (cz-convert-ctrlm-remove (point-min) (point-max)))

;;;###autoload
;; Whole-buffer front end to `cz-convert-ctrlm-add'; it operates on the
;; accessible portion (`point-min' to `point-max').
(defun cz-convert-do$ ()
  "Adds that silly char (^M) to end of lines of the current buffer."
  (interactive)
  (cz-convert-ctrlm-add (point-min) (point-max)))

(defun cz-convert-ctrlm-remove (begin end)
  "Remove those silly chars (^M and ^Z) from the region BEGIN..END.
A ^M (carriage return) is removed wherever it ends a line, and a single ^Z
is removed if it is the very last character of the region.
Returns the new end of the region."
  (let ((removed 0))
    (save-excursion
      (save-restriction
        (narrow-to-region begin end)
        (goto-char (point-min))
        ;; The control characters are written as escapes; raw ^M and ^Z
        ;; bytes in the source do not survive editing and file transfers.
        (while (re-search-forward "\r$" nil t)
          (setq removed (1+ removed))
          (replace-match "" t t))
        ;; DO$ end-of-file marker, only at the very end of the region.
        (if (re-search-forward "\032\\'" nil t)
            (progn
              (setq removed (1+ removed))
              (replace-match "" t t)))))
    ;; Return value
    (- end removed)))

(defun cz-convert-ctrlm-add (begin end)
  "Add that silly char (^M) to the end of every line in the region BEGIN..END.
A last line without a terminating newline gets a ^M as well.
Returns the new end of the region."
  (let ((added 0))
    (save-excursion
      (save-restriction
        (narrow-to-region begin end)
        (goto-char (point-min))
        (while (and (not (eq (point) (point-max)))
                    (re-search-forward "$" nil t))
          (setq added (1+ added))
          ;; The carriage return is written as an escape rather than as a
          ;; raw control byte in the source.
          (replace-match "\r" t t)
          ;; Step over the newline so the next search finds the next line.
          (goto-char (1+ (point))))))
    ;; Return value: the region has GROWN by the number of inserted ^M's.
    (+ end added)))

(defun cz-convert-fast (begin end input-enc output-enc)
  "One-by-one conversion of the region BEGIN..END using internal tables.
One of INPUT-ENC and OUTPUT-ENC must be one of the symbols `il2' or `win'.
The other one can be any of the symbols `il2', `kam', `koi8', `win', or
`dos'.  The table is looked up in `cz-convert-internal-tables' under the
key (INPUT-ENC OUTPUT-ENC).  Nothing is done, and nil is returned, when both
encodings are the same."
  (if (eq input-enc output-enc)
      nil
    (cz-convert-message-start begin end)
    (let ((table (nth 1 (assoc (list input-enc output-enc)
                               cz-convert-internal-tables))))
      (translate-region begin end table))
    (cz-convert-message-end begin end)))

(defun cz-convert-auto-hook ()
  "Re-initialize autoconversion after a decoding has finished.
Runs once from `pre-command-hook' (it removes itself first), puts the
autoconversion formats back into `format-alist' without changing
`cz-convert-auto-flag', and makes `buffer-file-format' of the current buffer
name the format matching `cz-convert-auto-type'.
It is the only solution I have found to get round limitations of
`format.el'."
  (remove-hook 'pre-command-hook 'cz-convert-auto-hook)
  ;; `cz-convert-auto-enable' sets the flag as a side effect; put the
  ;; previous value back afterwards.
  (let ((saved-flag cz-convert-auto-flag))
    (cz-convert-auto-enable)
    (setq cz-convert-auto-flag saved-flag))
  ;; Remove old encoding in `buffer-file-format'
  (let ((stale '(cz-autoconv
                 cz-autoconv-native cz-autoconv-il2 cz-autoconv-kam
                 cz-autoconv-koi8 cz-autoconv-win cz-autoconv-dos)))
    (while stale
      (setq buffer-file-format (delete (car stale) buffer-file-format)
            stale (cdr stale))))
  ;; Set proper encoding in `buffer-file-format'
  (and cz-convert-auto-type
       (setq buffer-file-format
             (cons (intern (concat "cz-autoconv-"
                                   (symbol-name cz-convert-auto-type)))
                   buffer-file-format))))

;;; Interactive functions

;;;###autoload
(defun cz-convert-auto-enable ()
  "Enable automatic conversion (globally).
Unless `format-alist' already has a `cz-autoconv' entry, add that entry
plus one `cz-autoconv-ENCODING' entry for each of native, il2, kam, koi8,
win and dos.  Finally set `cz-convert-auto-flag' to t."
  (interactive)
  (if (assoc 'cz-autoconv format-alist)
      nil
    ;; The decoding format: matches any non-empty file.
    (setq format-alist
          (cons '(cz-autoconv
                  "Tries to convert to native encoding"
                  "."                   ; regexp
                  cz-convert-decode
                  cz-convert-encode
                  t
                  cz-convert-auto-mode)
                format-alist))
    ;; The encode-only formats, one per target encoding.
    (let ((suffixes '("native" "il2" "kam" "koi8" "win" "dos"))
          suffix)
      (while suffixes
        (setq suffix (car suffixes)
              suffixes (cdr suffixes))
        (setq format-alist
              (cons (list (intern (concat "cz-autoconv-" suffix))
                          "" nil nil
                          (intern (concat "cz-convert-encode-" suffix))
                          t
                          'cz-convert-auto-mode)
                    format-alist)))))
  (setq cz-convert-auto-flag t))

;;;###autoload
;; Only the flag is cleared here; the entries in `format-alist' are left
;; alone.  `cz-convert-decode' checks the flag before converting.
(defun cz-convert-auto-disable ()
  "Disables automatic conversion (globaly)."
  (interactive)
  (setq cz-convert-auto-flag nil))

(defun cz-convert-auto-disable-internal ()
  "Disable automatic conversion temporarily.
All autoconversion formats are removed from `format-alist' and
`cz-convert-auto-hook' is added to `pre-command-hook'.
It is needed for avoiding an infinite loop while performing decoding."
  (mapcar (function
           (lambda (format-name)
             (let ((entry (assoc format-name format-alist)))
               (and entry
                    (setq format-alist (delete entry format-alist))))))
          '(cz-autoconv
            cz-autoconv-native cz-autoconv-il2 cz-autoconv-kam
            cz-autoconv-koi8 cz-autoconv-win cz-autoconv-dos))
  (add-hook 'pre-command-hook 'cz-convert-auto-hook))

;;;###autoload
(defun cz-convert-auto-toggle (&optional arg)
  "Toggle automatic conversion (globally).
If the optional argument ARG is positive, set automatic conversion on.
If the optional argument ARG is negative or zero, set automatic conversion
off.  With ARG nil the current state is flipped.  Interactively, ARG is the
raw prefix argument."
  ;; "P" hands the raw prefix argument over; a bare `(interactive)' would
  ;; always call the command with ARG nil.
  (interactive "P")
  (let ((enable (if (null arg)
                    (not cz-convert-auto-flag)
                  (> (prefix-numeric-value arg) 0))))
    (if enable
        (progn
          (cz-convert-auto-enable)
          (cz-message 4 "Autoconversion enabled."))
      (cz-convert-auto-disable)
      (cz-message 4 "Autoconversion disabled."))))

;; Minibuffer history of the "File encoding: " prompt.
(defvar cz-convert-filenc-history nil
  "History for `cz-convert-auto-encoding'.")

;;;###autoload
(defun cz-convert-auto-encoding (encoding)
  "Set the encoding of the buffer for automatic encoding conversion.
The following encodings are supported: ISO-8859-2 (\"il2\"), Kamenicky
\(\"kam\"), KOI-8 (\"koi8\"), M$-Windoze \"CP 1250\" (\"win\"), and DO$
\"CP 852\" (\"dos\").
ENCODING is the name of the encoding, or nil to switch autoconversion off
for the buffer; interactively, answer \"none\" for that.
The work is done by `cz-convert-auto-set'."
  (interactive
   ;; The interactive form only collects the argument and must return a
   ;; list of arguments; the action itself belongs in the function body.
   (let ((answer ""))
     (while (string= answer "")
       (setq answer (completing-read "File encoding: "
                                     '(("il2" 1) ("kam" 2) ("koi8" 3)
                                       ("win" 4) ("dos" 5) ("none" 6))
                                     nil t nil
                                     'cz-convert-filenc-history)))
     (list (if (string= answer "none") nil answer))))
  (cz-convert-auto-set encoding))

;;;###autoload
(defun cz-convert-auto-set (encoding)
  "Noninteractive variation of `cz-convert-auto-encoding'.
ENCODING names the encoding of the buffer's file and may be a string
\(\"il2\", \"kam\", \"koi8\", \"win\", \"dos\") or the corresponding symbol.
If ENCODING is nil, autoconversion is disabled for the buffer."
  (let* ((enc-name (cond ((null encoding) nil)
                         ((symbolp encoding) (symbol-name encoding))
                         (t encoding)))
         ;; `cz-convert-auto-type' holds a SYMBOL everywhere else in this
         ;; file (`cz-convert-decode' sets one, `cz-convert-auto-hook'
         ;; applies `symbol-name' to it), so store a symbol here as well.
         (enc-symbol (and enc-name (intern enc-name))))
    ;; Initialize autoconversions
    (if (not cz-convert-auto-flag)
        (progn
          (cz-convert-auto-enable)
          (cz-convert-auto-disable)))
    ;; Remove old encodings
    (let ((elements '(cz-autoconv
                      cz-autoconv-native cz-autoconv-il2 cz-autoconv-kam
                      cz-autoconv-koi8 cz-autoconv-win cz-autoconv-dos)))
      (while elements
        (setq buffer-file-format (delq (car elements) buffer-file-format))
        (setq elements (cdr elements))))
    ;; Enabled or disabled?
    (if enc-symbol
        ;; Set new encoding
        (progn
          (setq buffer-file-format
                (cons (intern (concat "cz-autoconv-" enc-name))
                      buffer-file-format))
          ;; Silly characters handling in Windoze
          (if cz-windoze
              (setq buffer-file-type (eq enc-symbol 'il2)))
          ;; Update modeline
          (setq cz-convert-auto-mode t
                cz-convert-auto-modeline (concat " A[" enc-name "]")
                cz-convert-auto-type enc-symbol))
      ;; Disable conversion for the buffer
      (if cz-windoze
          (setq buffer-file-type (find-buffer-file-type (buffer-file-name))))
      (setq cz-convert-auto-mode nil
            cz-convert-auto-type nil)))
  (if cz-xemacs
      (redraw-modeline)))


;;; *** Announce ***

(provide 'czech-convert)


;;; czech-convert.el ends here

