;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.15 2006/06/12 23:26:12 edi Exp $

;;; Copyright (c) 2005-2006, Dr. Edmund Weitz.  All rights reserved.

;;; Redistribution and use in source and binary forms, with or without
;;; modification, are permitted provided that the following conditions
;;; are met:

;;;   * Redistributions of source code must retain the above copyright
;;;     notice, this list of conditions and the following disclaimer.

;;;   * Redistributions in binary form must reproduce the above
;;;     copyright notice, this list of conditions and the following
;;;     disclaimer in the documentation and/or other materials
;;;     provided with the distribution.

;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

(in-package :flexi-streams)

(defvar +iso-8859-hashes+
  (loop for (name . table) in +iso-8859-tables+
        collect (cons name (invert-table table)))
  "An alist which maps names for ISO-8859 encodings to hash
tables which map character codes to the corresponding octets.")

(defvar +code-page-hashes+
  (loop for (id . table) in +code-page-tables+
        collect (cons id (invert-table table)))
  "An alist which maps IDs of Windows code pages to hash tables
which map character codes to the corresponding octets.")

(defvar +ascii-hash+ (invert-table +ascii-table+)
  "A hash table which maps US-ASCII character codes to the
corresponding octets.")

(defun write-byte* (byte flexi-output-stream)
  "Writes one byte \(octet) to the underlying stream of
FLEXI-OUTPUT-STREAM."
  ;; we use WRITE-SEQUENCE because WRITE-BYTE doesn't work with all
  ;; bivalent streams in LispWorks (4.4.6)
  (write-sequence (make-array 1 :element-type 'octet
                                :initial-element byte)
                  (flexi-stream-stream flexi-output-stream))
  byte)

(defun translate-char-8-bit (flexi-stream char-code encoding-hash)
  "Converts the character denoted by the character code CHAR-CODE
into a one-element list with the corresponding octet which is
found by looking it up in the hash table ENCODING-HASH."
  (let ((octet (gethash char-code encoding-hash)))
    (cond (octet (list octet))
          (t (signal-encoding-error flexi-stream
                                     "Character with code #x~X is not in this encoding."
                                     char-code)))))

(defun translate-char-utf-32 (char-code little-endian)
  "Converts the character denoted by the character code CHAR-CODE
into a list of four octets which correspond to its UTF-32
encoding.  LITTLE-ENDIAN denotes the endianess of the encoding."
  (loop for position in (if little-endian
                          '(0 8 16 24)
                          '(24 16 8 0))
        collect (ldb (byte 8 position) char-code)))

(defun translate-char-utf-16 (char-code little-endian)
  "Converts the character denoted by the character code CHAR-CODE
into a list of two or four octets which correspond to its UTF-16
encoding.  LITTLE-ENDIAN denotes the endianess of the encoding."
  (let (result)
    (flet ((collect-word (word)
             (cond (little-endian
                    (push (ldb (byte 8 0) word) result)
                    (push (ldb (byte 8 8) word) result))
                   (t (push (ldb (byte 8 8) word) result)
                      (push (ldb (byte 8 0) word) result)))))
      (cond ((< char-code #x10000)
             (collect-word char-code))
            (t (let ((low-part (ldb (byte 10 0) char-code))
                     (high-part (ldb (byte 10 10) char-code)))
                 (collect-word (logior #xd800 high-part))
                 (collect-word (logior #xdc00 low-part))))))
    (nreverse result)))

(defun translate-char-utf-8 (char-code)
  "Converts the character denoted by the character code CHAR-CODE
into a list of up to six octets which correspond to its UTF-8
encoding."
  (let* (result
         (count
          (cond ((< char-code #x80) (push char-code result) nil)
                ((< char-code #x800) 1)
                ((< char-code #x10000) 2)
                ((< char-code #x200000) 3)
                ((< char-code #x4000000) 4)
                (t 5))))
    (when count
      (loop for rest = char-code then (ash rest -6)
            repeat count
            do (push (logior #b10000000
                             (logand #b111111 rest))
                     result)
            finally (push (logior (logand #b11111111
                                          (ash #b1111110 (- 6 count)))
                                  rest)
                          result)))
    result))
            
(defun translate-char (char-code flexi-stream)
  "Converts the character denoted by the character code CHAR-CODE
into a list of octets corresponding to the current encoding
\(i.e. external format) of FLEXI-STREAM.  Also handles #\Newline
according to the EOL-STYLE slot of the external format."
  (let* ((external-format (flexi-stream-external-format flexi-stream))
         (external-format-name (external-format-name external-format)))
    (flet ((translate-one-char (char-code)
             "Helper function which does all the work except for
#\Newline handling."
             (cond ((ascii-name-p external-format-name)
                    (translate-char-8-bit flexi-stream char-code +ascii-hash+))
                   ((iso-8859-name-p external-format-name)
                    (translate-char-8-bit flexi-stream char-code
                                          (cdr (assoc external-format-name
                                                      +iso-8859-hashes+
                                                      :test #'eq))))
                   ((code-page-name-p external-format-name)
                    (translate-char-8-bit flexi-stream char-code
                                          (cdr (assoc (external-format-id external-format)
                                                      +code-page-hashes+))))
                   (t (let ((external-format-little-endian
                             (external-format-little-endian external-format)))
                        (case external-format-name
                          (:utf-8 (translate-char-utf-8 char-code))
                          (:utf-16 (translate-char-utf-16 char-code
                                                          external-format-little-endian))
                          (:utf-32 (translate-char-utf-32 char-code
                                                          external-format-little-endian))))))))
      (cond ((= char-code #.(char-code #\Newline))
             (case (external-format-eol-style external-format)
               (:cr (translate-one-char #.(char-code #\Return)))
               (:lf (translate-one-char #.(char-code #\Linefeed)))
               (:crlf (nconc (translate-one-char #.(char-code #\Return))
                             (translate-one-char #.(char-code #\Linefeed))))))
            (t (translate-one-char char-code))))))
               
(defmethod stream-clear-output ((stream flexi-output-stream))
  "Simply calls the corresponding method for the underlying
output stream."
  (clear-output (flexi-stream-stream stream)))

(defmethod stream-finish-output ((stream flexi-output-stream))
  "Simply calls the corresponding method for the underlying
output stream."
  (finish-output (flexi-stream-stream stream)))

(defmethod stream-force-output ((stream flexi-output-stream))
  "Simply calls the corresponding method for the underlying
output stream."
  (force-output (flexi-stream-stream stream)))

(defmethod stream-line-column ((stream flexi-output-stream))
  "Returns the column stored in the COLUMN slot of the
FLEXI-OUTPUT-STREAM object STREAM."
  (flexi-stream-column stream))

(defmethod stream-terpri ((stream flexi-output-stream))
  "Writes a #\Newline character to the underlying stream."
  ;; needed for AllegroCL - grrr...
  (stream-write-char stream #\Newline))

(defmethod stream-write-byte ((stream flexi-output-stream) byte)
  "Writes a byte \(octet) to the underlying stream."
  ;; set column to NIL because we don't know how to handle binary
  ;; output mixed with character output
  (setf (flexi-stream-column stream) nil)
  (write-byte* byte stream))

(defmethod stream-write-char ((stream flexi-output-stream) char)
  "Writes a character to the underlying stream according to the
current encoding \(external format) of the FLEXI-OUTPUT-STREAM
object STREAM."
  ;; update the column unless we're in the middle of the line and
  ;; the current value is NIL
  (cond ((char= char #\Newline)
         (setf (flexi-stream-column stream) 0))
        ((flexi-stream-column stream)
         (incf (flexi-stream-column stream))))
  (dolist (byte (translate-char (char-code char) stream))
    (write-byte* byte stream))
  char)

(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key)
  "Writes all elements of the sequence SEQUENCE from START to END
to the underlying stream.  The elements can be either octets or
characters.  Characters are output according to the current
encoding \(external format) of the FLEXI-OUTPUT-STREAM object
STREAM."
  (loop for index from start below end
        for element = (elt sequence index)
        when (characterp element)
        do (stream-write-char stream element)
        else
        do (stream-write-byte stream element))
  sequence)
