1 ;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
2 ;;;; *************************************************************************
3 ;;;; FILE IDENTIFICATION
6 ;;;; Purpose: non-ASCII character support
7 ;;;; Programmer: Kevin M. Rosenberg
8 ;;;; Date Started: Feb 2010
10 ;;;; This file, part of UFFI, is Copyright (c) 2002-2010 by Kevin M. Rosenberg
12 ;;;; *************************************************************************
16 #-(or (and lispworks unicode) (and sbcl sb-unicode)
17 (and allegro ics) (and clisp i18n)
18 (and openmcl openmcl-unicode-strings))
19 (pushnew 'no-i18n cl:*features*)
21 (defvar *default-foreign-encoding*
23 "Normalized name of default external character format to use
24 for foreign string conversions. nil means use implementation default
27 (defvar *foreign-encoding-mapping*
28 #+(and lispworks unicode)
29 '((:ascii . :ascii) (:latin-1 . :latin-1) (:ucs-2 . :unicode)
30 (:utf-8 . :utf-8) (:jis . :jis) (:sjis . :sjis) (:gbk . :gbk))
31 #+(and sbcl sb-unicode)
32 '((:ascii . :ascii) (:latin-1 . :latin-1) (:utf-8 . :utf-8)
33 (:ucs-2 . :ucs-2) (:sjis . :sjis) (:gbk . :gbk))
35 '((:ascii . :ascii) (:latin-1 . :latin1) (:utf-8 . :utf-8)
36 (:sjis . :shift-jis) (:euc-jp . :euc) (:gbk . :gb2313)
39 '((:ascii . charset:ascii) (:ucs-2 . charset:ucs-2)
40 (:utf-8 . charset:utf-8) (:latin-1 . charset:iso-8859-1)
41 (:jis . charset:jis_x0201) (:jis . charset:shift-jis)
42 (:gbk . charset:gbk) (:euc-jp . charset:euc-jp))
43 #+(and openmcl openmcl-unicode-strings)
44 '((:ascii . :ascii) (:latin-1 . :iso-8859-1) (:utf-8 . :utf-8)
46 #+nil (:euc-jp . :euc-jp)
48 #-(or (and lispworks unicode) (and sbcl sb-unicode)
49 (and allegro ics) (and clisp unicode)
50 (and openmcl openmcl-unicode-strings))
52 "Mapping between normalized external format name and implementation name.")
54 (defvar *foreign-encodings*
55 (mapcar 'car *foreign-encoding-mapping*)
56 "List of normalized names of external formats support by underlying implementation.")
58 (defun lookup-foreign-encoding (normalized)
59 (cdr (assoc normalized *foreign-encoding-mapping* :test 'eql)))
61 (defmacro string-to-octets (str &key encoding null-terminate)
62 (declare (ignorable encoding))
63 #-(or allegro lispworks openmcl sbcl)
64 (map-into (make-array (length str) :element-type '(unsigned-byte 8))
68 (let ((fe (gensym "FE-"))
72 `(let* ((,fe (or ,encoding *default-foreign-encoding*))
73 (,ife (when ,fe (lookup-foreign-encoding ,fe)))
75 (,nt ,null-terminate))
78 (excl:string-to-octets ,s :external-format ,ife :null-terminate ,nt)
79 (excl:string-to-octets ,s :null-terminate ,nt)))))
82 ;; simply reading each char-code from the LENGTH of string handles
83 ;; multibyte characters in testing with CCL 1.5
84 (let ((len (gensym "LEN-"))
85 (out (gensym "OUT-")))
86 `(let ((,len (length ,str)))
89 (let ((,out (map-into (make-array (1+ ,len) :element-type '(unsigned-byte 8))
91 (setf (char ,out ,len) 0)
93 (map-into (make-array len :element-type '(unsigned-byte 8))
97 ;; simply reading each char-code from the LENGTH of string handles multibyte characters
98 ;; just fine in testing LW 6.0 and CCL 1.4
99 (let ((len (gensym "LEN-"))
100 (out (gensym "OUT-")))
101 `(let ((,len (length ,str)))
102 (if (,null-terminate)
104 (let ((,out (map-into (make-array (1+ ,len) :element-type '(unsigned-byte 8))
106 (setf (char ,out ,len) 0)
108 (map-into (make-array len :element-type '(unsigned-byte 8))
112 (let ((fe (gensym "FE-"))
113 (ife (gensym "IFE-"))
116 `(let* ((,fe (or ,encoding *default-foreign-encoding*))
117 (,ife (when ,fe (lookup-foreign-encoding ,fe)))
119 (,nt ,null-terminate))
121 (sb-ext:string-to-octets ,s :external-format ,ife :null-terminate ,nt)
122 (sb-ext:string-to-octets ,s :null-terminate ,nt))))
126 (defmacro octets-to-string (octets &key encoding)
127 "Converts a vector of octets to a Lisp string."
128 (declare (ignorable encoding))
129 #-(or allegro lispworks openmcl sbcl)
130 (let ((out (gensym "OUT-"))
131 (code (gensym "CODE-")))
132 `(with-output-to-string (,out)
133 (loop for ,code across ,octets
134 do (write-char (code-char ,code) ,out))))
137 (let ((fe (gensym "FE-"))
138 (ife (gensym "IFE-"))
139 (oct (gensym "OCTETS-")))
140 `(let* ((,fe (or ,encoding *default-foreign-encoding*))
141 (,ife (when ,fe (lookup-foreign-encoding ,fe)))
145 (excl:octets-to-string ,oct :external-format ,ife)
146 (excl:octets-to-string ,oct)))))
149 ;; With LW 6.0, writing multibyte character just one octet at a time
150 ;; produces expected formatted output, but strings lengths are too
151 ;; long and consists only of octets, not wide characters
153 ;; Below technique of using fli:convert-from-foreign-string works tp
154 ;; correctly create string of wide-characters. However, errors occur
155 ;; during formatted printing of such strings with an error such as
156 ;; "#\U+30D3 is not of type BASE-CHAR"
157 (let ((fe (gensym "FE-"))
158 (ife (gensym "IFE-"))
159 (oct (gensym "OCTETS-")))
160 `(let* ((,fe (or ,encoding *default-foreign-encoding*))
161 (,ife (when ,fe (lookup-foreign-encoding ,fe)))
163 (fli:with-dynamic-foreign-objects
164 ((ptr (:unsigned :byte) :initial-contents (coerce ,oct 'list)))
165 (fli:convert-from-foreign-string ptr
166 :length (length ,oct)
167 :null-terminated-p nil
168 :external-format ,ife))))
171 ;; With CCL 1.5, writing multibyte character just one octet at a time tests fine
172 (let ((out (gensym "OUT-"))
173 (code (gensym "CODE-")))
174 `(with-output-to-string (,out)
175 (loop for ,code across ,octets
176 do (write-char (code-char ,code) ,out))))
179 (let ((fe (gensym "FE-"))
180 (ife (gensym "IFE-"))
181 (oct (gensym "OCTETS-")))
182 `(let* ((,fe (or ,encoding *default-foreign-encoding*))
183 (,ife (when ,fe (lookup-foreign-encoding ,fe)))
186 (sb-ext:octets-to-string ,oct :external-format ,ife)
187 (sb-ext:octets-to-string ,oct))))
191 (defun foreign-encoded-octet-count (str &key encoding)
192 "Returns the octets required to represent the string when passed to a ~
194 (declare (ignorable encoding))
195 ;; AllegroCL 8-bit, CCL, and Lispworks give correct value without converting
196 ;; to external-format. AllegroCL 16-bit, SBCL, and CLISP requires conversion
197 ;; with external-format
199 #+(or (and allegro ics) (and sbcl sb-unicode) (and clisp i18n))
200 (length (string-to-octets str :encoding
201 (or encoding *default-foreign-encoding*)))
203 #-(or (and allegro ics) (and sbcl sb-unicode) (and clisp i18n))