From 2bb9a36070147d6c21e4a99d699ec3563c021a70 Mon Sep 17 00:00:00 2001 From: Kevin Rosenberg Date: Sun, 7 Feb 2010 02:07:43 -0700 Subject: [PATCH] Version 1.8.0: First version with external formats for encoding foreign strings --- ChangeLog | 4 +- debian/changelog | 4 +- src/i18n.lisp | 12 +- src/package.lisp | 4 +- src/strings.lisp | 406 +++++++++++++++++++++++++++-------------------- 5 files changed, 245 insertions(+), 185 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7075980..6365fcb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,8 +1,8 @@ 2010-02-06 Kevin Rosenberg - * Version 1.7.4 + * Version 1.8.0 * src/strings.lisp: Initial support for external-formats with foreign strings. - + 2010-02-05 Kevin Rosenberg * Version 1.7.3 * src/primitives.lisp: Fix symbol name diff --git a/debian/changelog b/debian/changelog index 76140a9..123bfb3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ -cl-uffi (1.7.4-1) unstable; urgency=low +cl-uffi (1.8.0-1) unstable; urgency=low * New upstream - -- Kevin M. Rosenberg Sat, 06 Feb 2010 20:10:09 -0700 + -- Kevin M. Rosenberg Sat, 06 Feb 2010 20:38:59 -0700 cl-uffi (1.7.3-1) unstable; urgency=low diff --git a/src/i18n.lisp b/src/i18n.lisp index 1d5ec0a..78c0499 100644 --- a/src/i18n.lisp +++ b/src/i18n.lisp @@ -18,13 +18,13 @@ (and openmcl openmcl-unicode-strings)) (pushnew 'no-i18n cl:*features*) -(defvar *default-external-format* +(defvar *default-foreign-encoding* nil "Normalized name of default external character format to use for foreign string conversions. nil means use implementation default encoding.") -(defvar *external-format-mapping* +(defvar *foreign-encoding-mapping* #+(and lispworks unicode) '((:ascii . :ascii) (:latin-1 . :latin-1) (:ucs-2 . :unicode) (:utf-8 . :utf-8) (:jis . :jis) (:sjis . :sjis) (:gbk . :gbk)) @@ -51,9 +51,9 @@ encoding.") nil "Mapping between normalized external format name and implementation name.") -(defvar *external-formats* - (mapcar 'car *external-format-mapping*) +(defvar *foreign-encodings* + (mapcar 'car *foreign-encoding-mapping*) "List of normalized names of external formats support by underlying implementation.") -(defun map-normalized-external-format (normalized) - (cdr (assoc normalized *external-format-mapping* :test 'eql))) +(defun implementation-foreign-encoding (normalized) + (cdr (assoc normalized *foreign-encoding-mapping* :test 'eql))) diff --git a/src/package.lisp b/src/package.lisp index b6cd204..4d3832a 100644 --- a/src/package.lisp +++ b/src/package.lisp @@ -83,8 +83,8 @@ ;; Internatialization #:no-i18n - #:*default-external-format* - #:*external-formats* + #:*default-foreign-encoding* + #:*foreign-encodings* )) diff --git a/src/strings.lisp b/src/strings.lisp index e1b57d0..2091164 100644 --- a/src/strings.lisp +++ b/src/strings.lisp @@ -3,7 +3,7 @@ ;;;; FILE IDENTIFICATION ;;;; ;;;; Name: strings.lisp -;;;; Purpose: UFFI source to handle strings, cstring and foreigns +;;;; Purpose: UFFI source to handle strings, cstrings, and foreigns ;;;; Programmer: Kevin M. Rosenberg ;;;; Date Started: Feb 2002 ;;;; @@ -29,14 +29,7 @@ that LW/CMU automatically converts strings from c-calls." `(let ((,stored ,obj)) (if (zerop ,stored) nil - (values - (excl:native-to-string - ,stored - :external-format - (if *default-external-format* - (map-normalized-external-format - *default-external-format*) - :default)))))) + (values (excl:native-to-string ,stored))))) #+(or openmcl digitool) (let ((stored (gensym))) `(let ((,stored ,obj)) @@ -52,13 +45,7 @@ that LW/CMU automatically converts strings from c-calls." `(let ((,stored ,obj)) (if (null ,stored) 0 - (values (excl:string-to-native - ,stored - :external-format - (if *default-external-format* - (map-normalized-external-format - *default-external-format*) - :default)))))) + (values (excl:string-to-native ,stored))))) #+(or openmcl digitool) (let ((stored (gensym))) `(let ((,stored ,obj)) @@ -90,21 +77,14 @@ that LW/CMU automatically converts strings from c-calls." (let ((acl-native (gensym)) (stored-lisp-string (gensym))) `(let ((,stored-lisp-string ,lisp-string)) - (excl:with-native-string (,acl-native ,stored-lisp-string - :external-format - (if *default-external-format* - (map-normalized-external-format - *default-external-format*) - :default)) + (excl:with-native-string (,acl-native ,stored-lisp-string) (let ((,cstring (if ,stored-lisp-string ,acl-native 0))) ,@body)))) #+(or openmcl digitool) (let ((stored-lisp-string (gensym))) `(let ((,stored-lisp-string ,lisp-string)) (if (stringp ,stored-lisp-string) - (ccl:with-encoded-cstrs - (or *default-external-format* :iso-8859-1) - ((,cstring ,stored-lisp-string)) + (ccl:with-cstrs ((,cstring ,stored-lisp-string)) ,@body) (let ((,cstring +null-cstring-pointer+)) ,@body)))) @@ -119,116 +99,205 @@ that LW/CMU automatically converts strings from c-calls." ;;; Foreign string functions -(defmacro convert-to-foreign-string (obj &optional external-format) - #+lispworks - (let ((stored (gensym "STR-")) - (ef (gensym "EF-"))) - `(let ((,stored ,obj) - (,ef (map-normalized-external-format - (or external-format *default-external-format*)))) - (if (null ,stored) - +null-cstring-pointer+ - (fli:convert-to-foreign-string - ,stored - :external-format ,ef)))) +(defun %convert-to-foreign-string (str foreign-encoding) + #+(or cmu scl) + (etypecase str + (null + (alien:sap-alien (system:int-sap 0) (* (alien:unsigned 8)))) + (string + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((size (length str)) + (storage (alien:make-alien (alien:unsigned 8) (1+ size)))) + (declare (fixnum size)) + (setq storage (alien:cast storage (* (alien:unsigned 8)))) + (dotimes (i size) + (declare (fixnum i)) + (setf (alien:deref storage i) + (char-code (char stored-obj i)))) + (setf (alien:deref storage size) 0)) + storage))) + + #+(and sbcl (not sb-unicode)) + (etypecase stored-obj + (null + (sb-alien:sap-alien (sb-sys:int-sap 0) (* (sb-alien:unsigned 8)))) + (string + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((size (length stored-obj)) + (storage (sb-alien:make-alien (sb-alien:unsigned 8) (1+ size)))) + (declare (fixnum i)) + (setq storage (sb-alien:cast storage (* (sb-alien:unsigned 8)))) + (dotimes (i size) + (declare (fixnum i)) + (setf (sb-alien:deref storage i) + (char-code (char stored-obj i)))) + (setf (sb-alien:deref storage size) 0)) + storage))) + + #+(and sbcl sb-unicode) + (etypecase str + (null + (sb-alien:sap-alien (sb-sys:int-sap 0) (* (sb-alien:unsigned 8)))) + (string + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((fe (or foreign-encoding *default-foreign-encoding*)) + (ife (when fe (implementation-foreign-encoding fe)))) + (if ife + (let* ((octets (sb-ext:string-to-octets str :external-format ife)) + (size (length octets)) + (storage (sb-alien:make-alien (sb-alien:unsigned 8) (+ size 2)))) + (declare (fixnum size)) + (setq storage (sb-alien:cast storage (* (sb-alien:unsigned 8)))) + (dotimes (i size) + (declare (fixnum i)) + (setf (sb-alien:deref storage i) (svref octets i))) + ;; terminate with 2 nulls, maybe needed for some encodings + (setf (sb-alien:deref storage size) 0) + (setf (sb-alien:deref storage (1+ size)) 0) + storage) + + (let* ((size (length str)) + (storage (sb-alien:make-alien (sb-alien:unsigned 8) (1+ size)))) + (declare (fixnum size)) + (setq storage (sb-alien:cast storage (* (sb-alien:unsigned 8)))) + (dotimes (i size) + (declare (fixnum i)) + (setf (sb-alien:deref storage i) + (char-code (char stored-obj i)))) + (setf (sb-alien:deref storage size) 0) + storage)))))) + + #+(and openmcl openmcl-unicode-strings) + (if (null str) + +null-cstring-pointer+ + (locally + (declare (optimize (speed 3) (safety 0))) + (let* ((fe (or foreign-encoding *default-foreign-encoding*)) + (ife (when fe (implementation-foreign-encoding fe)))) + (if ife + (let* ((octets (ccl:encode-string-to-octets str :external-format ife)) + (size (length octets)) + (ptr (new-ptr (+ size 2)))) + (declare (fixnum size)) + (dotimes (i size) + (declare (fixnum i)) + (setf (ccl:%get-unsigned-byte ptr i) (svref octets i))) + (setf (ccl:%get-unsigned-byte ptr size) 0) + (setf (ccl:%get-unsigned-byte ptr (1+ size)) 0) + ptr) + + (let ((ptr (new-ptr (1+ (length str))))) + (ccl::%put-cstring ptr str) + ptr))))) + + #+(or digitool (and openmcl (not openmcl-unicode-strings))) + (if (null str) + +null-cstring-pointer+ + (let ((ptr (new-ptr (1+ (length str))))) + (ccl::%put-cstring ptr str) + ptr)) + + #+(or allegro lispworks) + (declare (ignore str foreign-encoding)) + + ) + +(defmacro convert-to-foreign-string (obj &optional foreign-encoding) #+allegro (let ((stored (gensym "STR-")) - (ef (gensym "EF-"))) + (ef (gensym "EF-")) + (nef (gensym "NEF-"))) `(let ((,stored ,obj) - (,ef (map-normalized-external-format - (or external-format *default-external-format*)))) - (if (null ,stored) - 0 - (values (excl:string-to-native ,stored :external-format - (or ,ef :default)))))) - #+(or cmu scl) - (let ((size (gensym)) - (storage (gensym)) - (stored-obj (gensym)) - (i (gensym))) - `(let ((,stored-obj ,obj)) - (etypecase ,stored-obj - (null - (alien:sap-alien (system:int-sap 0) (* (alien:unsigned 8)))) - (string - (let* ((,size (length ,stored-obj)) - (,storage (alien:make-alien (alien:unsigned 8) (1+ ,size)))) - (setq ,storage (alien:cast ,storage (* (alien:unsigned 8)))) - (locally - (declare (optimize (speed 3) (safety 0))) - (dotimes (,i ,size) - (declare (fixnum ,i)) - (setf (alien:deref ,storage ,i) - (char-code (char ,stored-obj ,i)))) - (setf (alien:deref ,storage ,size) 0)) - ,storage))))) - #+sbcl - (let ((size (gensym)) - (storage (gensym)) - (stored-obj (gensym)) - (i (gensym))) - `(let ((,stored-obj ,obj)) - (etypecase ,stored-obj - (null - (sb-alien:sap-alien (sb-sys:int-sap 0) (* (sb-alien:unsigned 8)))) - (string - (let* ((,size (length ,stored-obj)) - (,storage (sb-alien:make-alien (sb-alien:unsigned 8) (1+ ,size)))) - (setq ,storage (sb-alien:cast ,storage (* (sb-alien:unsigned 8)))) - (locally - (declare (optimize (speed 3) (safety 0))) - (dotimes (,i ,size) - (declare (fixnum ,i)) - (setf (sb-alien:deref ,storage ,i) - (char-code (char ,stored-obj ,i)))) - (setf (sb-alien:deref ,storage ,size) 0)) - ,storage))))) - #+(or openmcl digitool) - (let ((stored-obj (gensym))) - `(let ((,stored-obj ,obj)) - (if (null ,stored-obj) - +null-cstring-pointer+ - (let ((ptr (new-ptr (1+ (length ,stored-obj))))) - (ccl::%put-cstring ptr ,stored-obj) - ptr)))) - ) + (,fe (or foreign-encoding *default-foreign-encoding*)) + (,ife (when ,fe + (implementation-foreign-encoding ,fe)))) + (cond + ((null ,stored) + 0) + ((null ,ife) + (values (excl:string-to-native ,stored))) + (t + (values (excl:string-to-native ,stored :external-format ,ife)))))) + + #+lispworks + (let ((stored (gensym "STR-")) + (fe (gensym "EF-")) + (ife (gensym "NEF-"))) + `(let* ((,stored ,obj) + (,fe (or ,foreign-encoding *default-foreign-encoding*)) + (,ife (when ,fe + (implementation-foreign-encoding ,fe)))) + (cond + ((null ,stored) + +null-cstring-pointer+) + ((null ,ife) + (fli:convert-to-foreign-string ,stored)) + (t + (fli:convert-to-foreign-string ,stored :external-format ,ife))))) + + #+(or cmu scl sbcl digitool openmcl) + `(%convert-to-foreign-string ,obj (implementation-foreign-encoding + (or ,foreign-encoding *default-foreign-encoding))) +) + ;; Either length or null-terminated-p must be non-nil (defmacro convert-from-foreign-string (obj &key length - external-format + foreign-encoding (null-terminated-p t)) #+allegro (let ((stored-obj (gensym "STR-")) - (ef (gensym "EF-"))) - `(let ((,stored-obj ,obj) - (,ef (map-normalized-external-format - (or ,external-format *default-external-format*)))) + (fe (gensym "FE-")) + (ife (gensym "IFE-"))) + `(let ((,stored-obj ,obj)) (if (zerop ,stored-obj) nil - (if ,ef - (values - (excl:native-to-string - ,stored-obj - ,@(when length (list :length length)) - :truncate (not ,null-terminated-p) - :external-format ,ef)) - (fast-native-to-string ,stored-obj ,length))))) + (let* ((,fe (or ,foreign-encoding *default-foreign-encoding*)) + (,ife (when ,fe (implementation-foreign-encoding ,fe)))) + (if ,ife + (values + (excl:native-to-string + ,stored-obj + ,@(when length (list :length length)) + :truncate (not ,null-terminated-p) + :external-format ,ife)) + (fast-native-to-string ,stored-obj ,length)))))) + #+lispworks + ;; FAST-NATIVE-TO-STRING (suprisingly) works just fine with UTF-8 multibyte character strings + ;; However, without knowledge of specific-encoding, the LENGTH call in FAST-NATIVE-TO-STRING + ;; may not be incorrect for some encodings/strings. + ;; This is a stop-gap until get tech support on why the below fails. + (let ((stored-obj (gensym "STR-"))) + `(let ((,stored-obj ,obj)) + (if (fli:null-pointer-p ,stored-obj) + nil + (fast-native-to-string ,stored-obj ,length)))) + ;; Below code doesn't work on tesing with LW 6.0 testing with a UTF-8 string. + ;; fli:convert-from-foreign-string with :external-format of :UTF-8 doesn't + ;; properly code multibyte characters. +#| (let ((stored-obj (gensym "STR-")) - (ef (gensym "EF-"))) - `(let ((,stored-obj ,obj) - (,ef (map-normalized-external-format - (or ,external-format *default-external-format*)))) + (fe (gensym "FE-")) + (ife (gensym "IFE-"))) + `(let ((,stored-obj ,obj)) (if (fli:null-pointer-p ,stored-obj) nil - (if ,ef - (fli:convert-from-foreign-string - ,stored-obj - ,@(when length (list :length length)) - :null-terminated-p ,null-terminated-p - :external-format (list ,ef)) - (fast-native-to-string ,stored-obj ,length))))) + (let* ((,fe (or ,foreign-encoding *default-foreign-encoding*)) + (,ife (when ,fe (implementation-foreign-encoding ,fe)))) + (if ,ife + (fli:convert-from-foreign-string + ,stored-obj + ,@(when length (list :length length)) + :null-terminated-p ,null-terminated-p + :external-format (list ,ife :eol-style :lf)) + (fast-native-to-string ,stored-obj ,length)))))) +|# + #+(or cmu scl) (let ((stored-obj (gensym))) `(let ((,stored-obj ,obj)) @@ -237,43 +306,52 @@ that LW/CMU automatically converts strings from c-calls." (cmucl-naturalize-cstring (alien:alien-sap ,stored-obj) :length ,length :null-terminated-p ,null-terminated-p)))) + #+(and sbcl (not sb-unicode)) + (let ((stored-obj (gensym))) + `(let ((,stored-obj ,obj)) + (if (null-pointer-p ,stored-obj) + nil + (sbcl-naturalize-cstring (sb-alien:alien-sap ,stored-obj) + :length ,length + :null-terminated-p ,null-terminated-p)))) - #+sbcl + #+(and sbcl sb-unicode) (let ((stored-obj (gensym "STR-")) - (ef (gensym "EF-"))) - `(let ((,stored-obj ,obj) - (,ef (map-normalized-external-format - (or ,external-format *default-external-format*)))) + (fe (gensym "FE-")) + (ife (gensym "IFE-"))) + `(let ((,stored-obj ,obj)) (if (null-pointer-p ,stored-obj) - nil - (if ,ef - (sb-alien::c-string-to-string (sb-alien:alien-sap ,stored-obj) - ,ef 'character) - (sbcl-naturalize-cstring (sb-alien:alien-sap ,stored-obj) - :length ,length - :null-terminated-p ,null-terminated-p))))) + nil + (let* ((,fe (or ,foreign-encoding *default-foreign-encoding*)) + (,ife (when ,fe (implementation-foreign-encoding ,fe)))) + (sb-alien::c-string-to-string (sb-alien:alien-sap ,stored-obj) + (or ,ife sb-impl::*default-external-format* :latin-1) + 'character))))) + #+(or openmcl digitool) (declare (ignore null-terminated-p)) #+(or openmcl digitool) (let ((stored-obj (gensym "STR-")) - (ef (gensym "EF-"))) - `(let ((,stored-obj ,obj) - (,ef (map-normalized-external-format - (or ,external-format *default-external-format*)))) + (fe (gensym "FE-")) + (ife (gensym "IFE-"))) + `(let ((,stored-obj ,obj)) (if (ccl:%null-ptr-p ,stored-obj) nil - #+digitool (ccl:%get-cstring - ,stored-obj 0 - ,@(if length (list length) nil)) - #+openmcl (case ,ef - (:utf-8 - (ccl::%get-utf-8-cstring ,stored-obj)) - (:ucs-2 - (ccl::%get-native-utf-16-cstring ,stored-obj)) - (t - ,@(if length - `((ccl:%str-from-ptr ,stored-obj ,length)) - `((ccl:%get-cstring ,stored-obj)))))))) + #+digitool + (ccl:%get-cstring + ,stored-obj 0 + ,@(if length (list length) nil)) + #+openmcl + (let ((,fe (or ,foreign-encoding *default-foreign-encoding*))) + (case ,fe + (:utf-8 + (ccl::%get-utf-8-cstring ,stored-obj)) + (:ucs-2 + (ccl::%get-native-utf-16-cstring ,stored-obj)) + (t + ,@(if length + `((ccl:%str-from-ptr ,stored-obj ,length)) + `((ccl:%get-cstring ,stored-obj))))))))) ) @@ -323,9 +401,12 @@ that LW/CMU automatically converts strings from c-calls." finally return size)) -(defmacro with-foreign-string ((foreign-string lisp-string) &body body) - (let ((result (gensym))) - `(let* ((,foreign-string (convert-to-foreign-string ,lisp-string)) +(defmacro with-foreign-string ((foreign-string lisp-string &optional foreign-encoding) + &body body) + (let ((result (gensym)) + (fe (gensym))) + `(let* ((,fe ,foreign-encoding) + (,foreign-string (convert-to-foreign-string ,lisp-string ,fe)) (,result (progn ,@body))) (declare (dynamic-extent ,foreign-string)) (free-foreign-object ,foreign-string) @@ -407,27 +488,6 @@ that LW/CMU automatically converts strings from c-calls." (* length +system-copy-multiplier+)) result))) -#+(and sbcl sb-unicode) -(defun sbcl-naturalize-cstring (sap &key length (null-terminated-p t)) - (declare (type sb-sys:system-area-pointer sap) - (type (or null fixnum) length)) - (locally - (declare (optimize (speed 3) (safety 0))) - (cond - (null-terminated-p - (let ((casted (sb-alien:cast (sb-alien:sap-alien sap (* char)) - #+sb-unicode sb-alien:utf8-string - #-sb-unicode sb-alien:c-string))) - (if length - (copy-seq (subseq casted 0 length)) - (copy-seq casted)))) - (t - (let ((result (make-string length))) - ;; this will not work in sb-unicode - (funcall *system-copy-fn* sap 0 result +system-copy-offset+ - (* length +system-copy-multiplier+)) - result))))) - (eval-when (:compile-toplevel :load-toplevel :execute) (def-function "strlen" @@ -455,5 +515,5 @@ that LW/CMU automatically converts strings from c-calls." (type char-ptr-def s)) (let* ((len (or len (strlen s))) (str (make-string len))) - (dotimes (i len str) - (setf (schar str i) (code-char (uffi:deref-array s '(:array :unsigned-byte) i)))))) + (dotimes (i len str) + (setf (schar str i) (code-char (uffi:deref-array s '(:array :unsigned-byte) i)))))) -- 2.34.1