From 1a33d3d010d00a5864402dfe5acde16cdddce02f Mon Sep 17 00:00:00 2001 From: "Kevin M. Rosenberg" Date: Wed, 20 Oct 2010 08:38:21 -0600 Subject: [PATCH] Change Lispwork's foreign string conversion to make wide character strings 2010-10-20 Kevin Rosenberg * src/{strings,i18n}.lisp: Rework foreign string conversion for Lispworks so that wide-character strings are produced (rather than strings of octets) --- ChangeLog | 5 +++++ src/i18n.lisp | 45 +++++++++++++++++++++++++++++++++++++++++---- src/strings.lisp | 14 +++++++++----- 3 files changed, 55 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index c138d54..07c8141 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-10-20 Kevin Rosenberg + * src/{strings,i18n}.lisp: Rework foreign string + conversion for Lispworks so that wide-character strings + are produced (rather than strings of octets) + 2010-04-20 Kevin Rosenberg * Version 2.0.0 * uffi.asdf: Update version so libraries using UFFI diff --git a/src/i18n.lisp b/src/i18n.lisp index 2de8234..a46b535 100644 --- a/src/i18n.lisp +++ b/src/i18n.lisp @@ -1,4 +1,4 @@ -;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: UFFI -*- +;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*- ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; @@ -78,7 +78,22 @@ encoding.") (excl:string-to-octets ,s :external-format ,ife :null-terminate ,nt) (excl:string-to-octets ,s :null-terminate ,nt))))) - #+(or lispworks openmcl) + #+ccl + ;; simply reading each char-code from the LENGTH of string handles + ;; multibyte characters in testing with CCL 1.5 + (let ((len (gensym "LEN-")) + (out (gensym "OUT-"))) + `(let ((,len (length ,str))) + (if (,null-terminate) + (progn + (let ((,out (map-into (make-array (1+ ,len) :element-type '(unsigned-byte 8)) + #'char-code ,str))) + (setf (char ,out ,len) 0) + ,out)) + (map-into (make-array len :element-type '(unsigned-byte 8)) + #'char-code str)))) + + #+lispworks ;; simply reading each char-code from the LENGTH of string handles multibyte characters ;; just fine in testing LW 6.0 and CCL 1.4 (let ((len (gensym "LEN-")) @@ -130,8 +145,30 @@ encoding.") (excl:octets-to-string ,oct :external-format ,ife) (excl:octets-to-string ,oct))))) - #+(or lispworks openmcl) - ;; With LW 6.0 and CCL 1.4, writing multibyte character just one octet at a time tests fine + #+lispworks + ;; With LW 6.0, writing multibyte character just one octet at a time + ;; produces expected formatted output, but strings lengths are too + ;; long and consists only of octets, not wide characters + ;; + ;; Below technique of using fli:convert-from-foreign-string works tp + ;; correctly create string of wide-characters. However, errors occur + ;; during formatted printing of such strings with an error such as + ;; "#\U+30D3 is not of type BASE-CHAR" + (let ((fe (gensym "FE-")) + (ife (gensym "IFE-")) + (oct (gensym "OCTETS-"))) + `(let* ((,fe (or ,encoding *default-foreign-encoding*)) + (,ife (when ,fe (lookup-foreign-encoding ,fe))) + (,oct ,octets)) + (fli:with-dynamic-foreign-objects + ((ptr (:unsigned :byte) :initial-contents (coerce ,oct 'list))) + (fli:convert-from-foreign-string ptr + :length (length ,oct) + :null-terminated-p nil + :external-format ,ife)))) + + #+(or ccl openmcl) + ;; With CCL 1.5, writing multibyte character just one octet at a time tests fine (let ((out (gensym "OUT-")) (code (gensym "CODE-"))) `(with-output-to-string (,out) diff --git a/src/strings.lisp b/src/strings.lisp index f6c6d9f..f5b5634 100644 --- a/src/strings.lisp +++ b/src/strings.lisp @@ -1,4 +1,4 @@ -;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10; Package: UFFI -*- +;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*- ;;;; ************************************************************************* ;;;; FILE IDENTIFICATION ;;;; @@ -270,19 +270,24 @@ that LW/CMU automatically converts strings from c-calls." (fast-native-to-string ,stored-obj ,length)))))) #+lispworks - ;; FAST-NATIVE-TO-STRING (suprisingly) works just fine with UTF-8 multibyte character strings + #| + ;; FAST-NATIVE-TO-STRING (suprisingly) works just fine to make strings + ;; for formatted printing with Lispworks and UTF-8 multibyte character strings. ;; However, without knowledge of specific-encoding, the LENGTH call in FAST-NATIVE-TO-STRING - ;; may not be incorrect for some encodings/strings. + ;; will be be incorrect for some encodings/strings and strings consist of octets rather + ;; than wide characters ;; This is a stop-gap until get tech support on why the below fails. (let ((stored-obj (gensym "STR-"))) `(let ((,stored-obj ,obj)) (if (fli:null-pointer-p ,stored-obj) nil (fast-native-to-string ,stored-obj ,length)))) + |# + #| ;; Below code doesn't work on tesing with LW 6.0 testing with a UTF-8 string. ;; fli:convert-from-foreign-string with :external-format of :UTF-8 doesn't ;; properly code multibyte characters. -#| + |# (let ((stored-obj (gensym "STR-")) (fe (gensym "FE-")) (ife (gensym "IFE-"))) @@ -298,7 +303,6 @@ that LW/CMU automatically converts strings from c-calls." :null-terminated-p ,null-terminated-p :external-format (list ,ife :eol-style :lf)) (fast-native-to-string ,stored-obj ,length)))))) -|# #+(or cmu scl) (let ((stored-obj (gensym))) -- 2.34.1