;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-UNICODE; Base: 10 -*- ;;; $Header: /usr/local/cvsrep/cl-unicode/api.lisp,v 1.32 2012-05-04 21:17:44 edi Exp $ ;;; Copyright (c) 2008-2012, Dr. Edmund Weitz. All rights reserved. ;;; Redistribution and use in source and binary forms, with or without ;;; modification, are permitted provided that the following conditions ;;; are met: ;;; * Redistributions of source code must retain the above copyright ;;; notice, this list of conditions and the following disclaimer. ;;; * Redistributions in binary form must reproduce the above ;;; copyright notice, this list of conditions and the following ;;; disclaimer in the documentation and/or other materials ;;; provided with the distribution. ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. (in-package :cl-unicode) (defun try-abbreviations (name scripts-to-try) "Helper function called by CHARACTER-NAMED when the :TRY-ABBREVIATIONS-P keyword argument is true. Tries to interpret NAME as an abbreviation for a longer Unicode name and returns the corresponding code point if it succeeds." (flet ((size-word (string) (if (ppcre:scan "[A-Z]" string) "CAPITAL" "SMALL")) (try (script size-word short-name) (or (character-named (format nil "~A ~A letter ~A" script size-word short-name) :want-code-point-p t) (character-named (format nil "~A letter ~A" script short-name) :want-code-point-p t) (character-named (format nil "~A ~A" script short-name) :want-code-point-p t)))) (ppcre:register-groups-bind (script short-name) ("^([^:]+):([^:]+)$" name) (let ((size-word (size-word short-name))) (return-from try-abbreviations (try script size-word short-name)))) (loop with size-word = (size-word name) for script in scripts-to-try thereis (try script size-word name)))) (defun unicode-name-reader (stream char arg) "The reader function used when the alternative character syntax is enabled." (declare (ignore char arg)) (let ((name (with-output-to-string (out) (write-char (read-char stream t nil t) out) (loop for next-char = (read-char stream t nil t) while (find next-char "abcdefghijklmnopqrstuvwxyz0123456789_-+:" :test 'char-equal) do (write-char next-char out) finally (unread-char next-char stream))))) (or (character-named name) (error 'character-not-found :name name)))) (defun %enable-alternative-character-syntax () "Internal function used to enable alternative character syntax and store current readtable on stack." (push *readtable* *previous-readtables*) (setq *readtable* (copy-readtable)) (set-dispatch-macro-character #\# #\\ 'unicode-name-reader) (values)) (defun %disable-alternative-character-syntax () "Internal function used to restore previous readtable." (setq *readtable* (if *previous-readtables* (pop *previous-readtables*) (copy-readtable nil))) (values)) (defgeneric unicode-name (c) (:documentation "Returns the Unicode name of a character as a string or NIL if there is no name for that particular character. C can be the character's code point \(a positive integer) or a \(Lisp) character assuming its character code is also its Unicode code point.") (:method ((char character)) (unicode-name (char-code char))) (:method ((code-point integer)) (or (gethash code-point *code-points-to-names*) (maybe-compute-hangul-syllable-name code-point) (maybe-compute-cjk-name code-point)))) (defgeneric unicode1-name (c) (:documentation "Returns the Unicode 1.0 name of a character as a string or NIL if there is no name for that particular character. This name is only non-NIL if it is significantly different from the Unicode name (see UNICODE-NAME). For control characters, sometimes the ISO 6429 name is returned instead. C can be the character's code point \(a positive integer) or a \(Lisp) character assuming its character code is also its Unicode code point.") (:method ((char character)) (unicode1-name (char-code char))) (:method ((code-point integer)) (values (gethash code-point *code-points-to-unicode1-names*)))) (defun character-named (name &key want-code-point-p (try-unicode1-names-p *try-unicode1-names-p*) (try-abbreviations-p *try-abbreviations-p*) (scripts-to-try *scripts-to-try*) (try-hex-notation-p *try-hex-notation-p*) (try-lisp-names-p *try-lisp-names-p*)) "Returns the character which has the name NAME \(a string) by looking up the Unicode name \(see UNICODE-NAME). If TRY-UNICODE1-NAMES is true, the Unicode 1.0 name \(see UNICODE1-NAME) will be used as a fallback. If TRY-ABBREVIATIONS-P is true, NAME is treated as an abbreviation as follows: If NAME contains a colon, it is interpreted as \"