"======================================================================
|
|   Localization and internationalization support
|
|   $Revision: 1.95.1$
|   $Date: 2000/12/27 10:45:49$
|   $Author: pb$
|
 ======================================================================"


"======================================================================
|
| Copyright 1988-92, 1994-95, 1999, 2000 Free Software Foundation, Inc.
| Written by Paolo Bonzini.
|
| This file is part of the GNU Smalltalk class library.
|
| The GNU Smalltalk class library is free software; you can redistribute it
| and/or modify it under the terms of the GNU Lesser General Public License
| as published by the Free Software Foundation; either version 2.1, or (at
| your option) any later version.
| 
| The GNU Smalltalk class library is distributed in the hope that it will be
| useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
| General Public License for more details.
| 
| You should have received a copy of the GNU Lesser General Public License
| along with the GNU Smalltalk class library; see the file COPYING.LESSER.
| If not, write to the Free Software Foundation, 59 Temple Place - Suite
| 330, Boston, MA 02111-1307, USA.  
|
 ======================================================================"

Object subclass: #Locale
    instanceVariableNames: 'language territory charset'
    classVariableNames: 'ValidLanguages ValidTerritories DefaultTerritories DefaultCharsets CharsetNames Aliases RootDirectory'
    poolDictionaries: 'Charsets'
    category: 'i18n-Messages'!

Locale class
    instanceVariableNames: 'posix default'!

Locale comment: 
'This object is an abstract superclass of objects related to the territory
and language in which the program is being used.  Instances of it are
asked about information on the current locale, and provide a means to be
asked for things with a common idiom, the #? binary message.'!

Locale subclass: #LcFile
    instanceVariableNames: 'cachedData'
    classVariableNames: ''
    poolDictionaries: ''
    category: 'i18n-Messages'!

LcFile comment: 
'LcFile is an abstract superclass of objects that read the information they
need from a locally stored file.'!

LcFile subclass: #LcPrintFormats
    instanceVariableNames: ''
    classVariableNames: ''
    poolDictionaries: ''
    category: 'i18n-Messages'!

LcPrintFormats comment: 
'LcPrintFormats subclasses have instances that understand #?,
#printString: and #print:on: (the last of which is abstract) which
provide a means to convert miscellaneous objects to Strings according
to the rules that are used in the given locale.'!

FileStream subclass: #BigEndianFileStream
    instanceVariableNames: ''
    classVariableNames: ''
    poolDictionaries: ''
    category: 'i18n-Messages'!

BigEndianFileStream comment:
'Unlike ByteStream and FileStream, this retrieves integer numbers in
big-endian (68000, PowerPC, SPARC) order.'!

!Locale class methodsFor: 'initialization'!

territories
    "Valid territory codes"
    ^#( 'AF' 'AL' 'DZ' 'AS' 'AD' 'AO' 'AI' 'AQ' 'AG' 'AR' 'AM'
	'AW' 'AU' 'AT' 'AZ' 'BS' 'BH' 'BD' 'BB' 'BY' 'BE' 'BZ'
	'BJ' 'BM' 'BT' 'BO' 'BA' 'BW' 'BV' 'BR' 'IO' 'BN' 'BG'
	'BF' 'BI' 'KH' 'CM' 'CA' 'CV' 'KY' 'CF' 'TD' 'CL' 'CN'
	'CX' 'CC' 'CO' 'KM' 'CG' 'CK' 'CR' 'CI' 'HR' 'CU' 'CY'
	'CS' 'DK' 'DJ' 'DM' 'DO' 'TP' 'EC' 'EG' 'SV' 'GQ' 'EE'
	'ET' 'FK' 'FO' 'FJ' 'FI' 'FR' 'GF' 'PF' 'TF' 'GA' 'GM'
	'GE' 'DE' 'GH' 'GI' 'GR' 'GL' 'GD' 'GP' 'GU' 'GT' 'GN'
	'GW' 'GY' 'HT' 'HM' 'HN' 'HK' 'HU' 'IS' 'IN' 'ID' 'IR'
	'IQ' 'IE' 'IL' 'IT' 'JM' 'JP' 'JO' 'KZ' 'KE' 'KI' 'KP'
	'KR' 'KW' 'KG' 'LA' 'LV' 'LB' 'LS' 'LR' 'LY' 'LI' 'LT'
	'LU' 'MO' 'MG' 'MY' 'MW' 'MV' 'ML' 'MT' 'MH' 'MQ' 'MR'
	'MU' 'MX' 'FM' 'MD' 'MC' 'MN' 'MS' 'MA' 'MZ' 'MM' 'NA'
	'NR' 'NP' 'NL' 'AN' 'NT' 'NC' 'NZ' 'NI' 'NE' 'NG' 'NU'
	'NF' 'MP' 'NO' 'OM' 'PK' 'PW' 'PA' 'PG' 'PY' 'PE' 'PH'
	'PN' 'PL' 'PT' 'PR' 'QA' 'RE' 'RO' 'RU' 'RW' 'SH' 'KN'
	'LC' 'PM' 'VC' 'WS' 'SM' 'ST' 'SA' 'SN' 'SE' 'SL' 'SG'
	'SI' 'SB' 'SO' 'ZA' 'ES' 'LK' 'SD' 'SR' 'SJ' 'SZ' 'SE'
	'CH' 'SY' 'TW' 'TJ' 'TZ' 'TH' 'TG' 'TK' 'TO' 'TT' 'TN'
	'TR' 'TM' 'TC' 'TV' 'UG' 'UA' 'AE' 'GB' 'US' 'UM' 'UY'
	'UZ' 'VU' 'VA' 'VE' 'VN' 'VG' 'VI' 'WF' 'EH' 'YE' 'YU'
	'ZR' 'ZM' 'ZW' 'SK' 'SP')!

languages
    "ISO639 language codes"
    ^#( 'aa' 'ab' 'af' 'am' 'ar' 'as' 'ay' 'az' 'ba' 'be' 'bg'
	'bh' 'bi' 'bn' 'bo' 'br' 'ca' 'co' 'cs' 'cy' 'da' 'de'
	'dz' 'el' 'en' 'eo' 'es' 'et' 'eu' 'fa' 'fi' 'fj' 'fo'
	'fr' 'fy' 'ga' 'gd' 'gl' 'gn' 'gu' 'ha' 'he' 'hi' 'hr'
	'hu' 'hy' 'ia' 'id' 'ie' 'ik' 'is' 'it' 'iu' 'ja' 'jw'
	'ka' 'kk' 'kl' 'km' 'kn' 'ko' 'ks' 'ku' 'ky' 'la' 'ln'
	'lo' 'lt' 'lv' 'mg' 'mi' 'mk' 'ml' 'mn' 'mo' 'mr' 'ms'
	'mt' 'my' 'na' 'ne' 'nl' 'no' 'oc' 'om' 'or' 'pa' 'pl'
	'ps' 'pt' 'qu' 'rm' 'rn' 'ro' 'ru' 'rw' 'sa' 'sd' 'sg'
	'sh' 'si' 'sk' 'sl' 'sm' 'sn' 'so' 'sq' 'sr' 'ss' 'st'
	'su' 'sv' 'sw' 'ta' 'te' 'tg' 'th' 'ti' 'tk' 'tl' 'tn'
	'to' 'tr' 'ts' 'tt' 'tw' 'ug' 'uk' 'ur' 'uz' 'vi' 'vo'
	'wo' 'xh' 'yi' 'yo' 'za' 'zh' 'zu')!

defaults
    ^#(	('af' 'ZA'	ISO88591)
	('ar' 'SA'	ISO88596)
	('bg' 'BG'	ISO88595)
	('br' 'FR'	ISO88591)
	('ca' 'ES'	ISO88591)
	('cs' 'CZ'	ISO88592)
	('cy' 'GB'	ISO88591)
	('de' 'DE'	ISO88591)
	('el' 'GR'	ISO88597)
	('en' 'US'	ISO88591)
	('eo' 'XX'	ISO88593)
	('es' 'ES'	ISO88591)
	('et' 'EE'	ISO88594)
	('eu' 'ES'	ISO88591)
	('fi' 'FI'	ISO88591)
	('fo' 'FO'	ISO88591)
	('fr' 'FR'	ISO88591)
	('ga' 'IE'	ISO88591)
	('gd' 'GB'	ISO88591)
	('gl' 'ES'	ISO88591)
	('gv' 'GB'	ISO88591)
	('he' 'IL'	ISO88598)
	('hr' 'HR'	ISO88592)
	('hu' 'HU'	ISO88592)
	('hy' 'AM'	ARMSCII8)
	('id' 'ID'	ISO88591)
	('is' 'IS'	ISO88591)
	('it' 'IT'	ISO88591)
	('ja' 'JP'	SJISX0208)
	('kl' 'GL'	ISO88591)
	('kw' 'GB'	ISO88591)
	('lo' 'LA'	MULELAO1)
	('lt' 'LT'	ISO88594)
	('lv' 'LV'	ISO88594)
	('mk' 'MK'	ISO88595)
	('nl' 'NL'	ISO88591)
	('no' 'NO'	ISO88591)
	('oc' 'FR'	ISO88591)
	('pl' 'PL'	ISO88592)
	('pt' 'PT'	ISO88591)
	('ro' 'RO'	ISO88592)
	('ru' 'RU'	KOI8R)
	('sh' 'YU'	ISO88592)
	('sk' 'SK'	ISO88592)
	('sl' 'SI'	ISO88592)
	('sp' 'YU'	ISO88595)
	('sq' 'AL'	ISO88592)
	('sr' 'SP'	ISO88592)
	('sv' 'SE'	ISO88591)
	('th' 'TH'	TACTIS)
	('tr' 'TR'	ISO88599)
	('uk' 'UA'	KOI8U)
	('vi' 'VN'	VISCII)
	('wa' 'BE'	ISO88591)
	('zh' 'TW'	BIG5))!

initialize
    ValidLanguages := self languages asSet.
    ValidTerritories := self territories asSet.
    Aliases := LookupTable new.
    DefaultTerritories := LookupTable new.
    DefaultCharsets := LookupTable new.
    CharsetNames := LookupTable new.

    self defaults do: [ :each |
	DefaultTerritories
	    at: (each at: 1)
	    put: (each at: 2).

	DefaultCharsets
	    at: (each at: 1)
	    put: (each at: 3).
    ].
    
    Charsets allClassesDo: [ :each |
	each charsetName isNil ifFalse: [
	    CharsetNames at: each put: each charsetName
	]
    ].
    
    Smalltalk addInit: [ self flush ]! !
    
!Locale class methodsFor: 'private'!

extractLocaleParts: aString
    | stream language territory charset |
    stream := aString readStream.
    language := stream upTo: $_.
    stream atEnd ifTrue: [ ^Array with: language ].
    territory := stream upTo: $. .
    stream atEnd ifTrue: [ ^Array with: language with: territory ].
    charset := stream upToEnd.
    ^Array with: language with: territory with: charset! !

!Locale class methodsFor: 'instance creation'!

flush
    self subclassesDo: [ :each | each flush ].
    default := nil!

default
    | env |
    default isNil ifFalse: [ ^default ].

    env := Smalltalk getenv: self category.
    env isNil ifTrue: [ env := Smalltalk getenv: 'LANG' ].
    env isNil ifTrue: [ ^self posix ].
    ^default := [ self fromString: env ]
	on: Error
	do: [ :ex | ex return: self posix ]!

fromString: aString
    | what |
    what := (aString includes: $_)
	ifTrue: [ self extractLocaleParts: aString ]
	ifFalse: [ Aliases at: aString ifAbsent: [ aString ] ].

    what size = 1 ifTrue: [ ^self language: (what at: 1) ].
    what size = 2 ifTrue: [ ^self language: (what at: 1) territory: (what at: 2) ].
    ^self language: (what at: 1) territory: (what at: 2) charset: (what at: 3)!

language: lang
    ^self
	language: lang
	territory: (DefaultTerritories at: lang)
	charset: (DefaultCharsets at: lang ifAbsent: [ #ISO88591 ])!

language: lang territory: territory
    ^self
	language: lang
	territory: territory
	charset: (DefaultCharsets at: lang ifAbsent: [ #ISO88591 ])!

language: lang territory: territory charset: charset
    | actualCharset |
    (ValidLanguages includes: lang asString)
	ifFalse: [ self error: 'invalid language ', lang ].

    (ValidTerritories includes: territory asString)
	ifFalse: [ self error: 'invalid territory ', territory ].

    "Yeah, these are namespaces!"
    actualCharset := Charsets
	classAt: charset
	ifAbsent: [ ISO88591 ].

    ^self basicNew
	language: lang
	territory: territory
	charset: actualCharset!
    
new
    self shouldNotImplement!

posix
    posix isNil ifTrue: [
	posix := self basicNew
	    language: #POSIX
	    territory: ''
	    charset: Smalltalk String
    ].
    ^posix! !

!Locale class methodsFor: 'directory structure'!

rootDirectory
    RootDirectory isNil ifTrue: [ ^Directory image, '/../locale' ].
    ^RootDirectory!

rootDirectory: aString
    self flush.
    RootDirectory := aString!

category
    ^'LANG'
! !

!Locale class methodsFor: 'accessing'!

? anObject
    ^self default ? anObject!

!Locale methodsFor: 'accessing'!

? anObject
    self subclassResponsibility!

category
    ^self class category!

charset
    ^charset!

language
    ^language!

territory
    ^territory!

isPosixLocale
    ^language == #POSIX! !

!Locale methodsFor: 'directory structure'!

territoryDirectory
    self isPosixLocale ifTrue: [ ^self languageDirectory ].
    ^'%1/%2_%3'
	bindWith: self class rootDirectory
	with: self language
	with: self territory!

languageDirectory
    ^'%1/%2'
	bindWith: self class rootDirectory
	with: self language! !

!Locale methodsFor: 'initialization'!

language: lang territory: terr charset: cs
    language := lang.
    territory := terr.
    charset := cs! !

!LcFile class methodsFor: 'signatures'!

bigEndianID
    self subclassResponsibility!

littleEndianID
    self subclassResponsibility! !

!LcFile class methodsFor: 'initialization'!

initialize
    Smalltalk addInit: [ LcFile onStartup ]!

onStartup
    LcFile allSubinstancesDo: [ :each | each flush ]! !

!LcFile methodsFor: 'initialization'!

language: lang territory: terr charset: cs
    super language: lang territory: terr charset: cs.
    self flush! !

!LcFile methodsFor: 'reading locale data'!

readData
    | fileName fileStream |
    cachedData ifFalse: [
	cachedData := true.
	self isPosixLocale ifTrue: [ ^self bePosix ].

	fileName := Directory
	    append: self class category
	    to: self territoryDirectory.

	fileStream := FileStream open: fileName mode: FileStream read.

	[
	    self
		readDataFrom: (self initReadingFrom: fileStream);
		setDefaults
	] on: ExError do: [ :ex | self bePosix. ex return ]
    ]!

flush
    cachedData := false.
    self isPosixLocale ifTrue: [ self readData ]! !

!LcFile methodsFor: 'private - reading locale data'!

bePosix
!

readDataFrom: aFileStream
    self subclassResponsibility!

initReadingFrom: aFileStream
    | offsets |
    aFileStream
	littleEndianMagicNumber: self class littleEndianID
	bigEndianMagicNumber: self class bigEndianID.

    offsets := (1 to: aFileStream nextLong)
        collect: [ :each | aFileStream nextLong ].

    ^aFileStream -> (ReadStream on: offsets)!

entryNumber: f
    ^f value position!

guessSize: f
    | ofs size |
    ofs := f value next.
    size := f value peek - ofs.
    f value skip: -1.
    ^size!

move: f toEntry: position
    f value position: position!

hasOptionalData: f
    ^f value atEnd not!

readWordFrom: f
    f key position: f value next.
    ^f key nextLong!

readStringArrayFrom: f size: n
    ^(1 to: n) collect: [ :each | self readStringFrom: f ]!

readStringFrom: f
    f key position: f value next.
    ^f key upTo: 0 asCharacter!

readByteFrom: f
    f key position: f value next.
    ^f key nextByte!

readByteArrayFrom: f size: size
    f key position: f value next.
    ^(f key next: size) asByteArray!

getFileStream: f
    f key position: f value next.
    ^f key!

bigEndianFirst: aBlock on: f
    | result |
    Bigendian ifFalse: [ self skipEntry: f ].
    result := aBlock value.
    Bigendian ifTrue: [ self skipEntry: f ].
    ^result!

littleEndianFirst: aBlock on: f
    | result |
    Bigendian ifFalse: [ self skipEntry: f ].
    result := aBlock value.
    Bigendian ifTrue: [ self skipEntry: f ].
    ^result!

skipEntry: f
    f value next!

readWordArrayFrom: f size: n
    f key position: f value next.
    ^(1 to: n) collect: [ :each | f key nextLong ]!

setDefaults
! !

!LcPrintFormats methodsFor: 'printing'!

? aNumber
    | stream |
    stream := WriteStream on: String new.	"### maybe 'self charset new'"
    self print: aNumber on: stream.
    ^stream contents!

printString: aNumber
    | stream |
    stream := WriteStream on: String new.	"### maybe 'self charset new'"
    self print: aNumber on: stream.
    ^stream contents!

print: aNumber on: aStream
    self subclassResponsibility! !


!BigEndianFileStream methodsFor: 'endianness switching'!

nextBytes: n signed: signed
    "Private - Get an integer out of the next anInteger bytes in the stream"

    | int |
    int := 0.
    int := self nextByte.
    (signed and: [ int > 127 ]) ifTrue: [ int := int - 256 ].
    int := int bitShift: n * 8 - 8.

    n * 8 - 16 to: 0 by: -8 do: [ :i |
	int := int + (self nextByte bitShift: i).
    ].
    ^int!

nextPutBytes: n of: anInteger
    "Private - Store the n least significant bytes of int in big-endian format"
    | int |

    int := (anInteger < 0)
	ifTrue: [ anInteger + (1 bitShift: 8 * n) ]
	ifFalse: [ anInteger ].

    8 - n * 8 to: 0 by: 8 do: [ :i |
	self nextPutByte: ((int bitShift: i) bitAnd: 255).
    ]
! !


!FileStream methodsFor: 'endianness checking'!

littleEndianMagicNumber: le bigEndianMagicNumber: be
    "Change the receiver to a BigEndianFileStream if the
     next bytes are equal to `be', do nothing if they're equal
     to `le'; fail if the two parameters have different sizes,
     or if neither of them matches the next bytes.  The position
     in the file is not touched if matching fails, else it is
     moved past the signature."
    | magic |
    le size = be size ifFalse: [
	self error: 'mismatching sizes for big-endian and little-endian' ].

    magic := (self next: le size) asByteArray.
    magic = be ifTrue: [
	self changeClassTo: BigEndianFileStream ].
    magic = le ifFalse: [
	self skip: le size negated.
	self error: 'mismatching magic number' ].
! !



!CharacterArray methodsFor: 'comparing'!

compareTo: aCharacterArray
    "Answer a number < 0 if the receiver is less than aCharacterArray,
    a number > 0 if it is greater, or 0 if they are equal.  This does
    a three-way comparison."
    | c1 c2 |
    1 to: (self size min: aCharacterArray size) do: [ :i |
	c1 := (self at: i) value.
	c2 := (aCharacterArray at: i) value.
	c1 = c2 ifFalse: [ ^c1 - c2 ].
    ].

    ^self size - aCharacterArray size
! !

Locale initialize!
LcFile initialize!
