diff --git a/kdecore/kcharsetsdata.cpp b/kdecore/kcharsetsdata.cpp index e659e7c0..16786af2 100644 --- a/kdecore/kcharsetsdata.cpp +++ b/kdecore/kcharsetsdata.cpp @@ -1,1010 +1,1011 @@ /* This file is part of the KDE libraries Copyright (C) 1997 Jacek Konieczny (jajcus@zeus.polsl.gliwice.pl) $Id: kcharsetsdata.cpp,v 1.25.2.6 1999/04/07 14:03:48 pbrown Exp $ This library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with this library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ #include "kcharsetsdata.h" #include #include #include #include #include #include #include #include #include #include #include #include #ifdef KCH_DEBUG inline void kchdebug(const char *msg,...){ va_list ap; va_start( ap, msg ); // use variable arg list vfprintf( stderr, msg, ap ); va_end( ap ); } #endif KCharsetConverterData::KCharsetConverterData(const KCharsetEntry * inputCharset ,const KCharsetEntry * outputCharset,int flags){ kchdebug("Creating converter from %s to %s...",inputCharset,outputCharset); tempResult=new KCharsetConversionResult(); inAmps=( (flags&KCharsetConverter::INPUT_AMP_SEQUENCES)!=0 ); outAmps=( (flags&KCharsetConverter::OUTPUT_AMP_SEQUENCES)!=0 ); if ( kcharsetsData == 0 ) fatal("KCharsets not initialized!"); isOK=initialize(inputCharset,outputCharset); kchdebug("done"); } KCharsetConverterData::KCharsetConverterData(const KCharsetEntry * inputCharset ,int flags){ kchdebug("Creating converter from %s...",inputCharset); tempResult=new KCharsetConversionResult(); inAmps=( (flags&KCharsetConverter::INPUT_AMP_SEQUENCES)!=0 ); outAmps=( (flags&KCharsetConverter::OUTPUT_AMP_SEQUENCES)!=0 ); if ( kcharsetsData == 0 ) fatal("KCharsets not initialized!"); isOK=initialize(inputCharset,0); kchdebug("done"); } KCharsetConverterData::~KCharsetConverterData(){ if (tempResult) delete tempResult; if (convFromUniDict) delete convFromUniDict; } bool KCharsetConverterData::initialize(const KCharsetEntry * inputCharset ,const KCharsetEntry * outputCharset){ convTable=0; convToUniDict=0; convFromUniDict=0; input=inputCharset; if (!input) { kchdebug("Couldn't set input charset to %s\n",inputCharset); return FALSE; } if (outputCharset==0) output=kcharsetsData->conversionHint(input); else output=outputCharset; if (!output) { kchdebug("Couldn't set output charset to %s\n",outputCharset); return FALSE; } setInputSettings(); setOutputSettings(); if ( !input || !output || input==output ) conversionType=NoConversion; else if (unicodeIn && unicodeOut) 
conversionType=UnicodeUnicode; else if (unicodeIn){ conversionType=FromUnicode; kchdebug("Conversion: from unicode\n"); if (!createFromUnicodeDict()) return FALSE; } else if (unicodeOut){ conversionType=ToUnicode; kchdebug("Conversion: to unicode\n"); if (!getToUnicodeTable()) return FALSE; } else{ conversionType=EightToEight; kchdebug("Conversion: no unicode\n"); if (!getToUnicodeTable()) return FALSE; if (!createFromUnicodeDict()) return FALSE; } return TRUE; } bool KCharsetConverterData::getToUnicodeTable(){ convTable=kcharsetsData->getToUnicodeTable(input); if (!convTable){ convToUniDict=kcharsetsData->getToUnicodeDict(input); if (!convToUniDict){ kchdebug("Couldn't get conversion table nor dictionary\n"); return FALSE; } } return TRUE; } void KCharsetConverterData::setInputSettings(){ const char *name=input->name; // if ( ! stricmp(name,"unicode-1-1-utf-7") ){ // warning("Sorry, UTF-7 encoding is not supported yet\n"); // inputEnc=UTF7; // inBits=0; // unicodeIn=TRUE; // } // else if ( ! stricmp(name,"unicode-1-1-utf-8") ){ inputEnc=UTF8; inBits=0; unicodeIn=TRUE; } else if ( ! stricmp(name,"unicode") ){ inputEnc=none; inBits=16; unicodeIn=TRUE; } else if ( ! stricmp(name,"us-ascii") ){ inputEnc=none; inBits=7; unicodeIn=FALSE; } else{ inputEnc=none; inBits=8; unicodeIn=FALSE; } } void KCharsetConverterData::setOutputSettings(){ const char *name=output->name; // if ( ! stricmp(name,"unicode-1-1-utf-7") ){ // warning("Sorry, UTF-7 encoding is not supported yet\n"); // outputEnc=UTF7; // outBits=0; // unicodeOut=TRUE; // } // else if ( ! stricmp(name,"unicode-1-1-utf-8") ){ outputEnc=UTF8; outBits=0; unicodeOut=TRUE; } else if ( ! stricmp(name,"unicode-1-1") ){ warning("Sorry, Unicode probably doesn't work (except UTF-8)\n"); outputEnc=none; outBits=16; unicodeOut=TRUE; } else if ( ! 
stricmp(name,"us-ascii") ){ outputEnc=none; outBits=7; unicodeOut=FALSE; } else{ outputEnc=none; outBits=8; unicodeOut=FALSE; } } bool KCharsetConverterData::decodeUTF7(const char*,unsigned int &,int &){ return FALSE; } bool KCharsetConverterData::decodeUTF8(const char*str,unsigned int &code ,int &extrachars){ code=0; extrachars=0; unsigned char chr=*str; // kchdebug("str: "); // for(int i=0;i<6 && str[i];i++) // kchdebug("%02x ",(int)(unsigned char)str[i]); // kchdebug("\n"); if ( (chr&0x80)==0 ){ code=chr&0x7f; extrachars=0; } else if ( (chr&0xe0)==0xc0 ){ code=chr&0x1f; extrachars=1; } else if ( (chr&0xf0)==0xe0 ){ code=chr&0x0f; extrachars=2; } else if ( (chr&0xf8)==0xf0 ){ code=chr&0x07; extrachars=3; } else if ( (chr&0xfc)==0xf8 ){ code=chr&0x03; extrachars=4; } else if ( (chr&0xfe)==0xf8 ){ code=chr&0x01; extrachars=5; } else { warning("Invalid UTF-8 sequence %2x%2x...!",(int)chr,(int)str[1]); return FALSE; } int chars=extrachars; while(chars>0){ str++; code<<=6; // kchdebug("Code: %4x char: %2x masked char: %2x\n",code,*str,(*str)&0x3f); code|=(*str)&0x3f; chars--; } return TRUE; } bool KCharsetConverterData::encodeUTF7(unsigned int,QString &){ return FALSE; } bool KCharsetConverterData::encodeUTF8(unsigned int code,QString &result){ if (code<0x80){ result+=(char)code; return TRUE; } // kchdebug("Code: %4x\n",code); int octets=2; unsigned mask1=0xc0; unsigned mask2=0x1f; unsigned range=0x800; int left=24; while(code>range){ if (range>=0x40000000){ warning("Unicode value too big!"); return FALSE; } mask2=(mask2>>1)&0x80; mask1>>=1; range<<=5; octets++; left-=6; } // kchdebug("octets: %i mask1: %x mask2: %x range: %x left: %i\n" // ,octets,mask1,mask2,range,left); unsigned char chr=((code>>((octets-1)*6))&mask2)|mask1; // kchdebug("Chars: %02x ",chr); result+=chr; octets--; unsigned int tmp=(code<0){ chr=((tmp>>24)&0x3f)|0x80; // kchdebug("%02x ",chr); result+=chr; tmp<<=6; octets--; } // kchdebug("\n"); return TRUE; } void 
KCharsetConverterData::convert(const char *str,KCharsetConversionResult &r){ convert(str,r,0); } void KCharsetConverterData::convert(const char *str,QList &rl){ unsigned unicode; rl.clear(); while(str){ KCharsetConversionResult *l=new KCharsetConversionResult; kchdebug("Created result: %p\n",l); str=convert(str,*l,&unicode); rl.append(l); if (unicode){ KCharsetConversionResult *l=new KCharsetConversionResult; kchdebug("Created result: %p\n",l); kcharsetsData->convert(unicode,*l); rl.append(l); } } } const char * KCharsetConverterData::convert(const char * str ,KCharsetConversionResult &result ,unsigned *pUnicode) { kchdebug("Setteing result charset to %p ",&output); kchdebug("(%s)\n",(const char *)output); result.cCharset=output; kchdebug("----- %s ----- => ",str); if (!isOK) return 0; if (conversionType == NoConversion ){ result.cText=str; return 0; } result.cText=""; int i; int tmp; unsigned *ptr=0; unsigned index=0; unsigned unicode=0; unsigned chr=0; for(i=0;(inBits<=8)?str[i]:(str[i]&&str[i+1]);){ chr=0; index=0; unicode=0; if (inAmps && str[i]=='&'){ kchdebug("Amperstand found\n"); unicode=kcharsetsData->decodeAmp(str+i,tmp); kchdebug("i=%i characters: %i code:%4x\n",i,tmp,unicode); if (tmp>0) i+=tmp-1; } if (unicode==0) switch(inputEnc){ case UTF7: if (decodeUTF7(str+i,unicode,tmp)) i+=tmp; else unicode=(unsigned char)str[i]; break; case UTF8: if (decodeUTF8(str+i,unicode,tmp)) i+=tmp; else unicode=(unsigned char)str[i]; break; default: if (inBits<=8) index=(unsigned char)str[i]; else if (inBits==16) index=(((unsigned char)str[i++])<<8)+(unsigned char)str[i]; break; } kchdebug("Got index: %x\n",index); if (index>0 || unicode>0) switch(conversionType){ case ToUnicode: if (unicode>0) chr=unicode; else if (convTable) chr=convTable[index]; else if (convToUniDict) { ptr=(*convToUniDict)[index]; if (ptr) chr=*ptr; else chr=0; } if (chr==0 && index>0 && index<0x20) chr=index; // control characters - do not change break; case FromUnicode: 
ptr=(*convFromUniDict)[unicode]; if (ptr) chr=*ptr; else chr=0; if (unicode>0 && unicode<0x20) chr=index; // control characters - do not change break; case UnicodeUnicode: chr=unicode; break; default: if (unicode==0) if (convTable) unicode=convTable[index]; else{ ptr=(*convToUniDict)[index]; if (ptr) unicode=*ptr; else unicode=0; } // kchdebug("Converted to unicode: %4x\n",index); if (unicode){ ptr=(*convFromUniDict)[unicode]; if (ptr) chr=*ptr; else chr=0; } else chr=0; if (chr==0 && index>0 && index<0x20) chr=index; // control characters - do not change break; } // kchdebug("Converted to: %x\n",chr); if (outputEnc==UTF8) encodeUTF8(chr,result.cText); else if (outputEnc==UTF7) encodeUTF7(chr,result.cText); else if (chr==0) if (unicode && pUnicode){ *pUnicode=unicode; i++; if (inBits>8 && str[i]) i++; result.cCharset=output; if (str[i]) return str+i; else return 0; } else if (unicode && unicode<0x20) result.cText+=(char)unicode; else if (outAmps){ if (unicode) result.cText+="&#"+QString().setNum(unicode)+';'; else result.cText+="?"; } else result.cText+="?"; else if (outBits==16){ result.cText+=(char)(chr>>8); result.cText+=(char)(chr&255); } else result.cText+=(char)chr; i++; if (inBits>8 && str[i]) i++; } kchdebug("----- %s -----\n",(const char *)result); if (pUnicode) *pUnicode=0; return 0; } const KCharsetConversionResult & KCharsetConverterData::convert(unsigned code){ unsigned chr; const unsigned *ptr; kchdebug("KCCS:convert(code) "); if (convFromUniDict){ ptr=(*convFromUniDict)[code]; if (!ptr) chr=0; else chr=*ptr; } else chr=0; if (chr==0){ kcharsetsData->convert(code,*tempResult); if (!tempResult->cText.isEmpty()) return *tempResult; } if (chr==0) if (outAmps){ if (code) tempResult->cText+="&#"+QString().setNum(code)+';'; else tempResult->cText+="?"; } else tempResult->cText+="?"; else tempResult->cText=chr; return *tempResult; } const KCharsetConversionResult & KCharsetConverterData::convertTag( const char *tag,int &l){ kchdebug("Converting: %s\n",(const 
char *)tag); return convert(kcharsetsData->decodeAmp(tag,l)); } bool KCharsetConverterData::createFromUnicodeDict(){ QIntDict * dict=new QIntDict; dict->setAutoDelete(TRUE); const unsigned *tbl=kcharsetsData->getToUnicodeTable(output); if (tbl) for(int i=0;i<(1<insert(tbl[i],new unsigned(i)); else{ QIntDict * dict2=kcharsetsData->getToUnicodeDict(output); if (!dict2){ kchdebug("Couldn't get to unicode table for %s\n",output->name); delete dict; return FALSE; } QIntDictIterator it(*dict2); while( it.current() ){ dict->insert(*it.current(),new unsigned(it.currentKey())); ++it; } } convFromUniDict=dict; return TRUE; } const char * KCharsetConverterData::outputCharset()const{ return output->name; } ///////////////////////////////////////////////// KCharsetsData::KCharsetsData(){ displayableCharsDict=0; tempResult=new KCharsetConversionResult; QString fileName=KApplication::kde_configdir() + "/charsets"; kchdebug("Reading config from %s...\n",(const char *)fileName); config=new KSimpleConfig(fileName, TRUE); config->setGroup("general"); QString i18dir = config->readEntry("i18ndir"); if (i18dir) scanDirectory(i18dir); kchdebug("Creating alias dictionary...\n"); KEntryIterator *it=config->entryIterator("aliases"); if ( it ) { while( it->current() ){ const char*alias=it->currentKey(); kchdebug(" %s -> ",alias); const char*name=it->current()->aValue; kchdebug(" %s:",name); KCharsetEntry *ce=varCharsetEntry(name); if (ce){ aliases.insert(alias,ce); kchdebug("ok\n"); } else kchdebug("not found\n"); ++(*it); } } delete it; kchdebug("done!\n"); } void KCharsetsData::scanDirectory(const char *path){ kchdebug("Scanning directory: %s\n",path); QDir d(path); if ( ! 
d.exists() ) return; d.setFilter(QDir::Files); d.setSorting(QDir::Name); const QFileInfoList *list=d.entryInfoList(); QFileInfoListIterator it(*list); QFileInfo *fi; while( (fi=it.current()) ){ QString name=fi->fileName(); QString alias=name.copy(); int comma=alias.find(','); if (comma) alias.remove(comma,alias.length()-comma); else alias=""; if (!charsetEntry(alias) && !charsetEntry(name)){ KCharsetEntry *entry=new KCharsetEntry; char *ptr=new char [fi->fileName().length()+1]; strcpy(ptr,name); entry->name=ptr; entry->qtCharset=QFont::AnyCharSet; entry->toUnicode=0; entry->registered=FALSE; entry->toUnicodeDict=0; i18nCharsets.insert(name.lower(),entry); if (alias!="") aliases.insert(alias,entry); } ++it; } } void KCharsetsData::createDictFromi18n(KCharsetEntry *e){ kchdebug("Creating unicode dict for %s\n",e->name); config->setGroup("general"); QString dir=config->readEntry("i18ndir"); kchdebug("Dir: %s\n",(const char *)dir); QString filename=dir+'/'+e->name; kchdebug("Trying to open file %s\n",(const char *)filename); QFile f(filename); if (!f.open(IO_ReadOnly)) return; QTextStream t(&f); QString l; do{ l=t.readLine(); }while(!t.eof() && l!="CHARMAP"); if (t.eof()){ f.close(); return; } QIntDict *dict=new QIntDict; dict->setAutoDelete(TRUE); char codeBuf[20]; char unicodeBuf[10]; unsigned unicode; unsigned code; while(!t.eof()){ l=t.readLine(); if (l=="END CHARMAP") break; sscanf(l,"%*s %16s %8s %*s",codeBuf,unicodeBuf); sscanf(unicodeBuf,"",&unicode); code=0; if ( sscanf(codeBuf,"/x%X",&code) < 1 ) code=codeBuf[0]; kchdebug("(%s %s) %x->%x\n",codeBuf,unicodeBuf,code,unicode); dict->insert(code,new unsigned(unicode)); } e->toUnicodeDict=dict; defaultCh=charsetEntry("us-ascii"); } KCharsetsData::~KCharsetsData(){ if (tempResult) delete tempResult; QDictIterator it(i18nCharsets); KCharsetEntry *e; while( (e=it.current()) ){ ++it; if (e->toUnicodeDict) delete e->toUnicodeDict; if (e->name) delete e->name; delete e; } if (displayableCharsDict) delete 
displayableCharsDict; delete config; } KCharsetEntry * KCharsetsData::varCharsetEntry(const char *name){ for(int i=0;charsets[i].name;i++){ if ( stricmp(name,charsets[i].name) == 0 ){ kchdebug("Found!\n"); return charsets+i; } } KCharsetEntry *e=i18nCharsets[QString(name).lower()]; if (!e){ kchdebug("Searchin in aliases...\n"); e=aliases[QString(name).lower()]; } return e; } const KCharsetEntry * KCharsetsData::charsetEntry(int index){ int i; for(i=0;charsets[i].name;i++) if ( i==index ) return charsets+i; QDictIterator it(i18nCharsets); KCharsetEntry *e = 0L; while( (e=it.current()) ){ if (i==index) return e; ++i; ++it; } return 0; } const KCharsetEntry * KCharsetsData::charsetEntry(QFont::CharSet qtCharset){ int i; for(i=0;charsets[i].name;i++) if ( charsets[i].qtCharset==qtCharset ) return charsets+i; return 0; } bool KCharsetsData::setDefaultCharset(const KCharsetEntry *charset){ if (charset){ defaultCh=charset; return TRUE; } return FALSE; } QString KCharsetsData::charsetFace(const KCharsetEntry *charset ,const QString &face){ config->setGroup("faces"); const char *faceStr=config->readEntry(charset->name); if (!faceStr) return face; QString newFace(faceStr); newFace.replace(QRegExp("\\*"),face); return newFace; } bool KCharsetsData::charsetOfFace(const KCharsetEntry * charset,const QString &face){ kchdebug("Testing if face %s is of charset %s...",(const char *)face, charset->name); config->setGroup("faces"); const char *faceStr=config->readEntry(charset->name); kchdebug("%s...",faceStr); QRegExp rexp(faceStr,FALSE,TRUE); if (face.contains(rexp)){ kchdebug("Yes, it is\n"); return TRUE; } kchdebug("No, it isn't\n"); return FALSE; } const KCharsetEntry* KCharsetsData::charsetOfFace(const QString &face){ kchdebug("Searching for charset for face %s...\n",(const char *)face); KEntryIterator * it=config->entryIterator("faces"); if (!it) return 0; while( it->current() ){ const char * faceStr=it->current()->aValue; const char * key=it->currentKey(); if (!faceStr || 
faceStr[0]==0){ delete it; return charsetEntry(key); } kchdebug("testing if it is %s (%s)...",(const char *)it->currentKey(),faceStr); QRegExp rexp(faceStr,FALSE,TRUE); kchdebug("regexp: %s face: %s\n",rexp.pattern(),(const char *)face); if (face.contains(rexp)){ kchdebug("Yes, it is\n"); delete it; return charsetEntry(key); } kchdebug("No, it isn't\n"); ++(*it); } delete it; return 0; } const unsigned *KCharsetsData::getToUnicodeTable(const KCharsetEntry *charset){ if (!charset) return 0; return charset->toUnicode; } QIntDict *KCharsetsData::getToUnicodeDict(const KCharsetEntry *charset){ if (!charset) return 0; if (charset->toUnicodeDict == 0) createDictFromi18n(varCharsetEntry(charset->name)); return charset->toUnicodeDict; } const char *KCharsetsData::faceForCharset(const KCharsetEntry *charset){ config->setGroup("faces"); return config->readEntry(charset->name); } const KCharsetEntry *KCharsetsData::conversionHint(const KCharsetEntry *charset){ QStrList list; kchdebug("Serching for conversion hint for %s\n",charset->name); config->setGroup("conversionHints"); int n=config->readListEntry(charset->name,list); kchdebug("%i entries found\n",n); for(const char *hint=list.first();hint;hint=list.next()){ kchdebug("Found: %s\n",hint); KCharsetEntry *ce=varCharsetEntry(hint); if (isDisplayable(ce)) return ce; } return defaultCh; } bool KCharsetsData::getFontList(QStrList*lst,QString xcharsetname){ char **fontNames; int numFonts; QString mask("-*-*-*-*-*-*-*-*-*-*-*-*-"); QString qfontname; Display *kde_display; if (!lst) return FALSE; kde_display = XOpenDisplay( 0L ); mask+=xcharsetname; fontNames = XListFonts(kde_display, mask, 32767, &numFonts); for(int i = 0; i < numFonts; i++){ QString face; qfontname = fontNames[i]; int dash = qfontname.find ('-', 1, TRUE); // find next dash if (dash == -1) continue; // the font name is between the second and third dash so: // let's find the third dash: int dash_two = qfontname.find ('-', dash + 1 , TRUE); if (dash == -1) 
continue; // fish the name of the font info string face = qfontname.mid(dash +1, dash_two - dash -1); if(qfontname.find("-p-") != -1) face += "-p"; if(qfontname.find("-0-0-0-0-") != -1) face += "-s"; if(qfontname.find("-b-") != -1) face += "-b"; if(qfontname.find("-i-") != -1) face += "-i"; if(!lst->contains(face)) lst->append(face); } XFreeFontNames(fontNames); + return TRUE; } bool KCharsetsData::isDisplayableHack(KCharsetEntry *charset){ QFont::CharSet qcharset=charset->qtCharset; QString face=faceForCharset(charset); if ( !face.isEmpty() ){ if ( face.isEmpty()) return FALSE; QFont f(face); f.setCharSet(qcharset); f.setFamily(face); QFontInfo fi(f); kchdebug("fi.charset()=%i fi.family()=%s\n",fi.charSet(),fi.family()); if (fi.family()!=face ) return FALSE; /* This face will work for this charset, remember it */ if (!charset->good_family) charset->good_family=new QString; if (charset->good_family->isEmpty()) *(charset->good_family)=face; return TRUE; } return FALSE; } //#define kchdebug printf bool KCharsetsData::isDisplayable(KCharsetEntry *charset){ QFont::CharSet qcharset=charset->qtCharset; kchdebug("qtcharset=%i\n",qcharset); /* Qt doesn't support this charset. 
We must use the hack */ if (qcharset==QFont::AnyCharSet && strcmp(charset->name,"us-ascii")!=0) return isDisplayableHack(charset); kchdebug("searching for charset %s\n", charset->name); if (charset->good_family){ kchdebug("found good family\n"); if (charset->good_family->isEmpty()) /* no good_family is known */ return isDisplayableHack(charset); return TRUE; } QStrList lst; kchdebug("getting good families, charset=%s\n",toX(charset->name).data()); getFontList(&lst,toX(charset->name)); if(lst.isEmpty()) { charset->good_family=new QString; return isDisplayableHack(charset); } else { charset->good_family = new QString; for (const char* fm = lst.first(); fm; fm = lst.next()) { *(charset->good_family) += fm; *(charset->good_family) += "/"; } return true; } } #undef kchdebug void KCharsetsData::convert(unsigned code,KCharsetConversionResult &convResult){ unsigned chr; kchdebug("KCD:convert(code) %4X -> ",code); chr=0; kchdebug("Clearing result (was: %s)...\n",(const char *)convResult.cText); convResult.cText=""; kchdebug("Clearing charset...\n"); convResult.cCharset=charsetEntry("us-ascii"); if (code>127 && code != 0xA0){ // 0xa0 is present in ever iso8859 charset kchdebug("Hi code, dictonary needed, getting...\n"); const QIntDict *dict=getDisplayableDict(); kchdebug("Dictonary: %p\n",dict); KDispCharEntry *ptr=(*dict)[code]; kchdebug("Entry: %p\n",ptr); if (ptr){ chr=ptr->code; kchdebug("Setting charset to %s...\n",ptr->charset->name); convResult.cCharset=ptr->charset; kchdebug("Setting text to code %2X...\n",chr); convResult.cText+=(unsigned char)chr; } } else{ kchdebug("Setting text to code %2X...\n",code); convResult.cText+=(unsigned char)code; } kchdebug("%s\n",(const char *)convResult); } unsigned KCharsetsData::decodeAmp(const char *seq,int &len){ unsigned int i; kchdebug("Sequence: '%0.20s'\n",seq); if (*seq=='&') { seq++; len=1; } else len=0; if (*seq=='#'){ char *endptr; unsigned num; if (*(seq+1) == 'x') num =strtoul(seq+2,&endptr,16); else num 
=strtoul(seq+1,&endptr,10); kchdebug("Number: '%u'\n",num); if (*endptr==';') len+=endptr-seq+1; else len+=endptr-seq; return num; } else for(i=0;i * KCharsetsData::getDisplayableDict(){ if (displayableCharsDict) return displayableCharsDict; kchdebug("Generating dictonary for displayable characters\n"); displayableCharsDict=new QIntDict; displayableCharsDict->setAutoDelete(TRUE); for(int i=0;charsets[i].name!=0;i++) if (charsets[i].toUnicode && isDisplayable(charsets+i)){ kchdebug("Adding characters form %s\n",charsets[i].name); for(int j=0;j<256;j++){ unsigned unicode=charsets[i].toUnicode[j]; if ( !(*displayableCharsDict)[unicode] ){ KDispCharEntry *e=new KDispCharEntry; e->charset=charsets+i; e->code=j; displayableCharsDict->insert(unicode,e); } } } #ifdef KCH_DEBUG displayableCharsDict->statistics(); #endif return displayableCharsDict; } QString KCharsetsData::fromX(QString name){ if ( strncmp(name,"iso",3)==0 ){ name="iso-"+name.mid(3,100); return name; } if ( strncmp(name,"koi8", 4) == 0 ) return name; KEntryIterator *it=config->entryIterator("XNames"); if ( it ) { while( it->current() ){ const char * key = it->currentKey(); if (it->current()->aValue==name ){ delete it; return key; } ++(*it); } } delete it; return ""; } QString KCharsetsData::toX(QString name){ if ( strncmp(name,"iso-",4)==0 ){ name="iso"+name.mid(4,100); return name; } if ( strncmp(name,"koi8", 4) == 0 ) return name; config->setGroup("XNames"); return config->readEntry(name,""); } diff --git a/kdecore/libintl.cpp b/kdecore/libintl.cpp index aef15b72..5499921e 100644 --- a/kdecore/libintl.cpp +++ b/kdecore/libintl.cpp @@ -1,1813 +1,1813 @@ /* libintl.cpp -- gettext related functions from glibc-2.0.5 Copyright (C) 1995 Software Foundation, Inc. This file is part of the KDE libraries, but it's derived work out of glibc. 
The master sources can be found in bindtextdom.c dcgettext.c dgettext.c explodename.c finddomain.c gettext.c gettext.h gettextP.h hash-string.h l10nflist.c libintl.h loadinfo.h loadmsgcat.c localealias.c textdomain.c which are part of glibc. The license is the same as in GLIBC, which is the GNU Library General Public License. See COPYING.LIB for more details. */ /* gettext.c -- implementation of gettext(3) function Copyright (C) 1995 Software Foundation, Inc. This file is part of the GNU C Library. Its master source is NOT part of the C library, however. The master source lives in /gd/gnu/lib. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ #ifdef HAVE_CONFIG_H # include #endif /* Stephan Kulow: some defaults */ #ifndef GNULOCALEDIR #define GNULOCALEDIR "/usr/share/locale/" #endif #ifndef LOCALE_ALIAS_PATH #define LOCALE_ALIAS_PATH "/usr/share/local" #endif #include #include #define __USE_GNU #if defined(HAVE_ARGZ_H) #ifdef __BEGIN_DECLS #undef __BEGIN_DECLS #undef __END_DECLS #define __BEGIN_DECLS #define __END_DECLS #define _ARGZ_H_WORKAROUND_ extern "C" { #endif # include #ifdef _ARGZ_H_WORKAROUND_ } #undef __BEGIN_DECLS #undef __END_DECLS #define __BEGIN_DECLS extern "C" { #define __END_DECLS } #endif // _ARGZ_H_WORKAROUND_ #endif // HAVE_ARGZ_H #ifdef HAVE_MALLOC_H # include #endif #if defined HAVE_STRING_H # include #else # include #endif #include #include #include #if defined __GNUC__ && !defined alloca # define alloca __builtin_alloca # define HAVE_ALLOCA 1 #else # if defined HAVE_ALLOCA_H # include # else # ifdef _AIX #pragma alloca # else # ifndef alloca char *alloca (); # endif # endif # endif #endif #include #ifndef errno extern int errno; #endif #ifndef __set_errno # define __set_errno(val) errno = (val) #endif #if !HAVE_STRCHR # ifndef strchr # define strchr index # endif #endif #if defined HAVE_UNISTD_H # include #endif #include #ifdef NULL #undef NULL #endif #define NULL 0 #if HAVE_LIMITS_H # include #endif #if (defined HAVE_MMAP && defined HAVE_MUNMAP) # include #endif /* The following contortions are an attempt to use the C preprocessor to determine an unsigned integral type that is 32 bits wide. An alternative approach is to use autoconf's AC_CHECK_SIZEOF macro, but doing that would require that the configure script compile and *run* the resulting executable. Locally running cross-compiled executables is usually not possible. */ #if __STDC__ # define UINT_MAX_32_BITS 4294967295U #else # define UINT_MAX_32_BITS 0xFFFFFFFF #endif /* If UINT_MAX isn't defined, assume it's a 32-bit type. 
This should be valid for all systems GNU cares about because that doesn't include 16-bit systems, and only modern systems (that certainly have ) have 64+-bit integral types. */ #ifndef UINT_MAX # define UINT_MAX UINT_MAX_32_BITS #endif #if UINT_MAX == UINT_MAX_32_BITS typedef unsigned nls_uint32; #else # if USHRT_MAX == UINT_MAX_32_BITS typedef unsigned short nls_uint32; # else # if ULONG_MAX == UINT_MAX_32_BITS typedef unsigned long nls_uint32; # else /* The following line is intended to throw an error. Using #error is not portable enough. */ "Cannot determine unsigned 32-bit data type." # endif # endif #endif #ifndef W # define W(flag, data) ((flag) ? SWAP (data) : (data)) #endif struct loaded_domain { const char *data; int must_swap; nls_uint32 nstrings; struct string_desc *orig_tab; struct string_desc *trans_tab; nls_uint32 hash_size; nls_uint32 *hash_tab; }; struct binding { struct binding *next; char *domainname; char *dirname; }; /* Encoding of locale name parts. */ #define CEN_REVISION 1 #define CEN_SPONSOR 2 #define CEN_SPECIAL 4 #define XPG_NORM_CODESET 8 #define XPG_CODESET 16 #define TERRITORY 32 #define CEN_AUDIENCE 64 #define XPG_MODIFIER 128 #define CEN_SPECIFIC (CEN_REVISION|CEN_SPONSOR|CEN_SPECIAL|CEN_AUDIENCE) #define XPG_SPECIFIC (XPG_CODESET|XPG_NORM_CODESET|XPG_MODIFIER) struct loaded_l10nfile { const char *filename; int decided; const void *data; struct loaded_l10nfile *next; struct loaded_l10nfile *successor[1]; }; extern const char *_nl_normalize_codeset(const char *codeset, size_t name_len); extern struct loaded_l10nfile * _nl_make_l10nflist (struct loaded_l10nfile **l10nfile_list, const char *dirlist, size_t dirlist_len, int mask, const char *language, const char *territory, const char *codeset, const char *normalized_codeset, const char *modifier, const char *special, const char *sponsor, const char *revision, const char *filename, int do_allocate); extern const char *_nl_expand_alias(const char *name); extern int _nl_explode_name (char 
*name, const char **language, const char **modifier, const char **territory, const char **codeset, const char **normalized_codeset, const char **special, const char **sponsor, const char **revision); struct loaded_l10nfile *_nl_find_domain (const char *__dirname, char *__locale, const char *__domainname); void k_nl_load_domain(struct loaded_l10nfile *__domain); static inline nls_uint32 SWAP (nls_uint32 i) { return (i << 24) | ((i & 0xff00) << 8) | ((i >> 8) & 0xff00) | (i >> 24); } /* @@ end of prolog @@ */ /* The magic number of the GNU message catalog format. */ #define _MAGIC 0x950412de #define _MAGIC_SWAPPED 0xde120495 /* Revision number of the currently used .mo (binary) file format. */ #define MO_REVISION_NUMBER 0 /* Defines the so called `hashpjw' function by P.J. Weinberger [see Aho/Sethi/Ullman, COMPILERS: Principles, Techniques and Tools, 1986, 1987 Bell Telephone Laboratories, Inc.] */ static inline unsigned long hash_string (const char *__str_param); /* @@ end of prolog @@ */ /* Contains the default location of the message catalogs. */ extern const char k_nl_default_dirname[]; /* List with bindings of specific domains. */ extern struct binding *_nl_domain_bindings; /* Header for binary .mo file format. */ struct mo_file_header { /* The magic number. */ nls_uint32 magic; /* The revision number of the file format. */ nls_uint32 revision; /* The number of strings pairs. */ nls_uint32 nstrings; /* Offset of table with start offsets of original strings. */ nls_uint32 orig_tab_offset; /* Offset of table with start offsets of translation strings. */ nls_uint32 trans_tab_offset; /* Size of hashing table. */ nls_uint32 hash_tab_size; /* Offset of first hashing entry. */ nls_uint32 hash_tab_offset; }; struct string_desc { /* Length of addressed string. */ nls_uint32 length; /* Offset of string in file. */ nls_uint32 offset; }; /* Specify that the DOMAINNAME message catalog will be found in DIRNAME rather than in the system locale data base. 
*/
/* DOMAINNAME must be a non-empty string, otherwise NULL is returned.
   With DIRNAME == NULL nothing is changed and the directory currently
   bound to DOMAINNAME (or the default directory) is returned.  Otherwise
   the binding is created or replaced and the stored directory name is
   returned; NULL is returned when memory runs out.  */
char *
k_bindtextdomain(const char *domainname, const char *dirname)
{
  struct binding *binding;

  /* Some sanity checks.  */
  if (domainname == NULL || domainname[0] == '\0')
    return NULL;

  /* The list is kept sorted by domain name, so the search can stop as
     soon as a larger name is seen.  */
  for (binding = _nl_domain_bindings; binding != NULL; binding = binding->next)
    {
      int compare = strcmp (domainname, binding->domainname);
      if (compare == 0)
        /* We found it!  */
        break;
      if (compare < 0)
        {
          /* It is not in the list.  */
          binding = NULL;
          break;
        }
    }

  if (dirname == NULL)
    /* The current binding has be to returned.  */
    return binding == NULL ? (char *) k_nl_default_dirname : binding->dirname;

  if (binding != NULL)
    {
      /* The domain is already bound.  Replace the old binding.  */
      char *new_dirname;

      if (strcmp (dirname, k_nl_default_dirname) == 0)
        new_dirname = (char *) k_nl_default_dirname;
      else
        {
          size_t len = strlen (dirname) + 1;
          new_dirname = (char *) malloc (len);
          if (new_dirname == NULL)
            return NULL;

          memcpy (new_dirname, dirname, len);
        }

      /* The default directory name is a static string and must not be
         freed.  */
      if (strcmp (binding->dirname, k_nl_default_dirname) != 0)
        free (binding->dirname);

      binding->dirname = new_dirname;
    }
  else
    {
      /* We have to create a new binding.  */
      size_t len;
      struct binding *new_binding =
        (struct binding *) malloc (sizeof (*new_binding));

      if (new_binding == NULL)
        return NULL;

      len = strlen (domainname) + 1;
      new_binding->domainname = (char *) malloc (len);
      if (new_binding->domainname == NULL)
        {
          /* Do not leak the half-built binding.  */
          free (new_binding);
          return NULL;
        }
      memcpy (new_binding->domainname, domainname, len);

      if (strcmp (dirname, k_nl_default_dirname) == 0)
        new_binding->dirname = (char *) k_nl_default_dirname;
      else
        {
          len = strlen (dirname) + 1;
          new_binding->dirname = (char *) malloc (len);
          if (new_binding->dirname == NULL)
            {
              /* Release everything allocated for this binding so far.  */
              free (new_binding->domainname);
              free (new_binding);
              return NULL;
            }
          memcpy (new_binding->dirname, dirname, len);
        }

      /* Now enqueue it.
*/ if (_nl_domain_bindings == NULL || strcmp (domainname, _nl_domain_bindings->domainname) < 0) { new_binding->next = _nl_domain_bindings; _nl_domain_bindings = new_binding; } else { binding = _nl_domain_bindings; while (binding->next != NULL && strcmp (domainname, binding->next->domainname) > 0) binding = binding->next; new_binding->next = binding->next; binding->next = new_binding; } binding = new_binding; } return binding->dirname; } # if !defined HAVE_GETCWD char *getwd (); # define getcwd(buf, max) getwd (buf) # else char *getcwd (); # endif # ifndef HAVE_STPCPY static char *stpcpy (char *dest, const char *src); # endif /* Amount to increase buffer size by in each try. */ #define PATH_INCR 32 /* The following is from pathmax.h. */ /* Non-POSIX BSD systems might have gcc's limits.h, which doesn't define PATH_MAX but might cause redefinition warnings when sys/param.h is later included (as on MORE/BSD 4.3). */ #ifndef _POSIX_PATH_MAX # define _POSIX_PATH_MAX 255 #endif #if !defined(PATH_MAX) && defined(_PC_PATH_MAX) # define PATH_MAX (pathconf ("/", _PC_PATH_MAX) < 1 ? 1024 : pathconf ("/", _PC_PATH_MAX)) #endif /* Don't include sys/param.h if it already has been. */ #if defined(HAVE_SYS_PARAM_H) && !defined(PATH_MAX) && !defined(MAXPATHLEN) # include #endif #if !defined(PATH_MAX) && defined(MAXPATHLEN) # define PATH_MAX MAXPATHLEN #endif #ifndef PATH_MAX # define PATH_MAX _POSIX_PATH_MAX #endif /* XPG3 defines the result of `setlocale (category, NULL)' as: ``Directs `setlocale()' to query `category' and return the current setting of `local'.'' However it does not specify the exact format. And even worse: POSIX defines this not at all. So we can use this feature only on selected system (e.g. those using GNU C Library). */ /* Name of the default domain used for gettext(3) prior any call to textdomain(3). The default value for this is "messages". */ const char _nl_default_default_domain[] = "messages"; /* Value used as the default domain for gettext(3). 
*/ const char *k_nl_current_default_domain = _nl_default_default_domain; /* Contains the default location of the message catalogs. */ const char k_nl_default_dirname[] = GNULOCALEDIR; /* List with bindings of specific domains created by bindtextdomain() calls. */ struct binding *_nl_domain_bindings; /* Prototypes for local functions. */ static char *find_msg (struct loaded_l10nfile *domain_file, const char *msgid); /* For those loosing systems which don't have `alloca' we have to add some additional code emulating it. */ #ifdef HAVE_ALLOCA /* Nothing has to be done. */ # define ADD_BLOCK(list, address) /* nothing */ # define FREE_BLOCKS(list) /* nothing */ #else struct block_list { void *address; struct block_list *next; }; # define ADD_BLOCK(list, addr) \ do { \ struct block_list *newp = (struct block_list *) malloc (sizeof (*newp)); \ /* If we cannot get a free block we cannot add the new element to \ the list. */ \ if (newp != NULL) { \ newp->address = (addr); \ newp->next = (list); \ (list) = newp; \ } \ } while (0) # define FREE_BLOCKS(list) \ do { \ while (list != NULL) { \ struct block_list *old = list; \ list = list->next; \ free (old); \ } \ } while (0) # undef alloca # define alloca(size) (malloc (size)) #endif /* have alloca */ /* Look up MSGID in the DOMAINNAME message catalog for the current CATEGORY locale. */ char * k_dcgettext (const char *domainname, const char *msgid, const char *categoryvalue) { #ifndef HAVE_ALLOCA struct block_list *block_list = NULL; #endif struct loaded_l10nfile *domain; struct binding *binding; char *dirname, *xdomainname; const char *categoryname; char *single_locale; char *retval; int saved_errno = errno; const char *_domainname = (domainname == 0L) ? k_nl_current_default_domain : domainname; /* If no real MSGID is given return NULL. */ if (msgid == NULL) return NULL; /* First find matching binding. 
*/ for (binding = _nl_domain_bindings; binding != NULL; binding = binding->next) { int compare = strcmp (_domainname, binding->domainname); if (compare == 0) /* We found it! */ break; if (compare < 0) { /* It is not in the list. */ binding = NULL; break; } } if (binding == NULL) dirname = (char *) k_nl_default_dirname; else if (binding->dirname[0] == '/') dirname = binding->dirname; else { /* We have a relative path. Make it absolute now. */ size_t dirname_len = strlen (binding->dirname) + 1; size_t path_max; char *ret; path_max = (unsigned) PATH_MAX; path_max += 2; /* The getcwd docs say to do this. */ dirname = (char *) alloca (path_max + dirname_len); ADD_BLOCK (block_list, dirname); __set_errno (0); while ((ret = getcwd (dirname, path_max)) == NULL && errno == ERANGE) { path_max += PATH_INCR; dirname = (char *) alloca (path_max + dirname_len); ADD_BLOCK (block_list, dirname); __set_errno (0); } if (ret == NULL) { /* We cannot get the current working directory. Don't signal an error but simply return the default string. */ FREE_BLOCKS (block_list); __set_errno (saved_errno); return (char *) msgid; } /* We don't want libintl.a to depend on any other library. So we avoid the non-standard function stpcpy. In GNU C Library this function is available, though. Also allow the symbol HAVE_STPCPY to be defined. */ stpcpy (stpcpy (strchr (dirname, '\0'), "/"), binding->dirname); } /* Now determine the symbolic name of CATEGORY and its value. */ /* Stephan: we don't use this categoryname = category_to_name (category); categoryvalue = guess_category_value (category, categoryname); */ categoryname = "LC_MESSAGES"; xdomainname = (char *) alloca (strlen (categoryname) + strlen (_domainname) + 5); ADD_BLOCK (block_list, xdomainname); /* We don't want libintl.a to depend on any other library. So we avoid the non-standard function stpcpy. In GNU C Library this function is available, though. Also allow the symbol HAVE_STPCPY to be defined. 
*/ stpcpy (stpcpy (stpcpy (stpcpy (xdomainname, categoryname), "/"), _domainname), ".mo"); /* Creating working area. */ - single_locale = (char *) alloca (strlen (categoryvalue) + 1); + single_locale = (char *) alloca (strlen (categoryvalue) + 2); ADD_BLOCK (block_list, single_locale); /* Search for the given string. This is a loop because we perhaps got an ordered list of languages to consider for th translation. */ while (1) { /* Make CATEGORYVALUE point to the next element of the list. */ while (categoryvalue[0] != '\0' && categoryvalue[0] == ':') ++categoryvalue; if (categoryvalue[0] == '\0') { /* The whole contents of CATEGORYVALUE has been searched but no valid entry has been found. We solve this situation by implicitly appending a "C" entry, i.e. no translation will take place. */ single_locale[0] = 'C'; single_locale[1] = '\0'; } else { char *cp = single_locale; while (categoryvalue[0] != '\0' && categoryvalue[0] != ':') *cp++ = *categoryvalue++; *cp = '\0'; } /* If the current locale value is C (or POSIX) we don't load a domain. Return the MSGID. */ if (strcmp (single_locale, "C") == 0 || strcmp (single_locale, "POSIX") == 0) { FREE_BLOCKS (block_list); __set_errno (saved_errno); return (char *) msgid; } /* Find structure describing the message catalog matching the DOMAINNAME and CATEGORY. 
*/ domain = _nl_find_domain (dirname, single_locale, xdomainname); if (domain != NULL) { retval = find_msg (domain, msgid); if (retval == NULL) { int cnt; for (cnt = 0; domain->successor[cnt] != NULL; ++cnt) { retval = find_msg (domain->successor[cnt], msgid); if (retval != NULL) break; } } if (retval != NULL) { FREE_BLOCKS (block_list); __set_errno (saved_errno); return retval; } } } /* NOTREACHED */ } static char * find_msg (struct loaded_l10nfile *domain_file, const char *msgid) { size_t top, act, bottom; struct loaded_domain *domain; if (domain_file->decided == 0) k_nl_load_domain (domain_file); if (domain_file->data == NULL) return NULL; domain = (struct loaded_domain *) domain_file->data; /* Locate the MSGID and its translation. */ if (domain->hash_size > 2 && domain->hash_tab != NULL) { /* Use the hashing table. */ nls_uint32 len = strlen (msgid); nls_uint32 hash_val = hash_string (msgid); nls_uint32 idx = hash_val % domain->hash_size; nls_uint32 incr = 1 + (hash_val % (domain->hash_size - 2)); nls_uint32 nstr = W (domain->must_swap, domain->hash_tab[idx]); if (nstr == 0) /* Hash table entry is empty. */ return NULL; if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len && strcmp (msgid, domain->data + W (domain->must_swap, domain->orig_tab[nstr - 1].offset)) == 0) return (char *) domain->data + W (domain->must_swap, domain->trans_tab[nstr - 1].offset); while (1) { if (idx >= domain->hash_size - incr) idx -= domain->hash_size - incr; else idx += incr; nstr = W (domain->must_swap, domain->hash_tab[idx]); if (nstr == 0) /* Hash table entry is empty. */ return NULL; if (W (domain->must_swap, domain->orig_tab[nstr - 1].length) == len && strcmp (msgid, domain->data + W (domain->must_swap, domain->orig_tab[nstr - 1].offset)) == 0) return (char *) domain->data + W (domain->must_swap, domain->trans_tab[nstr - 1].offset); } /* NOTREACHED */ } /* Now we try the default method: binary search in the sorted array of messages. 
*/ bottom = 0; top = domain->nstrings; while (bottom < top) { int cmp_val; act = (bottom + top) / 2; cmp_val = strcmp (msgid, domain->data + W (domain->must_swap, domain->orig_tab[act].offset)); if (cmp_val < 0) top = act; else if (cmp_val > 0) bottom = act + 1; else break; } /* If an translation is found return this. */ return bottom >= top ? NULL : (char *) domain->data + W (domain->must_swap, domain->trans_tab[act].offset); } /* @@ begin of epilog @@ */ /* We don't want libintl.a to depend on any other library. So we avoid the non-standard function stpcpy. In GNU C Library this function is available, though. Also allow the symbol HAVE_STPCPY to be defined. */ #if !HAVE_STPCPY static char * stpcpy (char *dest, const char *src) { while ((*dest++ = *src++) != '\0') /* Do nothing. */ ; return dest - 1; } #endif int _nl_explode_name (char * name, const char **language, const char **modifier, const char **territory, const char **codeset, const char **normalized_codeset, const char **special, const char **sponsor, const char **revision) { enum { undecided, xpg, cen } syntax; char *cp; int mask; *modifier = NULL; *territory = NULL; *codeset = NULL; *normalized_codeset = NULL; *special = NULL; *sponsor = NULL; *revision = NULL; /* Now we determine the single parts of the locale name. First look for the language. Termination symbols are `_' and `@' if we use XPG4 style, and `_', `+', and `,' if we use CEN syntax. */ mask = 0; syntax = undecided; *language = cp = name; while (cp[0] != '\0' && cp[0] != '_' && cp[0] != '@' && cp[0] != '+' && cp[0] != ',') ++cp; if (*language == cp) /* This does not make sense: language has to be specified. Use this entry as it is without exploding. Perhaps it is an alias. */ cp = strchr (*language, '\0'); else if (cp[0] == '_') { /* Next is the territory. */ cp[0] = '\0'; *territory = ++cp; while (cp[0] != '\0' && cp[0] != '.' 
&& cp[0] != '@' && cp[0] != '+' && cp[0] != ',' && cp[0] != '_') ++cp; mask |= TERRITORY; if (cp[0] == '.') { /* Next is the codeset. */ syntax = xpg; cp[0] = '\0'; *codeset = ++cp; while (cp[0] != '\0' && cp[0] != '@') ++cp; mask |= XPG_CODESET; if (*codeset != cp && (*codeset)[0] != '\0') { *normalized_codeset = _nl_normalize_codeset (*codeset, cp - *codeset); if (strcmp (*codeset, *normalized_codeset) == 0) free ((char *) *normalized_codeset); else mask |= XPG_NORM_CODESET; } } } if (cp[0] == '@' || (syntax != xpg && cp[0] == '+')) { /* Next is the modifier. */ syntax = cp[0] == '@' ? xpg : cen; cp[0] = '\0'; *modifier = ++cp; while (syntax == cen && cp[0] != '\0' && cp[0] != '+' && cp[0] != ',' && cp[0] != '_') ++cp; mask |= XPG_MODIFIER | CEN_AUDIENCE; } if (syntax != xpg && (cp[0] == '+' || cp[0] == ',' || cp[0] == '_')) { syntax = cen; if (cp[0] == '+') { /* Next is special application (CEN syntax). */ cp[0] = '\0'; *special = ++cp; while (cp[0] != '\0' && cp[0] != ',' && cp[0] != '_') ++cp; mask |= CEN_SPECIAL; } if (cp[0] == ',') { /* Next is sponsor (CEN syntax). */ cp[0] = '\0'; *sponsor = ++cp; while (cp[0] != '\0' && cp[0] != '_') ++cp; mask |= CEN_SPONSOR; } if (cp[0] == '_') { /* Next is revision (CEN syntax). */ cp[0] = '\0'; *revision = ++cp; mask |= CEN_REVISION; } } /* For CEN syntax values it might be important to have the separator character in the file name, not for XPG syntax. */ if (syntax == xpg) { if (*territory != NULL && (*territory)[0] == '\0') mask &= ~TERRITORY; if (*codeset != NULL && (*codeset)[0] == '\0') mask &= ~XPG_CODESET; if (*modifier != NULL && (*modifier)[0] == '\0') mask &= ~XPG_MODIFIER; } return mask; } static struct loaded_l10nfile *_nl_loaded_domains; /* Return a data structure describing the message catalog described by the DOMAINNAME and CATEGORY parameters with respect to the currently established bindings. 
*/ struct loaded_l10nfile * _nl_find_domain (const char *dirname, char *locale, const char *domainname) { struct loaded_l10nfile *retval; const char *language; const char *modifier; const char *territory; const char *codeset; const char *normalized_codeset; const char *special; const char *sponsor; const char *revision; const char *alias_value; int mask; /* LOCALE can consist of up to four recognized parts for the XPG syntax: language[_territory[.codeset]][@modifier] and six parts for the CEN syntax: language[_territory][+audience][+special][,[sponsor][_revision]] Beside the first all of them are allowed to be missing. If the full specified locale is not found, the less specific one are looked for. The various part will be stripped of according to the following order: (1) revision (2) sponsor (3) special (4) codeset (5) normalized codeset (6) territory (7) audience/modifier */ /* If we have already tested for this locale entry there has to be one data set in the list of loaded domains. */ retval = _nl_make_l10nflist (&_nl_loaded_domains, dirname, strlen (dirname) + 1, 0, locale, NULL, NULL, NULL, NULL, NULL, NULL, NULL, domainname, 0); if (retval != NULL) { /* We know something about this locale. */ int cnt; if (retval->decided == 0) k_nl_load_domain (retval); if (retval->data != NULL) return retval; for (cnt = 0; retval->successor[cnt] != NULL; ++cnt) { if (retval->successor[cnt]->decided == 0) k_nl_load_domain (retval->successor[cnt]); if (retval->successor[cnt]->data != NULL) break; } return cnt >= 0 ? retval : NULL; /* NOTREACHED */ } /* See whether the locale value is an alias. If yes its value *overwrites* the alias name. No test for the original value is done. */ alias_value = _nl_expand_alias (locale); if (alias_value != NULL) { size_t len = strlen (alias_value) + 1; locale = (char *) malloc (len); if (locale == NULL) return NULL; memcpy (locale, alias_value, len); } /* Now we determine the single parts of the locale name. First look for the language. 
Termination symbols are `_' and `@' if we use XPG4 style, and `_', `+', and `,' if we use CEN syntax. */ mask = _nl_explode_name (locale, &language, &modifier, &territory, &codeset, &normalized_codeset, &special, &sponsor, &revision); /* Create all possible locale entries which might be interested in generalization. */ retval = _nl_make_l10nflist (&_nl_loaded_domains, dirname, strlen (dirname) + 1, mask, language, territory, codeset, normalized_codeset, modifier, special, sponsor, revision, domainname, 1); if (retval == NULL) /* This means we are out of core. */ return NULL; if (retval->decided == 0) k_nl_load_domain (retval); if (retval->data == NULL) { int cnt; for (cnt = 0; retval->successor[cnt] != NULL; ++cnt) { if (retval->successor[cnt]->decided == 0) k_nl_load_domain (retval->successor[cnt]); if (retval->successor[cnt]->data != NULL) break; } } /* The room for an alias was dynamically allocated. Free it now. */ if (alias_value != NULL) free (locale); return retval; } /* We assume to have `unsigned long int' value with at least 32 bits. */ #define HASHWORDBITS 32 static inline unsigned long hash_string (const char *str_param) { unsigned long int hval, g; const char *str = str_param; /* Compute the hash value for the given string. */ hval = 0; while (*str != '\0') { hval <<= 4; hval += (unsigned long) *str++; g = hval & ((unsigned long) 0xf << (HASHWORDBITS - 4)); if (g != 0) { hval ^= g >> (HASHWORDBITS - 8); hval ^= g; } } return hval; } /* Define function which are usually not available. */ #if !defined HAVE___ARGZ_COUNT /* Returns the number of strings in ARGZ. 
*/ static size_t argz_count__ (const char *argz, size_t len) { size_t count = 0; while (len > 0) { size_t part_len = strlen (argz); argz += part_len + 1; len -= part_len + 1; count++; } return count; } # undef __argz_count # define __argz_count(argz, len) argz_count__ (argz, len) #endif /* !HAVE___ARGZ_COUNT */ #if !defined HAVE___ARGZ_STRINGIFY /* Make '\0' separated arg vector ARGZ printable by converting all the '\0's except the last into the character SEP. */ static void argz_stringify__ (char *argz, size_t len, int sep); static void argz_stringify__ (char *argz,size_t len, int sep) { while (len > 0) { size_t part_len = strlen (argz); argz += part_len; len -= part_len + 1; if (len > 0) *argz++ = sep; } } # undef __argz_stringify # define __argz_stringify(argz, len, sep) argz_stringify__ (argz, len, sep) #endif /* !HAVE___ARGZ_STRINGIFY */ #if !defined HAVE___ARGZ_NEXT static char * argz_next__ ( char *argz,size_t argz_len, const char *entry) { if (entry) { if (entry < argz + argz_len) entry = strchr (entry, '\0') + 1; return entry >= argz + argz_len ? NULL : (char *) entry; } else if (argz_len > 0) return argz; else return 0; } # undef __argz_next # define __argz_next(argz, len, entry) argz_next__ (argz, len, entry) #endif /* !HAVE___ARGZ_NEXT */ static inline int pop (int x) { /* We assume that no more than 16 bits are used. 
*/ x = ((x & ~0x5555) >> 1) + (x & 0x5555); x = ((x & ~0x3333) >> 2) + (x & 0x3333); x = ((x >> 4) + x) & 0x0f0f; x = ((x >> 8) + x) & 0xff; return x; } struct loaded_l10nfile * _nl_make_l10nflist (struct loaded_l10nfile **l10nfile_list, const char *dirlist, size_t dirlist_len, int mask, const char *language, const char *territory, const char *codeset, const char *normalized_codeset, const char *modifier, const char *special, const char *sponsor, const char *revision, const char *filename, int do_allocate) { char *abs_filename; struct loaded_l10nfile *last = NULL; struct loaded_l10nfile *retval; char *cp; size_t entries; int cnt; /* Allocate room for the full file name. */ abs_filename = (char *) malloc (dirlist_len + strlen (language) + ((mask & TERRITORY) != 0 ? strlen (territory) + 1 : 0) + ((mask & XPG_CODESET) != 0 ? strlen (codeset) + 1 : 0) + ((mask & XPG_NORM_CODESET) != 0 ? strlen (normalized_codeset) + 1 : 0) + (((mask & XPG_MODIFIER) != 0 || (mask & CEN_AUDIENCE) != 0) ? strlen (modifier) + 1 : 0) + ((mask & CEN_SPECIAL) != 0 ? strlen (special) + 1 : 0) + (((mask & CEN_SPONSOR) != 0 || (mask & CEN_REVISION) != 0) ? (1 + ((mask & CEN_SPONSOR) != 0 ? strlen (sponsor) + 1 : 0) + ((mask & CEN_REVISION) != 0 ? strlen (revision) + 1 : 0)) : 0) + 1 + strlen (filename) + 1); if (abs_filename == NULL) return NULL; retval = NULL; last = NULL; /* Construct file name. */ memcpy (abs_filename, dirlist, dirlist_len); __argz_stringify (abs_filename, dirlist_len, ':'); cp = abs_filename + (dirlist_len - 1); *cp++ = '/'; cp = (char*)stpcpy (cp, language); if ((mask & TERRITORY) != 0) { *cp++ = '_'; cp = stpcpy (cp, territory); } if ((mask & XPG_CODESET) != 0) { *cp++ = '.'; cp = stpcpy (cp, codeset); } if ((mask & XPG_NORM_CODESET) != 0) { *cp++ = '.'; cp = stpcpy (cp, normalized_codeset); } if ((mask & (XPG_MODIFIER | CEN_AUDIENCE)) != 0) { /* This component can be part of both syntaces but has different leading characters. For CEN we use `+', else `@'. 
*/ *cp++ = (mask & CEN_AUDIENCE) != 0 ? '+' : '@'; cp = stpcpy (cp, modifier); } if ((mask & CEN_SPECIAL) != 0) { *cp++ = '+'; cp = stpcpy (cp, special); } if ((mask & (CEN_SPONSOR | CEN_REVISION)) != 0) { *cp++ = ','; if ((mask & CEN_SPONSOR) != 0) cp = stpcpy (cp, sponsor); if ((mask & CEN_REVISION) != 0) { *cp++ = '_'; cp = stpcpy (cp, revision); } } *cp++ = '/'; stpcpy (cp, filename); /* Look in list of already loaded domains whether it is already available. */ last = NULL; for (retval = *l10nfile_list; retval != NULL; retval = retval->next) if (retval->filename != NULL) { int compare = strcmp (retval->filename, abs_filename); if (compare == 0) /* We found it! */ break; if (compare < 0) { /* It's not in the list. */ retval = NULL; break; } last = retval; } if (retval != NULL || do_allocate == 0) { free (abs_filename); return retval; } retval = (struct loaded_l10nfile *) malloc (sizeof (*retval) + (__argz_count (dirlist, dirlist_len) * (1 << pop (mask)) * sizeof (struct loaded_l10nfile *))); if (retval == NULL) return NULL; retval->filename = abs_filename; retval->decided = (__argz_count (dirlist, dirlist_len) != 1 || ((mask & XPG_CODESET) != 0 && (mask & XPG_NORM_CODESET) != 0)); retval->data = NULL; if (last == NULL) { retval->next = *l10nfile_list; *l10nfile_list = retval; } else { retval->next = last->next; last->next = retval; } entries = 0; /* If the DIRLIST is a real list the RETVAL entry corresponds not to a real file. So we have to use the DIRLIST separation mechanism of the inner loop. */ cnt = __argz_count (dirlist, dirlist_len) == 1 ? mask - 1 : mask; for (; cnt >= 0; --cnt) if ((cnt & ~mask) == 0 && ((cnt & CEN_SPECIFIC) == 0 || (cnt & XPG_SPECIFIC) == 0) && ((cnt & XPG_CODESET) == 0 || (cnt & XPG_NORM_CODESET) == 0)) { /* Iterate over all elements of the DIRLIST. 
*/ char *dir = NULL; while ((dir = __argz_next ((char *) dirlist, dirlist_len, dir)) != NULL) retval->successor[entries++] = _nl_make_l10nflist (l10nfile_list, dir, strlen (dir) + 1, cnt, language, territory, codeset, normalized_codeset, modifier, special, sponsor, revision, filename, 1); } retval->successor[entries] = NULL; return retval; } /* Normalize codeset name. There is no standard for the codeset names. Normalization allows the user to use any of the common names. */ const char * _nl_normalize_codeset (const char *codeset, size_t name_len) { int len = 0; int only_digit = 1; char *retval; char *wp; size_t cnt; for (cnt = 0; cnt < name_len; ++cnt) if (isalnum (codeset[cnt])) { ++len; if (isalpha (codeset[cnt])) only_digit = 0; } retval = (char *) malloc ((only_digit ? 3 : 0) + len + 1); if (retval != NULL) { if (only_digit) wp = stpcpy (retval, "iso"); else wp = retval; for (cnt = 0; cnt < name_len; ++cnt) if (isalpha (codeset[cnt])) *wp++ = tolower (codeset[cnt]); else if (isdigit (codeset[cnt])) *wp++ = codeset[cnt]; *wp = '\0'; } return (const char *) retval; } /* We need a sign, whether a new catalog was loaded, which can be associated with all translations. This is important if the translations are cached by one of GCC's features. */ int k_nl_msg_cat_cntr = 0; /* Load the message catalogs specified by FILENAME. If it is no valid message catalog do nothing. */ void k_nl_load_domain (struct loaded_l10nfile *domain_file) { int fd; struct stat st; struct mo_file_header *data = (struct mo_file_header *) -1; #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) int use_mmap = 0; #endif struct loaded_domain *domain; domain_file->decided = 1; domain_file->data = NULL; /* If the record does not represent a valid locale the FILENAME might be NULL. This can happen when according to the given specification the locale file name is different for XPG and CEN syntax. */ if (domain_file->filename == NULL) return; /* Try to open the addressed file. 
*/ fd = open (domain_file->filename, O_RDONLY); if (fd == -1) return; /* We must know about the size of the file. */ if (fstat (fd, &st) != 0 && st.st_size < (off_t) sizeof (struct mo_file_header)) { /* Something went wrong. */ close (fd); return; } #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) /* Now we are ready to load the file. If mmap() is available we try this first. If not available or it failed we try to load it. */ data = (struct mo_file_header *) mmap (NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0); if (data != (struct mo_file_header *) -1) { /* mmap() call was successful. */ close (fd); use_mmap = 1; } #endif /* If the data is not yet available (i.e. mmap'ed) we try to load it manually. */ if (data == (struct mo_file_header *) -1) { off_t to_read; char *read_ptr; data = (struct mo_file_header *) malloc (st.st_size); if (data == NULL) return; to_read = st.st_size; read_ptr = (char *) data; do { long int nb = (long int) read (fd, read_ptr, to_read); if (nb == -1) { close (fd); return; } read_ptr += nb; to_read -= nb; } while (to_read > 0); close (fd); } /* Using the magic number we can test whether it really is a message catalog file. */ if (data->magic != _MAGIC && data->magic != _MAGIC_SWAPPED) { /* The magic number is wrong: not a message catalog file. */ #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) if (use_mmap) munmap ((caddr_t) data, st.st_size); else #endif free (data); return; } domain_file->data = (struct loaded_domain *) malloc (sizeof (struct loaded_domain)); if (domain_file->data == NULL) return; domain = (struct loaded_domain *) domain_file->data; domain->data = (char *) data; domain->must_swap = data->magic != _MAGIC; /* Fill in the information about the available tables. 
*/ switch (W (domain->must_swap, data->revision)) { case 0: domain->nstrings = W (domain->must_swap, data->nstrings); domain->orig_tab = (struct string_desc *) ((char *) data + W (domain->must_swap, data->orig_tab_offset)); domain->trans_tab = (struct string_desc *) ((char *) data + W (domain->must_swap, data->trans_tab_offset)); domain->hash_size = W (domain->must_swap, data->hash_tab_size); domain->hash_tab = (nls_uint32 *) ((char *) data + W (domain->must_swap, data->hash_tab_offset)); break; default: /* This is an illegal revision. */ #if (defined HAVE_MMAP && defined HAVE_MUNMAP && !defined DISALLOW_MMAP) if (use_mmap) munmap ((caddr_t) data, st.st_size); else #endif free (data); free (domain); domain_file->data = NULL; return; } /* Show that one domain is changed. This might make some cached translations invalid. */ ++k_nl_msg_cat_cntr; } struct alias_map { const char *alias; const char *value; }; static struct alias_map *map; static size_t nmap = 0; static size_t maxmap = 0; /* Prototypes for local functions. */ static size_t read_alias_file (const char *fname, int fname_len); static void extend_alias_table (void); static int alias_compare (const struct alias_map *map1, const struct alias_map *map2); const char * _nl_expand_alias (const char *name) { static const char *locale_alias_path = LOCALE_ALIAS_PATH; struct alias_map *retval; size_t added; do { struct alias_map item; item.alias = name; if (nmap > 0) retval = (struct alias_map *) bsearch (&item, map, nmap, sizeof (struct alias_map), (int (*) (const void *, const void *)) alias_compare); else retval = NULL; /* We really found an alias. Return the value. */ if (retval != NULL) return retval->value; /* Perhaps we can find another alias file. 
*/ added = 0; while (added == 0 && locale_alias_path[0] != '\0') { const char *start; while (locale_alias_path[0] == ':') ++locale_alias_path; start = locale_alias_path; while (locale_alias_path[0] != '\0' && locale_alias_path[0] != ':') ++locale_alias_path; if (start < locale_alias_path) added = read_alias_file (start, locale_alias_path - start); } } while (added != 0); return NULL; } static size_t read_alias_file (const char *fname, int fname_len) { #ifndef HAVE_ALLOCA struct block_list *block_list = NULL; #endif FILE *fp; char *full_fname; size_t added; static const char aliasfile[] = "/locale.alias"; full_fname = (char *) alloca (fname_len + sizeof aliasfile); ADD_BLOCK (block_list, full_fname); memcpy (full_fname, fname, fname_len); memcpy (&full_fname[fname_len], aliasfile, sizeof aliasfile); fp = fopen (full_fname, "r"); if (fp == NULL) { FREE_BLOCKS (block_list); return 0; } added = 0; while (!feof (fp)) { /* It is a reasonable approach to use a fix buffer here because a) we are only interested in the first two fields b) these fields must be usable as file names and so must not be that long */ char buf[BUFSIZ]; char *alias; char *value; char *cp; if (fgets (buf, BUFSIZ, fp) == NULL) /* EOF reached. */ break; cp = buf; /* Ignore leading white space. */ while (isspace (cp[0])) ++cp; /* A leading '#' signals a comment line. */ if (cp[0] != '\0' && cp[0] != '#') { alias = cp++; while (cp[0] != '\0' && !isspace (cp[0])) ++cp; /* Terminate alias name. */ if (cp[0] != '\0') *cp++ = '\0'; /* Now look for the beginning of the value. */ while (isspace (cp[0])) ++cp; if (cp[0] != '\0') { char *tp; size_t len; value = cp++; while (cp[0] != '\0' && !isspace (cp[0])) ++cp; /* Terminate value. */ if (cp[0] == '\n') { /* This has to be done to make the following test for the end of line possible. We are looking for the terminating '\n' which do not overwrite here. 
*/ *cp++ = '\0'; *cp = '\n'; } else if (cp[0] != '\0') *cp++ = '\0'; if (nmap >= maxmap) extend_alias_table (); /* We cannot depend on strdup available in the libc. Sigh! */ len = strlen (alias) + 1; tp = (char *) malloc (len); if (tp == NULL) { FREE_BLOCKS (block_list); return added; } memcpy (tp, alias, len); map[nmap].alias = tp; len = strlen (value) + 1; tp = (char *) malloc (len); if (tp == NULL) { FREE_BLOCKS (block_list); return added; } memcpy (tp, value, len); map[nmap].value = tp; ++nmap; ++added; } } /* Possibly not the whole line fits into the buffer. Ignore the rest of the line. */ while (strchr (cp, '\n') == NULL) { cp = buf; if (fgets (buf, BUFSIZ, fp) == NULL) /* Make sure the inner loop will be left. The outer loop will exit at the `feof' test. */ *cp = '\n'; } } /* Should we test for ferror()? I think we have to silently ignore errors. --drepper */ fclose (fp); if (added > 0) qsort (map, nmap, sizeof (struct alias_map), (int (*) (const void *, const void *)) alias_compare); FREE_BLOCKS (block_list); return added; } static void extend_alias_table () { size_t new_size; struct alias_map *new_map; new_size = maxmap == 0 ? 100 : 2 * maxmap; new_map = (struct alias_map *) malloc (new_size * sizeof (struct alias_map)); if (new_map == NULL) /* Simply don't extend: we don't have any more core. */ return; memcpy (new_map, map, nmap * sizeof (struct alias_map)); if (maxmap != 0) free (map); map = new_map; maxmap = new_size; } static int alias_compare (const struct alias_map *map1, const struct alias_map *map2) { #if defined HAVE_STRCASECMP return strcasecmp (map1->alias, map2->alias); #else const unsigned char *p1 = (const unsigned char *) map1->alias; const unsigned char *p2 = (const unsigned char *) map2->alias; unsigned char c1, c2; if (p1 == p2) return 0; do { /* I know this seems to be odd but the tolower() function in some systems libc cannot handle nonalpha characters. */ c1 = isupper (*p1) ? tolower (*p1) : *p1; c2 = isupper (*p2) ? 
tolower (*p2) : *p2; if (c1 == '\0') break; ++p1; ++p2; } while (c1 == c2); return c1 - c2; #endif } /* Name of the default text domain. */ extern const char _nl_default_default_domain[]; /* Default text domain in which entries for gettext(3) are to be found. */ extern const char *k_nl_current_default_domain; /* Set the current default message catalog to DOMAINNAME. If DOMAINNAME is null, return the current default. If DOMAINNAME is "", reset to the default of "messages". */ char * k_textdomain (const char *domainname) { char *old; /* A NULL pointer requests the current setting. */ if (domainname == NULL) return (char *) k_nl_current_default_domain; old = (char *) k_nl_current_default_domain; /* If domain name is the null string set to default domain "messages". */ if (domainname[0] == '\0' || strcmp (domainname, _nl_default_default_domain) == 0) k_nl_current_default_domain = _nl_default_default_domain; else { /* If the following malloc fails `k_nl_current_default_domain' will be NULL. This value will be returned and so signals we are out of core. */ size_t len = strlen (domainname) + 1; char *cp = (char *) malloc (len); if (cp != NULL) memcpy (cp, domainname, len); k_nl_current_default_domain = cp; } if (old != _nl_default_default_domain) free (old); return (char *) k_nl_current_default_domain; }