diff --git a/generators/chm/lib/ebook_chm.cpp b/generators/chm/lib/ebook_chm.cpp
index 31921ce4f..732d29fc8 100644
--- a/generators/chm/lib/ebook_chm.cpp
+++ b/generators/chm/lib/ebook_chm.cpp
@@ -1,1112 +1,1112 @@
/*
* Kchmviewer - a CHM and EPUB file viewer with broad language support
* Copyright (C) 2004-2014 George Yunaev, gyunaev@ulduzsoft.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include "ebook_chm.h"
#include "ebook_chm_encoding.h"
#include "helper_entitydecoder.h"
#include "bitfiddle.h"
// Big-enough buffer size for use with various routines.
#define BUF_SIZE 4096
// Further fixed sizes; none of their users appear in this part of the file.
// NOTE(review): the names suggest a shared scratch-buffer length and the
// per-record sizes of the /#TOPICS and /#URLTBL tables -- confirm at the use sites.
#define COMMON_BUF_LEN 1025
#define TOPICS_ENTRY_LEN 16
#define URLTBL_ENTRY_LEN 12
// Parser tracing hook. As defined below the macro expands to nothing; enabling
// the commented-out variant instead forwards the argument list to qDebug.
//#define DEBUGPARSER(A) qDebug A
#define DEBUGPARSER(A)
// Scheme string "ms-its"; presumably the URL scheme used for content inside a
// CHM archive -- verify against the URL helpers that reference it.
static const char * URL_SCHEME_CHM = "ms-its";
/**
 * Creates a book object with no archive attached.
 *
 * The archive handle, codecs, detected LCID and file/font names are set to
 * their "nothing loaded" values, the encoding name defaults to "UTF-8", and
 * the KCHMVIEWEROPTS environment variable is read once into m_envOptions.
 */
EBook_CHM::EBook_CHM()
    : EBook()
{
    // No archive is open yet.
    m_chmFile = NULL;
    m_font = QString::null;
    m_filename = m_font;

    // Encoding state: no codec selected, nothing detected, UTF-8 by default.
    m_currentEncoding = "UTF-8";
    m_detectedLCID = 0;
    m_textCodecForSpecialFiles = 0;
    m_textCodec = 0;

    m_htmlEntityDecoder = 0;

    // Options supplied through the environment (may be absent).
    m_envOptions = getenv("KCHMVIEWEROPTS");
}
/**
 * Destroys the book object; any archive that is still open is released
 * through close().
 */
EBook_CHM::~EBook_CHM()
{
    this->close();
}
/**
 * Closes the currently opened archive, if any.
 *
 * When no archive is open the call does nothing. Otherwise the handle is
 * passed to chm_close() and the file name, font, home/topics/index paths,
 * codecs, detected LCID and encoding name are reset to their defaults.
 */
void EBook_CHM::close()
{
    if ( m_chmFile != NULL )
    {
        chm_close( m_chmFile );
        m_chmFile = NULL;

        // Forget everything that described the archive.
        m_font = QString::null;
        m_filename = m_font;
        m_home.clear();
        m_topicsFile.clear();
        m_indexFile.clear();

        // Back to the default encoding state.
        m_textCodec = 0;
        m_textCodecForSpecialFiles = 0;
        m_detectedLCID = 0;
        m_currentEncoding = "UTF-8";
    }
}
/**
 * Returns the book title: the stored m_title passed through
 * encodeWithCurrentCodec().
 */
QString EBook_CHM::title() const
{
    QString decodedTitle = encodeWithCurrentCodec( m_title );
    return decodedTitle;
}
/**
 * Returns the URL of the home page, obtained by converting the stored
 * home path (m_home) with pathToUrl().
 */
QUrl EBook_CHM::homeUrl() const
{
    QUrl home = pathToUrl( m_home );
    return home;
}
/**
 * Reports whether the loaded book supports the given feature.
 *
 * @param code feature being queried
 * @return the stored availability flag for FEATURE_TOC and FEATURE_INDEX,
 *         true for FEATURE_ENCODING, false for any other value
 */
bool EBook_CHM::hasFeature(EBook::Feature code) const
{
    if ( code == FEATURE_TOC )
        return m_tocAvailable;

    if ( code == FEATURE_INDEX )
        return m_indexAvailable;

    // Encoding selection is always offered; anything else is unsupported.
    if ( code == FEATURE_ENCODING )
        return true;

    return false;
}
bool EBook_CHM::getTableOfContents( QList &toc ) const
{
if ( parseBinaryTOC( toc ) )
return true;
// Parse the plain text TOC
QList< ParsedEntry > parsed;
if ( !parseFileAndFillArray( m_topicsFile, parsed, false ) )
return false;
// Find out the root offset, and reduce the indent level to it
// so the toc starts from zero offset.
int root_offset = -1;
// Fill up the real toc
toc.reserve( parsed.size() );
Q_FOREACH( const ParsedEntry& e, parsed )
{
if ( root_offset == -1 )
root_offset = e.indent;
EBookTocEntry entry;
entry.iconid = (EBookTocEntry::Icon) e.iconid;
entry.indent = e.indent - root_offset;
entry.name = e.name;
if ( !e.urls.empty() )
entry.url = e.urls[0];
toc.append( entry );
}
return true;
}
bool EBook_CHM::getIndex(QList &index) const
{
// Parse the plain text index
QList< ParsedEntry > parsed;
if ( !parseFileAndFillArray( m_indexFile, parsed, true ) )
return false;
// Find out the root offset, and reduce the indent level to it
// so the index starts from zero offset.
int root_offset = 0;
// Fill up the real index
index.reserve( parsed.size() );
// Find the index root offset
Q_FOREACH( const ParsedEntry& e, parsed )
{
if ( e.urls.empty() )
continue;
root_offset = qMin( root_offset, e.indent );
}
// And apply the index
Q_FOREACH( const ParsedEntry& e, parsed )
{
if ( e.urls.empty() )
continue;
EBookIndexEntry entry;
entry.name = e.name;
entry.urls = e.urls;
entry.seealso = e.seealso;
// If the index array is empty, make sure the first entry is on root offset
if ( index.isEmpty() )
entry.indent = root_offset;
else
entry.indent = e.indent - root_offset;
index.append( entry );
printf("%d: %s\n", entry.indent, qPrintable(entry.name));
}
return true;
}
/**
 * Reads the object addressed by @p url as text.
 *
 * The URL is converted to an internal path with urlToPath() and the work is
 * delegated to getTextContent().
 *
 * @param str receives the text
 * @param url address of the object inside the book
 * @return whatever getTextContent() reports
 */
bool EBook_CHM::getFileContentAsString( QString &str, const QUrl &url ) const
{
    const QString internalPath = urlToPath( url );
    return getTextContent( str, internalPath );
}
/**
 * Reads the object addressed by @p url as raw bytes.
 *
 * The URL is converted to an internal path with urlToPath() and the work is
 * delegated to getBinaryContent().
 *
 * @param data receives the bytes
 * @param url address of the object inside the book
 * @return whatever getBinaryContent() reports
 */
bool EBook_CHM::getFileContentAsBinary( QByteArray &data, const QUrl &url ) const
{
    const QString internalPath = urlToPath( url );
    return getBinaryContent( data, internalPath );
}
/**
 * Loads the complete content of one object of the archive.
 *
 * @param data buffer that is resized to the object's length and filled
 * @param url  internal path of the object
 * @return false when the object cannot be resolved or RetrieveObject()
 *         yields a zero/false result, true otherwise
 */
bool EBook_CHM::getBinaryContent( QByteArray &data, const QString &url ) const
{
    chmUnitInfo unit;

    if ( !ResolveObject( url, &unit ) )
        return false;

    // Make room for the whole object, then read it starting at offset 0.
    data.resize( unit.length );
    unsigned char * const target = reinterpret_cast< unsigned char* >( data.data() );

    if ( !RetrieveObject( &unit, target, 0, unit.length ) )
        return false;

    return true;
}
/**
 * Loads one object of the archive and converts it to a string.
 *
 * @param str               receives the text; untouched on failure
 * @param url               internal path of the object
 * @param internal_encoding when true the bytes are converted with the plain
 *                          QString-from-C-string conversion, otherwise with
 *                          encodeWithCurrentCodec()
 * @return true when the object was read and is not empty, false otherwise
 */
bool EBook_CHM::getTextContent( QString& str, const QString& url, bool internal_encoding ) const
{
    QByteArray raw;

    if ( !getBinaryContent( raw, url ) )
        return false;

    const unsigned int rawLength = raw.size();

    // An empty object is reported as a failure.
    if ( rawLength == 0 )
        return false;

    // Append an explicit terminator so the data can be handed over as a C string.
    raw.resize( rawLength + 1 );
    raw [rawLength] = '\0';

    if ( internal_encoding )
        str = QString( raw.constData() );
    else
        str = encodeWithCurrentCodec( raw.constData() );

    return true;
}
int EBook_CHM::getContentSize(const QString &url)
{
chmUnitInfo ui;
if( !ResolveObject( url, &ui ) )
return -1;
return ui.length;
}
/**
 * Opens a CHM archive and collects the information needed to browse it.
 *
 * A leading "file://" is removed from @p archiveName. A previously opened
 * archive is closed first. After a successful open the encoding state is
 * reset, /#WINDOWS and /#SYSTEM are evaluated, the text encoding is guessed,
 * the lookup tables are resolved and the TOC/index availability flags are
 * computed.
 *
 * @param archiveName path of the archive, optionally prefixed with "file://"
 * @return false when the archive cannot be opened, true otherwise
 */
bool EBook_CHM::load(const QString &archiveName)
{
    // Drop the 7-character "file://" prefix when present.
    const QString filename = archiveName.startsWith( "file://" ) ? archiveName.mid( 7 ) : archiveName;

    if( m_chmFile )
        close();

#if defined (WIN32)
    // chm_open on Windows OS uses the following prototype:
    //   struct chmFile* chm_open(BSTR filename);
    //
    // however internally it simply passes the filename
    // directly to CreateFileW function without any conversion.
    // Thus we need to pass it as WCHAR * and not BSTR.
    m_chmFile = chm_open( (BSTR) filename.toStdWString().c_str() );
#else
    m_chmFile = chm_open( QFile::encodeName(filename) );
#endif

    if ( !m_chmFile )
        return false;

    m_filename = filename;

    // Start from the default encoding state for the new archive.
    m_currentEncoding = "UTF-8";
    m_textCodecForSpecialFiles = 0;
    m_textCodec = 0;

    // Read encoding, title, context file etc. from /#WINDOWS and /#SYSTEM,
    // then guess the text encoding.
    getInfoFromWindows();
    getInfoFromSystem();
    guessTextEncoding();

    // The lookup tables are usable only when all four objects resolve.
    m_lookupTablesValid = ResolveObject("/#TOPICS", &m_chmTOPICS)
                          && ResolveObject("/#STRINGS", &m_chmSTRINGS)
                          && ResolveObject("/#URLTBL", &m_chmURLTBL)
                          && ResolveObject("/#URLSTR", &m_chmURLSTR);

    if ( m_lookupTablesValid )
        fillTopicsUrlMap();

    // Some archives ship toc/index files without naming them properly;
    // fall back to the conventional file names.
    if ( m_topicsFile.isEmpty() && hasFile( "/toc.hhc" ) )
        m_topicsFile = "/toc.hhc";

    if ( m_indexFile.isEmpty() && hasFile( "/index.hhk" ) )
        m_indexFile = "/index.hhk";

    // A TOC/index exists when a text file is known, or when the lookup
    // tables are valid and the matching binary object is present.
    m_tocAvailable = !m_topicsFile.isEmpty()
                     || ( m_lookupTablesValid && hasFile( "/#TOCIDX" ) );

    m_indexAvailable = !m_indexFile.isEmpty()
                       || ( m_lookupTablesValid && hasFile( "/$WWKeywordLinks/BTree" ) );

    return true;
}
int EBook_CHM::findStringInQuotes (const QString& tag, int offset, QString& value, bool firstquote, bool decodeentities) const
{
int qbegin = tag.indexOf ('"', offset);
if ( qbegin == -1 )
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find first quote in tag: '%s'", qPrintable( tag ));
int qend = firstquote ? tag.indexOf ('"', qbegin + 1) : tag.lastIndexOf ('"');
if ( qend == -1 || qend <= qbegin )
qFatal ("EBook_CHMImpl::findStringInQuotes: cannot find last quote in tag: '%s'", qPrintable( tag ));
// If we do not need to decode HTML entities, just return.
if ( decodeentities )
{
QString htmlentity = QString::null;
bool fill_entity = false;
value.reserve (qend - qbegin); // to avoid multiple memory allocations
for ( int i = qbegin + 1; i < qend; i++ )
{
if ( !fill_entity )
{
if ( tag[i] == '&' ) // HTML entity starts
fill_entity = true;
else
value.append (tag[i]);
}
else
{
if ( tag[i] == ';' ) // HTML entity ends
{
// If entity is an ASCII code, just decode it
QString decode = m_htmlEntityDecoder.decode( htmlentity );
if ( decode.isNull() )
break;
value.append ( decode );
htmlentity = QString::null;
fill_entity = false;
}
else
htmlentity.append (tag[i]);
}
}
}
else
value = tag.mid (qbegin + 1, qend - qbegin - 1);
return qend + 1;
}
bool EBook_CHM::parseFileAndFillArray( const QString& file, QList< ParsedEntry >& data, bool asIndex ) const
{
QString src;
const int MAX_NEST_DEPTH = 256;
if ( !getTextContent( src, file ) || src.isEmpty() )
return false;
/*
// Save the index for debugging purposes
QFile outfile( "parsed.htm" );
if ( outfile.open( QIODevice::WriteOnly ) )
{
QTextStream textstream( &outfile );
textstream << src;
outfile.close();
}
*/
EBookTocEntry::Icon defaultimagenum = EBookTocEntry::IMAGE_AUTO;
int pos = 0, indent = 0, root_indent_offset = 0;
bool in_object = false, root_indent_offset_set = false;
ParsedEntry entry;
entry.iconid = defaultimagenum;
// Split the HHC file by HTML tags
int stringlen = src.length();
while ( pos < stringlen && (pos = src.indexOf ('<', pos)) != -1 )
{
int i, word_end = 0;
for ( i = ++pos; i < stringlen; i++ )
{
// If a " or ' is found, skip to the next one.
if ( (src[i] == '"' || src[i] == '\'') )
{
// find where quote ends, either by another quote, or by '>' symbol (some people don't know HTML)
int nextpos = src.indexOf (src[i], i+1);
if ( nextpos == -1 && (nextpos = src.indexOf ('>', i+1)) == -1 )
{
qWarning ("EBook_CHMImpl::ParseHhcAndFillTree: corrupted TOC: %s", qPrintable( src.mid(i) ));
return false;
}
i = nextpos;
}
else if ( src[i] == '>' )
break;
else if ( !src[i].isLetterOrNumber() && src[i] != '/' && !word_end )
word_end = i;
}
QString tagword, tag = src.mid (pos, i - pos);
if ( word_end )
tagword = src.mid (pos, word_end - pos).toLower();
else
tagword = tag.toLower();
//DEBUGPARSER(("tag: '%s', tagword: '%s'\n", qPrintable( tag ), qPrintable( tagword ) ));
//