diff --git a/core/page.cpp b/core/page.cpp index 227ebf801..33827984a 100644 --- a/core/page.cpp +++ b/core/page.cpp @@ -1,1123 +1,1123 @@ /*************************************************************************** * Copyright (C) 2004 by Enrico Ros * * Copyright (C) 2017 Klarälvdalens Datakonsult AB, a KDAB Group * * company, info@kdab.com. Work sponsored by the * * LiMux project of the city of Munich * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of the License, or * * (at your option) any later version. * ***************************************************************************/ #include "page.h" #include "page_p.h" // qt/kde includes #include #include #include #include #include #include #include #include #include // local includes #include "action.h" #include "annotations.h" #include "annotations_p.h" #include "area.h" #include "debug_p.h" #include "document.h" #include "document_p.h" #include "form.h" #include "form_p.h" #include "observer.h" #include "pagecontroller_p.h" #include "pagesize.h" #include "pagetransition.h" #include "rotationjob_p.h" #include "textpage.h" #include "textpage_p.h" #include "tile.h" #include "tilesmanager_p.h" #include "utils_p.h" #include #ifdef PAGE_PROFILE #include #endif using namespace Okular; static const double distanceConsideredEqual = 25; // 5px static void deleteObjectRects( QLinkedList< ObjectRect * >& rects, const QSet& which ) { QLinkedList< ObjectRect * >::iterator it = rects.begin(), end = rects.end(); for ( ; it != end; ) if ( which.contains( (*it)->objectType() ) ) { delete *it; it = rects.erase( it ); } else ++it; } PagePrivate::PagePrivate( Page *page, uint n, double w, double h, Rotation o ) : m_page( page ), m_number( n ), m_orientation( o ), m_width( w ), m_height( h ), m_doc( nullptr ), m_boundingBox( 0, 0, 1, 1 ), m_rotation( Rotation0 ), m_text( nullptr 
), m_transition( nullptr ), m_textSelections( nullptr ), m_openingAction( nullptr ), m_closingAction( nullptr ), m_duration( -1 ), m_isBoundingBoxKnown( false ) { // avoid Division-By-Zero problems in the program if ( m_width <= 0 ) m_width = 1; if ( m_height <= 0 ) m_height = 1; } PagePrivate::~PagePrivate() { qDeleteAll( formfields ); delete m_openingAction; delete m_closingAction; delete m_text; delete m_transition; } PagePrivate *PagePrivate::get( Page * page ) { - return page->d; + return page ? page->d : nullptr; } void PagePrivate::imageRotationDone( RotationJob * job ) { TilesManager *tm = tilesManager( job->observer() ); if ( tm ) { QPixmap *pixmap = new QPixmap( QPixmap::fromImage( job->image() ) ); tm->setPixmap( pixmap, job->rect() ); delete pixmap; return; } QMap< DocumentObserver*, PixmapObject >::iterator it = m_pixmaps.find( job->observer() ); if ( it != m_pixmaps.end() ) { PixmapObject &object = it.value(); (*object.m_pixmap) = QPixmap::fromImage( job->image() ); object.m_rotation = job->rotation(); } else { PixmapObject object; object.m_pixmap = new QPixmap( QPixmap::fromImage( job->image() ) ); object.m_rotation = job->rotation(); m_pixmaps.insert( job->observer(), object ); } } QTransform PagePrivate::rotationMatrix() const { return Okular::buildRotationMatrix( m_rotation ); } /** class Page **/ Page::Page( uint page, double w, double h, Rotation o ) : d( new PagePrivate( this, page, w, h, o ) ) { } Page::~Page() { if (d) { deletePixmaps(); deleteRects(); d->deleteHighlights(); deleteAnnotations(); d->deleteTextSelections(); deleteSourceReferences(); delete d; } } int Page::number() const { return d->m_number; } Rotation Page::orientation() const { return d->m_orientation; } Rotation Page::rotation() const { return d->m_rotation; } Rotation Page::totalOrientation() const { return (Rotation)( ( (int)d->m_orientation + (int)d->m_rotation ) % 4 ); } double Page::width() const { return d->m_width; } double Page::height() const { return d->m_height; 
} double Page::ratio() const { return d->m_height / d->m_width; } NormalizedRect Page::boundingBox() const { return d->m_boundingBox; } bool Page::isBoundingBoxKnown() const { return d->m_isBoundingBoxKnown; } void Page::setBoundingBox( const NormalizedRect& bbox ) { if ( d->m_isBoundingBoxKnown && d->m_boundingBox == bbox ) return; // Allow tiny rounding errors (happens during rotation) static const double epsilon = 0.00001; Q_ASSERT( bbox.left >= -epsilon && bbox.top >= -epsilon && bbox.right <= 1 + epsilon && bbox.bottom <= 1 + epsilon ); d->m_boundingBox = bbox & NormalizedRect( 0., 0., 1., 1. ); d->m_isBoundingBoxKnown = true; } bool Page::hasPixmap( DocumentObserver *observer, int width, int height, const NormalizedRect &rect ) const { TilesManager *tm = d->tilesManager( observer ); if ( tm ) { if ( width != tm->width() || height != tm->height() ) { // FIXME hasPixmap should not be calling setSize on the TilesManager this is not very "const" // as this function claims to be if ( width != -1 && height != -1 ) { tm->setSize( width, height ); } return false; } return tm->hasPixmap( rect ); } QMap< DocumentObserver*, PagePrivate::PixmapObject >::const_iterator it = d->m_pixmaps.constFind( observer ); if ( it == d->m_pixmaps.constEnd() ) return false; if ( width == -1 || height == -1 ) return true; const QPixmap *pixmap = it.value().m_pixmap; return (pixmap->width() == width && pixmap->height() == height); } bool Page::hasTextPage() const { return d->m_text != nullptr; } RegularAreaRect * Page::wordAt( const NormalizedPoint &p, QString *word ) const { if ( d->m_text ) return d->m_text->wordAt( p, word ); return nullptr; } RegularAreaRect * Page::textArea ( TextSelection * selection ) const { if ( d->m_text ) return d->m_text->textArea( selection ); return nullptr; } bool Page::hasObjectRect( double x, double y, double xScale, double yScale ) const { if ( m_rects.isEmpty() ) return false; QLinkedList< ObjectRect * >::const_iterator it = m_rects.begin(), end = 
m_rects.end(); for ( ; it != end; ++it ) if ( (*it)->distanceSqr( x, y, xScale, yScale ) < distanceConsideredEqual ) return true; return false; } bool Page::hasHighlights( int s_id ) const { // simple case: have no highlights if ( m_highlights.isEmpty() ) return false; // simple case: we have highlights and no id to match if ( s_id == -1 ) return true; // iterate on the highlights list to find an entry by id QLinkedList< HighlightAreaRect * >::const_iterator it = m_highlights.begin(), end = m_highlights.end(); for ( ; it != end; ++it ) if ( (*it)->s_id == s_id ) return true; return false; } bool Page::hasTransition() const { return d->m_transition != nullptr; } bool Page::hasAnnotations() const { return !m_annotations.isEmpty(); } RegularAreaRect * Page::findText( int id, const QString & text, SearchDirection direction, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect ) const { RegularAreaRect* rect = nullptr; if ( text.isEmpty() || !d->m_text ) return rect; rect = d->m_text->findText( id, text, direction, caseSensitivity, lastRect ); return rect; } QString Page::text( const RegularAreaRect * area ) const { return text( area, TextPage::AnyPixelTextAreaInclusionBehaviour ); } QString Page::text( const RegularAreaRect * area, TextPage::TextAreaInclusionBehaviour b ) const { QString ret; if ( !d->m_text ) return ret; if ( area ) { RegularAreaRect rotatedArea = *area; rotatedArea.transform( d->rotationMatrix().inverted() ); ret = d->m_text->text( &rotatedArea, b ); } else ret = d->m_text->text( nullptr, b ); return ret; } TextEntity::List Page::words( const RegularAreaRect * area, TextPage::TextAreaInclusionBehaviour b ) const { TextEntity::List ret; if ( !d->m_text ) return ret; if ( area ) { RegularAreaRect rotatedArea = *area; rotatedArea.transform( d->rotationMatrix().inverted() ); ret = d->m_text->words( &rotatedArea, b ); } else ret = d->m_text->words( nullptr, b ); for (int i = 0; i < ret.length(); ++i) { const TextEntity * orig = ret[i]; 
ret[i] = new TextEntity( orig->text(), new Okular::NormalizedRect(orig->transformedArea ( d->rotationMatrix() )) ); delete orig; } return ret; } void PagePrivate::rotateAt( Rotation orientation ) { if ( orientation == m_rotation ) return; deleteHighlights(); deleteTextSelections(); if ( ( (int)m_orientation + (int)m_rotation ) % 2 != ( (int)m_orientation + (int)orientation ) % 2 ) qSwap( m_width, m_height ); Rotation oldRotation = m_rotation; m_rotation = orientation; /** * Rotate the images of the page. */ QMapIterator< DocumentObserver*, PagePrivate::PixmapObject > it( m_pixmaps ); while ( it.hasNext() ) { it.next(); const PagePrivate::PixmapObject &object = it.value(); RotationJob *job = new RotationJob( object.m_pixmap->toImage(), object.m_rotation, m_rotation, it.key() ); job->setPage( this ); m_doc->m_pageController->addRotationJob(job); } /** * Rotate tiles manager */ QMapIterator i(m_tilesManagers); while (i.hasNext()) { i.next(); TilesManager *tm = i.value(); if ( tm ) tm->setRotation( m_rotation ); } /** * Rotate the object rects on the page. 
*/ const QTransform matrix = rotationMatrix(); QLinkedList< ObjectRect * >::const_iterator objectIt = m_page->m_rects.begin(), end = m_page->m_rects.end(); for ( ; objectIt != end; ++objectIt ) (*objectIt)->transform( matrix ); QLinkedList< HighlightAreaRect* >::const_iterator hlIt = m_page->m_highlights.begin(), hlItEnd = m_page->m_highlights.end(); for ( ; hlIt != hlItEnd; ++hlIt ) { (*hlIt)->transform( RotationJob::rotationMatrix( oldRotation, m_rotation ) ); } } void PagePrivate::changeSize( const PageSize &size ) { if ( size.isNull() || ( size.width() == m_width && size.height() == m_height ) ) return; m_page->deletePixmaps(); // deleteHighlights(); // deleteTextSelections(); m_width = size.width(); m_height = size.height(); if ( m_rotation % 2 ) qSwap( m_width, m_height ); } const ObjectRect * Page::objectRect( ObjectRect::ObjectType type, double x, double y, double xScale, double yScale ) const { // Walk list in reverse order so that annotations in the foreground are preferred QLinkedListIterator< ObjectRect * > it( m_rects ); it.toBack(); while ( it.hasPrevious() ) { const ObjectRect *objrect = it.previous(); if ( ( objrect->objectType() == type ) && objrect->distanceSqr( x, y, xScale, yScale ) < distanceConsideredEqual ) return objrect; } return nullptr; } QLinkedList< const ObjectRect * > Page::objectRects( ObjectRect::ObjectType type, double x, double y, double xScale, double yScale ) const { QLinkedList< const ObjectRect * > result; QLinkedListIterator< ObjectRect * > it( m_rects ); it.toBack(); while ( it.hasPrevious() ) { const ObjectRect *objrect = it.previous(); if ( ( objrect->objectType() == type ) && objrect->distanceSqr( x, y, xScale, yScale ) < distanceConsideredEqual ) result.append( objrect ); } return result; } const ObjectRect* Page::nearestObjectRect( ObjectRect::ObjectType type, double x, double y, double xScale, double yScale, double * distance ) const { ObjectRect * res = nullptr; double minDistance = std::numeric_limits::max(); 
QLinkedList< ObjectRect * >::const_iterator it = m_rects.constBegin(), end = m_rects.constEnd(); for ( ; it != end; ++it ) { if ( (*it)->objectType() == type ) { double d = (*it)->distanceSqr( x, y, xScale, yScale ); if ( d < minDistance ) { res = (*it); minDistance = d; } } } if ( distance ) *distance = minDistance; return res; } const PageTransition * Page::transition() const { return d->m_transition; } QLinkedList< Annotation* > Page::annotations() const { return m_annotations; } Annotation * Page::annotation( const QString & uniqueName ) const { foreach(Annotation *a, m_annotations) { if ( a->uniqueName() == uniqueName ) return a; } return nullptr; } const Action * Page::pageAction( PageAction action ) const { switch ( action ) { case Page::Opening: return d->m_openingAction; break; case Page::Closing: return d->m_closingAction; break; } return nullptr; } QLinkedList< FormField * > Page::formFields() const { return d->formfields; } void Page::setPixmap( DocumentObserver *observer, QPixmap *pixmap, const NormalizedRect &rect ) { if ( d->m_rotation == Rotation0 ) { TilesManager *tm = d->tilesManager( observer ); if ( tm ) { tm->setPixmap( pixmap, rect ); delete pixmap; return; } QMap< DocumentObserver*, PagePrivate::PixmapObject >::iterator it = d->m_pixmaps.find( observer ); if ( it != d->m_pixmaps.end() ) { delete it.value().m_pixmap; } else { it = d->m_pixmaps.insert( observer, PagePrivate::PixmapObject() ); } it.value().m_pixmap = pixmap; it.value().m_rotation = d->m_rotation; } else { // it can happen that we get a setPixmap while closing and thus the page controller is gone if ( d->m_doc->m_pageController ) { RotationJob *job = new RotationJob( pixmap->toImage(), Rotation0, d->m_rotation, observer ); job->setPage( d ); job->setRect( TilesManager::toRotatedRect( rect, d->m_rotation ) ); d->m_doc->m_pageController->addRotationJob(job); } delete pixmap; } } void Page::setTextPage( TextPage * textPage ) { delete d->m_text; d->m_text = textPage; if ( d->m_text ) 
{ - d->m_text->d->m_page = d; + d->m_text->d->m_page = this; /** * Correct text order for before text selection */ d->m_text->d->correctTextOrder(); } } void Page::setObjectRects( const QLinkedList< ObjectRect * > & rects ) { QSet which; which << ObjectRect::Action << ObjectRect::Image; deleteObjectRects( m_rects, which ); /** * Rotate the object rects of the page. */ const QTransform matrix = d->rotationMatrix(); QLinkedList< ObjectRect * >::const_iterator objectIt = rects.begin(), end = rects.end(); for ( ; objectIt != end; ++objectIt ) (*objectIt)->transform( matrix ); m_rects << rects; } void PagePrivate::setHighlight( int s_id, RegularAreaRect *rect, const QColor & color ) { HighlightAreaRect * hr = new HighlightAreaRect(rect); hr->s_id = s_id; hr->color = color; m_page->m_highlights.append( hr ); } void PagePrivate::setTextSelections( RegularAreaRect *r, const QColor & color ) { deleteTextSelections(); if ( r ) { HighlightAreaRect * hr = new HighlightAreaRect( r ); hr->s_id = -1; hr->color = color; m_textSelections = hr; delete r; } } void Page::setSourceReferences( const QLinkedList< SourceRefObjectRect * > & refRects ) { deleteSourceReferences(); foreach( SourceRefObjectRect * rect, refRects ) m_rects << rect; } void Page::setDuration( double seconds ) { d->m_duration = seconds; } double Page::duration() const { return d->m_duration; } void Page::setLabel( const QString& label ) { d->m_label = label; } QString Page::label() const { return d->m_label; } const RegularAreaRect * Page::textSelection() const { return d->m_textSelections; } QColor Page::textSelectionColor() const { return d->m_textSelections ? 
d->m_textSelections->color : QColor(); } void Page::addAnnotation( Annotation * annotation ) { // Generate uniqueName: okular-{UUID} if(annotation->uniqueName().isEmpty()) { QString uniqueName = QStringLiteral("okular-") + QUuid::createUuid().toString(); annotation->setUniqueName( uniqueName ); } annotation->d_ptr->m_page = d; m_annotations.append( annotation ); AnnotationObjectRect *rect = new AnnotationObjectRect( annotation ); // Rotate the annotation on the page. const QTransform matrix = d->rotationMatrix(); annotation->d_ptr->annotationTransform( matrix ); m_rects.append( rect ); } bool Page::removeAnnotation( Annotation * annotation ) { if ( !d->m_doc->m_parent->canRemovePageAnnotation(annotation) ) return false; QLinkedList< Annotation * >::iterator aIt = m_annotations.begin(), aEnd = m_annotations.end(); for ( ; aIt != aEnd; ++aIt ) { if((*aIt) && (*aIt)->uniqueName()==annotation->uniqueName()) { int rectfound = false; QLinkedList< ObjectRect * >::iterator it = m_rects.begin(), end = m_rects.end(); for ( ; it != end && !rectfound; ++it ) if ( ( (*it)->objectType() == ObjectRect::OAnnotation ) && ( (*it)->object() == (*aIt) ) ) { delete *it; it = m_rects.erase( it ); rectfound = true; } qCDebug(OkularCoreDebug) << "removed annotation:" << annotation->uniqueName(); annotation->d_ptr->m_page = nullptr; m_annotations.erase( aIt ); break; } } return true; } void Page::setTransition( PageTransition * transition ) { delete d->m_transition; d->m_transition = transition; } void Page::setPageAction( PageAction action, Action * link ) { switch ( action ) { case Page::Opening: delete d->m_openingAction; d->m_openingAction = link; break; case Page::Closing: delete d->m_closingAction; d->m_closingAction = link; break; } } void Page::setFormFields( const QLinkedList< FormField * >& fields ) { qDeleteAll( d->formfields ); d->formfields = fields; QLinkedList< FormField * >::const_iterator it = d->formfields.begin(), itEnd = d->formfields.end(); for ( ; it != itEnd; ++it ) 
{ (*it)->d_ptr->setDefault(); } } void Page::deletePixmap( DocumentObserver *observer ) { TilesManager *tm = d->tilesManager( observer ); if ( tm ) { delete tm; d->m_tilesManagers.remove(observer); } else { PagePrivate::PixmapObject object = d->m_pixmaps.take( observer ); delete object.m_pixmap; } } void Page::deletePixmaps() { QMapIterator< DocumentObserver*, PagePrivate::PixmapObject > it( d->m_pixmaps ); while ( it.hasNext() ) { it.next(); delete it.value().m_pixmap; } d->m_pixmaps.clear(); qDeleteAll(d->m_tilesManagers); d->m_tilesManagers.clear(); } void Page::deleteRects() { // delete ObjectRects of type Link and Image QSet which; which << ObjectRect::Action << ObjectRect::Image; deleteObjectRects( m_rects, which ); } void PagePrivate::deleteHighlights( int s_id ) { // delete highlights by ID QLinkedList< HighlightAreaRect* >::iterator it = m_page->m_highlights.begin(), end = m_page->m_highlights.end(); while ( it != end ) { HighlightAreaRect* highlight = *it; if ( s_id == -1 || highlight->s_id == s_id ) { it = m_page->m_highlights.erase( it ); delete highlight; } else ++it; } } void PagePrivate::deleteTextSelections() { delete m_textSelections; m_textSelections = nullptr; } void Page::deleteSourceReferences() { deleteObjectRects( m_rects, QSet() << ObjectRect::SourceRef ); } void Page::deleteAnnotations() { // delete ObjectRects of type Annotation deleteObjectRects( m_rects, QSet() << ObjectRect::OAnnotation ); // delete all stored annotations QLinkedList< Annotation * >::const_iterator aIt = m_annotations.begin(), aEnd = m_annotations.end(); for ( ; aIt != aEnd; ++aIt ) delete *aIt; m_annotations.clear(); } bool PagePrivate::restoreLocalContents( const QDomNode & pageNode ) { bool loadedAnything = false; // set if something actually gets loaded // iterate over all chilren (annotationList, ...) 
QDomNode childNode = pageNode.firstChild(); while ( childNode.isElement() ) { QDomElement childElement = childNode.toElement(); childNode = childNode.nextSibling(); // parse annotationList child element if ( childElement.tagName() == QLatin1String("annotationList") ) { #ifdef PAGE_PROFILE QTime time; time.start(); #endif // Clone annotationList as root node in restoredLocalAnnotationList const QDomNode clonedNode = restoredLocalAnnotationList.importNode( childElement, true ); restoredLocalAnnotationList.appendChild( clonedNode ); // iterate over all annotations QDomNode annotationNode = childElement.firstChild(); while( annotationNode.isElement() ) { // get annotation element and advance to next annot QDomElement annotElement = annotationNode.toElement(); annotationNode = annotationNode.nextSibling(); // get annotation from the dom element Annotation * annotation = AnnotationUtils::createAnnotation( annotElement ); // append annotation to the list or show warning if ( annotation ) { m_doc->performAddPageAnnotation(m_number, annotation); qCDebug(OkularCoreDebug) << "restored annot:" << annotation->uniqueName(); loadedAnything = true; } else qCWarning(OkularCoreDebug).nospace() << "page (" << m_number << "): can't restore an annotation from XML."; } #ifdef PAGE_PROFILE qCDebug(OkularCoreDebug).nospace() << "annots: XML Load time: " << time.elapsed() << "ms"; #endif } // parse formList child element else if ( childElement.tagName() == QLatin1String("forms") ) { // Clone forms as root node in restoredFormFieldList const QDomNode clonedNode = restoredFormFieldList.importNode( childElement, true ); restoredFormFieldList.appendChild( clonedNode ); if ( formfields.isEmpty() ) continue; QHash hashedforms; QLinkedList< FormField * >::const_iterator fIt = formfields.begin(), fItEnd = formfields.end(); for ( ; fIt != fItEnd; ++fIt ) { hashedforms[(*fIt)->id()] = (*fIt); } // iterate over all forms QDomNode formsNode = childElement.firstChild(); while( formsNode.isElement() ) { 
// get annotation element and advance to next annot QDomElement formElement = formsNode.toElement(); formsNode = formsNode.nextSibling(); if ( formElement.tagName() != QLatin1String("form") ) continue; bool ok = true; int index = formElement.attribute( QStringLiteral("id") ).toInt( &ok ); if ( !ok ) continue; QHash::const_iterator wantedIt = hashedforms.constFind( index ); if ( wantedIt == hashedforms.constEnd() ) continue; QString value = formElement.attribute( QStringLiteral("value") ); (*wantedIt)->d_ptr->setValue( value ); loadedAnything = true; } } } return loadedAnything; } void PagePrivate::saveLocalContents( QDomNode & parentNode, QDomDocument & document, PageItems what ) const { // create the page node and set the 'number' attribute QDomElement pageElement = document.createElement( QStringLiteral("page") ); pageElement.setAttribute( QStringLiteral("number"), m_number ); #if 0 // add bookmark info if is bookmarked if ( d->m_bookmarked ) { // create the pageElement's 'bookmark' child QDomElement bookmarkElement = document.createElement( "bookmark" ); pageElement.appendChild( bookmarkElement ); // add attributes to the element //bookmarkElement.setAttribute( "name", bookmark name ); } #endif // add annotations info if has got any if ( ( what & AnnotationPageItems ) && ( what & OriginalAnnotationPageItems ) ) { const QDomElement savedDocRoot = restoredLocalAnnotationList.documentElement(); if ( !savedDocRoot.isNull() ) { // Import and append node in target document const QDomNode importedNode = document.importNode( savedDocRoot, true ); pageElement.appendChild( importedNode ); } } else if ( ( what & AnnotationPageItems ) && !m_page->m_annotations.isEmpty() ) { // create the annotationList QDomElement annotListElement = document.createElement( QStringLiteral("annotationList") ); // add every annotation to the annotationList QLinkedList< Annotation * >::const_iterator aIt = m_page->m_annotations.constBegin(), aEnd = m_page->m_annotations.constEnd(); for ( ; aIt 
!= aEnd; ++aIt ) { // get annotation const Annotation * a = *aIt; // only save okular annotations (not the embedded in file ones) if ( !(a->flags() & Annotation::External) ) { // append an filled-up element called 'annotation' to the list QDomElement annElement = document.createElement( QStringLiteral("annotation") ); AnnotationUtils::storeAnnotation( a, annElement, document ); annotListElement.appendChild( annElement ); qCDebug(OkularCoreDebug) << "save annotation:" << a->uniqueName(); } } // append the annotationList element if annotations have been set if ( annotListElement.hasChildNodes() ) pageElement.appendChild( annotListElement ); } // add forms info if has got any if ( ( what & FormFieldPageItems ) && ( what & OriginalFormFieldPageItems ) ) { const QDomElement savedDocRoot = restoredFormFieldList.documentElement(); if ( !savedDocRoot.isNull() ) { // Import and append node in target document const QDomNode importedNode = document.importNode( savedDocRoot, true ); pageElement.appendChild( importedNode ); } } else if ( ( what & FormFieldPageItems ) && !formfields.isEmpty() ) { // create the formList QDomElement formListElement = document.createElement( QStringLiteral("forms") ); // add every form data to the formList QLinkedList< FormField * >::const_iterator fIt = formfields.constBegin(), fItEnd = formfields.constEnd(); for ( ; fIt != fItEnd; ++fIt ) { // get the form field const FormField * f = *fIt; QString newvalue = f->d_ptr->value(); if ( f->d_ptr->m_default == newvalue ) continue; // append an filled-up element called 'annotation' to the list QDomElement formElement = document.createElement( QStringLiteral("form") ); formElement.setAttribute( QStringLiteral("id"), f->id() ); formElement.setAttribute( QStringLiteral("value"), newvalue ); formListElement.appendChild( formElement ); } // append the annotationList element if annotations have been set if ( formListElement.hasChildNodes() ) pageElement.appendChild( formListElement ); } // append the page 
element only if has children if ( pageElement.hasChildNodes() ) parentNode.appendChild( pageElement ); } const QPixmap * Page::_o_nearestPixmap( DocumentObserver *observer, int w, int h ) const { Q_UNUSED( h ) const QPixmap * pixmap = nullptr; // if a pixmap is present for given id, use it QMap< DocumentObserver*, PagePrivate::PixmapObject >::const_iterator itPixmap = d->m_pixmaps.constFind( observer ); if ( itPixmap != d->m_pixmaps.constEnd() ) pixmap = itPixmap.value().m_pixmap; // else find the closest match using pixmaps of other IDs (great optim!) else if ( !d->m_pixmaps.isEmpty() ) { int minDistance = -1; QMap< DocumentObserver*, PagePrivate::PixmapObject >::const_iterator it = d->m_pixmaps.constBegin(), end = d->m_pixmaps.constEnd(); for ( ; it != end; ++it ) { int pixWidth = (*it).m_pixmap->width(), distance = pixWidth > w ? pixWidth - w : w - pixWidth; if ( minDistance == -1 || distance < minDistance ) { pixmap = (*it).m_pixmap; minDistance = distance; } } } return pixmap; } bool Page::hasTilesManager( const DocumentObserver *observer ) const { return d->tilesManager( observer ) != nullptr; } QList Page::tilesAt( const DocumentObserver *observer, const NormalizedRect &rect ) const { TilesManager *tm = d->m_tilesManagers.value( observer ); if ( tm ) return tm->tilesAt( rect, TilesManager::PixmapTile ); else return QList(); } TilesManager *PagePrivate::tilesManager( const DocumentObserver *observer ) const { return m_tilesManagers.value( observer ); } void PagePrivate::setTilesManager( const DocumentObserver *observer, TilesManager *tm ) { TilesManager *old = m_tilesManagers.value( observer ); delete old; m_tilesManagers.insert(observer, tm); } void PagePrivate::adoptGeneratedContents( PagePrivate *oldPage ) { rotateAt( oldPage->m_rotation ); m_pixmaps = oldPage->m_pixmaps; oldPage->m_pixmaps.clear(); m_tilesManagers = oldPage->m_tilesManagers; oldPage->m_tilesManagers.clear(); m_boundingBox = oldPage->m_boundingBox; m_isBoundingBoxKnown = 
oldPage->m_isBoundingBoxKnown; m_text = oldPage->m_text; oldPage->m_text = nullptr; m_textSelections = oldPage->m_textSelections; oldPage->m_textSelections = nullptr; restoredLocalAnnotationList = oldPage->restoredLocalAnnotationList; restoredFormFieldList = oldPage->restoredFormFieldList; } FormField *PagePrivate::findEquivalentForm( const Page *p, FormField *oldField ) { // given how id is not very good of id (at least for pdf) we do a few passes // same rect, type and id foreach(FormField *f, p->d->formfields) { if (f->rect() == oldField->rect() && f->type() == oldField->type() && f->id() == oldField->id()) return f; } // same rect and type foreach(FormField *f, p->d->formfields) { if (f->rect() == oldField->rect() && f->type() == oldField->type()) return f; } // fuzzy rect, same type and id foreach(FormField *f, p->d->formfields) { if (f->type() == oldField->type() && f->id() == oldField->id() && qFuzzyCompare(f->rect().left, oldField->rect().left) && qFuzzyCompare(f->rect().top, oldField->rect().top) && qFuzzyCompare(f->rect().right, oldField->rect().right) && qFuzzyCompare(f->rect().bottom, oldField->rect().bottom)) { return f; } } // fuzzy rect and same type foreach(FormField *f, p->d->formfields) { if (f->type() == oldField->type() && qFuzzyCompare(f->rect().left, oldField->rect().left) && qFuzzyCompare(f->rect().top, oldField->rect().top) && qFuzzyCompare(f->rect().right, oldField->rect().right) && qFuzzyCompare(f->rect().bottom, oldField->rect().bottom)) { return f; } } return nullptr; } diff --git a/core/textpage.cpp b/core/textpage.cpp index f2efaacdd..9622f5506 100644 --- a/core/textpage.cpp +++ b/core/textpage.cpp @@ -1,2036 +1,2038 @@ /*************************************************************************** * Copyright (C) 2005 by Piotr Szymanski * * * * This program is free software; you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation; either version 2 of 
the License, or * * (at your option) any later version. * ***************************************************************************/ #include "textpage.h" #include "textpage_p.h" #include #include "area.h" #include "debug_p.h" #include "misc.h" #include "page.h" #include "page_p.h" #include #include #include using namespace Okular; class SearchPoint { public: SearchPoint() : offset_begin( -1 ), offset_end( -1 ) { } /** The TinyTextEntity containing the first character of the match. */ TextList::ConstIterator it_begin; /** The TinyTextEntity containing the last character of the match. */ TextList::ConstIterator it_end; /** The index of the first character of the match in (*it_begin)->text(). * Satisfies 0 <= offset_begin < (*it_begin)->text().length(). */ int offset_begin; /** One plus the index of the last character of the match in (*it_end)->text(). * Satisfies 0 < offset_end <= (*it_end)->text().length(). */ int offset_end; }; /* text comparison functions */ static bool CaseInsensitiveCmpFn( const QStringRef & from, const QStringRef & to ) { return from.compare( to, Qt::CaseInsensitive ) == 0; } static bool CaseSensitiveCmpFn( const QStringRef & from, const QStringRef & to ) { return from.compare( to, Qt::CaseSensitive ) == 0; } /** * Returns true iff segments [@p left1, @p right1] and [@p left2, @p right2] on the real line * overlap within @p threshold percent, i. e. iff the ratio of the length of the * intersection of the segments to the length of the shortest of the two input segments * is not smaller than the threshold. */ static bool segmentsOverlap(double left1, double right1, double left2, double right2, int threshold) { // check if one consumes another fully (speed optimization) if (left1 <= left2 && right1 >= right2) return true; if (left1 >= left2 && right1 <= right2) return true; // check if there is overlap above threshold if (right2 >= left1 && right1 >= left2) { double overlap = (right2 >= right1) ? 
right1 - left2 : right2 - left1; double length1 = right1 - left1, length2 = right2 - left2; return overlap * 100 >= threshold * qMin(length1, length2); } return false; } static bool doesConsumeY(const QRect& first, const QRect& second, int threshold) { return segmentsOverlap(first.top(), first.bottom(), second.top(), second.bottom(), threshold); } static bool doesConsumeY(const NormalizedRect& first, const NormalizedRect& second, int threshold) { return segmentsOverlap(first.top, first.bottom, second.top, second.bottom, threshold); } /* Rationale behind TinyTextEntity: instead of storing directly a QString for the text of an entity, we store the UTF-16 data and their length. This way, we save about 4 int's wrt a QString, and we can create a new string from that raw data (that's the only penalty of that). Even better, if the string we need to store has at most MaxStaticChars characters, then we store those in place of the QChar* that would be used (with new[] + free[]) for the data. */ class TinyTextEntity { static const int MaxStaticChars = sizeof( QChar * ) / sizeof( QChar ); public: TinyTextEntity( const QString &text, const NormalizedRect &rect ) : area( rect ) { Q_ASSERT_X( !text.isEmpty(), "TinyTextEntity", "empty string" ); Q_ASSERT_X( sizeof( d ) == sizeof( QChar * ), "TinyTextEntity", "internal storage is wider than QChar*, fix it!" ); length = text.length(); switch ( length ) { #if QT_POINTER_SIZE >= 8 case 4: d.qc[3] = text.at( 3 ).unicode(); // fall through case 3: d.qc[2] = text.at( 2 ).unicode(); #endif // fall through case 2: d.qc[1] = text.at( 1 ).unicode(); // fall through case 1: d.qc[0] = text.at( 0 ).unicode(); break; default: d.data = new QChar[ length ]; std::memcpy( d.data, text.constData(), length * sizeof( QChar ) ); } } ~TinyTextEntity() { if ( length > MaxStaticChars ) { delete [] d.data; } } inline QString text() const { return length <= MaxStaticChars ? 
QString::fromRawData( ( const QChar * )&d.qc[0], length ) : QString::fromRawData( d.data, length ); } inline NormalizedRect transformedArea( const QTransform &matrix ) const { NormalizedRect transformed_area = area; transformed_area.transform( matrix ); return transformed_area; } NormalizedRect area; private: Q_DISABLE_COPY( TinyTextEntity ) union { QChar *data; ushort qc[MaxStaticChars]; } d; int length; }; TextEntity::TextEntity( const QString &text, NormalizedRect *area ) : m_text( text ), m_area( area ), d( nullptr ) { } TextEntity::~TextEntity() { delete m_area; } QString TextEntity::text() const { return m_text; } NormalizedRect* TextEntity::area() const { return m_area; } NormalizedRect TextEntity::transformedArea(const QTransform &matrix) const { NormalizedRect transformed_area = *m_area; transformed_area.transform( matrix ); return transformed_area; } TextPagePrivate::TextPagePrivate() : m_page( nullptr ) { } TextPagePrivate::~TextPagePrivate() { qDeleteAll( m_searchPoints ); qDeleteAll( m_words ); } TextPage::TextPage() : d( new TextPagePrivate() ) { } TextPage::TextPage( const TextEntity::List &words ) : d( new TextPagePrivate() ) { TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd(); for ( ; it != itEnd; ++it ) { TextEntity *e = *it; if ( !e->text().isEmpty() ) d->m_words.append( new TinyTextEntity( e->text(), *e->area() ) ); delete e; } } TextPage::~TextPage() { delete d; } void TextPage::append( const QString &text, NormalizedRect *area ) { if ( !text.isEmpty() ) d->m_words.append( new TinyTextEntity( text.normalized(QString::NormalizationForm_KC), *area ) ); delete area; } struct WordWithCharacters { WordWithCharacters(TinyTextEntity *w, const TextList &c) : word(w), characters(c) { } inline QString text() const { return word->text(); } inline const NormalizedRect &area() const { return word->area; } TinyTextEntity *word; TextList characters; }; typedef QList WordsWithCharacters; /** * We will divide the whole page in 
some regions depending on the horizontal and * vertical spacing among different regions. Each region will have an area and an * associated WordsWithCharacters in sorted order. */ class RegionText { public: RegionText() { }; RegionText(const WordsWithCharacters &wordsWithCharacters, const QRect &area) : m_region_wordWithCharacters(wordsWithCharacters), m_area(area) { } inline QString string() const { QString res; foreach(const WordWithCharacters &word, m_region_wordWithCharacters) res += word.text(); return res; } inline WordsWithCharacters text() const { return m_region_wordWithCharacters; } inline QRect area() const { return m_area; } inline void setArea(const QRect &area) { m_area = area; } inline void setText(const WordsWithCharacters &wordsWithCharacters) { m_region_wordWithCharacters = wordsWithCharacters; } private: WordsWithCharacters m_region_wordWithCharacters; QRect m_area; }; RegularAreaRect * TextPage::textArea ( TextSelection * sel) const { if ( d->m_words.isEmpty() ) return new RegularAreaRect(); /** It works like this: There are two cursors, we need to select all the text between them. The coordinates are normalised, leftTop is (0,0) rightBottom is (1,1), so for cursors start (sx,sy) and end (ex,ey) we start with finding text rectangles under those points, if not we search for the first that is to the right to it in the same baseline, if none found, then we search for the first rectangle with a baseline under the cursor, having two points that are the best rectangles to both of the cursors: (rx,ry)x(tx,ty) for start and (ux,uy)x(vx,vy) for end, we do a 1. (rx,ry)x(1,ty) 2. (0,ty)x(1,uy) 3. (0,uy)x(vx,vy) To find the closest rectangle to cursor (cx,cy) we search for a rectangle that either contains the cursor or that has a left border >= cx and bottom border >= cy. */ RegularAreaRect * ret= new RegularAreaRect; - const QTransform matrix = d->m_page ? 
d->m_page->rotationMatrix() : QTransform(); + PagePrivate *pagePrivate = PagePrivate::get(d->m_page); + const QTransform matrix = pagePrivate ? pagePrivate->rotationMatrix() : QTransform(); #if 0 int it = -1; int itB = -1; int itE = -1; // ending cursor is higher than start cursor, we need to find positions in reverse NormalizedRect tmp; NormalizedRect start; NormalizedRect end; NormalizedPoint startC = sel->start(); double startCx = startC.x; double startCy = startC.y; NormalizedPoint endC = sel->end(); double endCx = endC.x; double endCy = endC.y; if ( sel->direction() == 1 || ( sel->itB() == -1 && sel->direction() == 0 ) ) { #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "running first loop"; #endif const int count = d->m_words.count(); for ( it = 0; it < count; it++ ) { tmp = *d->m_words[ it ]->area(); if ( tmp.contains( startCx, startCy ) || ( tmp.top <= startCy && tmp.bottom >= startCy && tmp.left >= startCx ) || ( tmp.top >= startCy)) { /// we have found the (rx,ry)x(tx,ty) itB = it; #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "start is" << itB << "count is" << d->m_words.count(); #endif break; } } sel->itB( itB ); } itB = sel->itB(); #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "direction is" << sel->direction(); qCWarning(OkularCoreDebug) << "reloaded start is" << itB << "against" << sel->itB(); #endif if ( sel->direction() == 0 || ( sel->itE() == -1 && sel->direction() == 1 ) ) { #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "running second loop"; #endif for ( it = d->m_words.count() - 1; it >= itB; it-- ) { tmp = *d->m_words[ it ]->area(); if ( tmp.contains( endCx, endCy ) || ( tmp.top <= endCy && tmp.bottom >= endCy && tmp.right <= endCx ) || ( tmp.bottom <= endCy ) ) { /// we have found the (ux,uy)x(vx,vy) itE = it; #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "ending is" << itE << "count is" << d->m_words.count(); qCWarning(OkularCoreDebug) << "conditions" << tmp.contains( endCx, endCy ) << " " << ( tmp.top <= endCy && 
tmp.bottom >= endCy && tmp.right <= endCx ) << " " << ( tmp.top >= endCy); #endif break; } } sel->itE( itE ); } #ifdef DEBUG_TEXTPAGE qCWarning(OkularCoreDebug) << "reloaded ending is" << itE << "against" << sel->itE(); #endif if ( sel->itB() != -1 && sel->itE() != -1 ) { start = *d->m_words[ sel->itB() ]->area(); end = *d->m_words[ sel->itE() ]->area(); NormalizedRect first, second, third; /// finding out if there is more than one baseline between them is a hard and discussable task /// we will create a rectangle (rx,0)x(tx,1) and will check how many times does it intersect the /// areas, if more than one -> we have a three or over line selection first = start; second.top = start.bottom; first.right = second.right = 1; third = end; third.left = second.left = 0; second.bottom = end.top; int selMax = qMax( sel->itB(), sel->itE() ); for ( it = qMin( sel->itB(), sel->itE() ); it <= selMax; ++it ) { tmp = *d->m_words[ it ]->area(); if ( tmp.intersects( &first ) || tmp.intersects( &second ) || tmp.intersects( &third ) ) ret->appendShape( d->m_words.at( it )->transformedArea( matrix ) ); } } #else - const double scaleX = d->m_page->m_page->width(); - const double scaleY = d->m_page->m_page->height(); + const double scaleX = d->m_page->width(); + const double scaleY = d->m_page->height(); NormalizedPoint startC = sel->start(); NormalizedPoint endC = sel->end(); NormalizedPoint temp; // if startPoint is right to endPoint swap them if(startC.x > endC.x) { temp = startC; startC = endC; endC = temp; } // minX,maxX,minY,maxY gives the bounding rectangle coordinates of the document - const NormalizedRect boundingRect = d->m_page->m_page->boundingBox(); + const NormalizedRect boundingRect = d->m_page->boundingBox(); const QRect content = boundingRect.geometry(scaleX,scaleY); const double minX = content.left(); const double maxX = content.right(); const double minY = content.top(); const double maxY = content.bottom(); /** * We will now find out the TinyTextEntity for the 
startRectangle and TinyTextEntity for * the endRectangle. We have four cases: * * Case 1(a): both startpoint and endpoint are out of the bounding Rectangle and at one side, so the rectangle made of start * and endPoint are outof the bounding rect (do not intersect) * * Case 1(b): both startpoint and endpoint are out of bounding rect, but they are in different side, so is their rectangle * * Case 2(a): find the rectangle which contains start and endpoint and having some * TextEntity * * Case 2(b): if 2(a) fails (if startPoint and endPoint both are unchanged), then we check whether there is any * TextEntity within the rect made by startPoint and endPoint * * Case 3: Now, we may have two type of selection. * 1. startpoint is left-top of start_end and endpoint is right-bottom * 2. startpoint is left-bottom of start_end and endpoint is top-right * * Also, as 2(b) is passed, we might have it,itEnd or both unchanged, but the fact is that we have * text within them. so, we need to search for the best suitable textposition for start and end. * * Case 3(a): We search the nearest rectangle consisting of some * TinyTextEntity right to or bottom of the startPoint for selection 01. * And, for selection 02, we have to search for right and top * * Case 3(b): For endpoint, we have to find the point top of or left to * endpoint if we have selection 01. * Otherwise, the search will be left and bottom */ // we know that startC.x > endC.x, we need to decide which is top and which is bottom const NormalizedRect start_end = (startC.y < endC.y) ? NormalizedRect(startC.x, startC.y, endC.x, endC.y) : NormalizedRect(startC.x, endC.y, endC.x, startC.y); // Case 1(a) if(!boundingRect.intersects(start_end)) return ret; // case 1(b) /** note that, after swapping of start and end, we know that, start is always left to end. but, we cannot say start is positioned upper than end. 
**/ else { // if start is left to content rect take it to content rect boundary if(startC.x * scaleX < minX) startC.x = minX/scaleX; if(endC.x * scaleX > maxX) endC.x = maxX/scaleX; // if start is top to end (selection type 01) if(startC.y * scaleY < minY) startC.y = minY/scaleY; if(endC.y * scaleY > maxY) endC.y = maxY/scaleY; // if start is bottom to end (selection type 02) if(startC.y * scaleY > maxY) startC.y = maxY/scaleY; if(endC.y * scaleY < minY) endC.y = minY/scaleY; } TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd(); TextList::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd; - const MergeSide side = d->m_page ? (MergeSide)d->m_page->m_page->totalOrientation() : MergeRight; + const MergeSide side = d->m_page ? (MergeSide)d->m_page->totalOrientation() : MergeRight; NormalizedRect tmp; //case 2(a) for ( ; it != itEnd; ++it ) { tmp = (*it)->area; if(tmp.contains(startC.x,startC.y)){ start = it; } if(tmp.contains(endC.x,endC.y)){ end = it; } } //case 2(b) it = tmpIt; if(start == it && end == itEnd) { for ( ; it != itEnd; ++it ) { // is there any text reactangle within the start_end rect tmp = (*it)->area; if(start_end.intersects(tmp)) break; } // we have searched every text entities, but none is within the rectangle created by start and end // so, no selection should be done if(it == itEnd) { return ret; } } it = tmpIt; bool selection_two_start = false; //case 3.a if(start == it) { bool flagV = false; NormalizedRect rect; // selection type 01 if(startC.y <= endC.y) { for ( ; it != itEnd; ++it ) { rect= (*it)->area; rect.isBottom(startC) ? 
flagV = false: flagV = true; if(flagV && rect.isRight(startC)) { start = it; break; } } } //selection type 02 else { selection_two_start = true; int distance = scaleX + scaleY + 100; int count = 0; for ( ; it != itEnd; ++it ) { rect= (*it)->area; if(rect.isBottomOrLevel(startC) && rect.isRight(startC)) { count++; QRect entRect = rect.geometry(scaleX,scaleY); int xdist, ydist; xdist = entRect.center().x() - startC.x * scaleX; ydist = entRect.center().y() - startC.y * scaleY; //make them positive if(xdist < 0) xdist = -xdist; if(ydist < 0) ydist = -ydist; if( (xdist + ydist) < distance) { distance = xdist+ ydist; start = it; } } } } } //case 3.b if(end == itEnd) { it = tmpIt; itEnd = itEnd-1; bool flagV = false; NormalizedRect rect; if(startC.y <= endC.y) { for ( ; itEnd >= it; itEnd-- ) { rect= (*itEnd)->area; rect.isTop(endC) ? flagV = false: flagV = true; if(flagV && rect.isLeft(endC)) { end = itEnd; break; } } } else { int distance = scaleX + scaleY + 100; for ( ; itEnd >= it; itEnd-- ) { rect= (*itEnd)->area; if(rect.isTopOrLevel(endC) && rect.isLeft(endC)) { QRect entRect = rect.geometry(scaleX,scaleY); int xdist, ydist; xdist = entRect.center().x() - endC.x * scaleX; ydist = entRect.center().y() - endC.y * scaleY; //make them positive if(xdist < 0) xdist = -xdist; if(ydist < 0) ydist = -ydist; if( (xdist + ydist) < distance) { distance = xdist+ ydist; end = itEnd; } } } } } /* if start and end in selection 02 are in the same column, and we start at an empty space we have to remove the selection of last character */ if(selection_two_start) { if(start > end) { start = start - 1; } } // if start is less than end swap them if(start > end) { it = start; start = end; end = it; } // removes the possibility of crash, in case none of 1 to 3 is true if(end == d->m_words.constEnd()) end--; for( ;start <= end ; start++) { ret->appendShape( (*start)->transformedArea( matrix ), side ); } #endif return ret; } RegularAreaRect* TextPage::findText( int searchID, const QString 
&query, SearchDirection direct, Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *area ) { SearchDirection dir=direct; // invalid search request if ( d->m_words.isEmpty() || query.isEmpty() || ( area && area->isNull() ) ) return nullptr; TextList::ConstIterator start; int start_offset = 0; TextList::ConstIterator end; const QMap< int, SearchPoint* >::const_iterator sIt = d->m_searchPoints.constFind( searchID ); if ( sIt == d->m_searchPoints.constEnd() ) { // if no previous run of this search is found, then set it to start // from the beginning (respecting the search direction) if ( dir == NextResult ) dir = FromTop; else if ( dir == PreviousResult ) dir = FromBottom; } bool forward = true; switch ( dir ) { case FromTop: start = d->m_words.constBegin(); start_offset = 0; end = d->m_words.constEnd(); break; case FromBottom: start = d->m_words.constEnd(); start_offset = 0; end = d->m_words.constBegin(); forward = false; break; case NextResult: start = (*sIt)->it_end; start_offset = (*sIt)->offset_end; end = d->m_words.constEnd(); break; case PreviousResult: start = (*sIt)->it_begin; start_offset = (*sIt)->offset_begin; end = d->m_words.constBegin(); forward = false; break; }; RegularAreaRect* ret = nullptr; const TextComparisonFunction cmpFn = caseSensitivity == Qt::CaseSensitive ? 
CaseSensitiveCmpFn : CaseInsensitiveCmpFn; if ( forward ) { ret = d->findTextInternalForward( searchID, query, cmpFn, start, start_offset, end ); } else { ret = d->findTextInternalBackward( searchID, query, cmpFn, start, start_offset, end ); } return ret; } // hyphenated '-' must be at the end of a word, so hyphenation means // we have a '-' just followed by a '\n' character // check if the string contains a '-' character // if the '-' is the last entry static int stringLengthAdaptedWithHyphen(const QString &str, const TextList::ConstIterator &it, const TextList::ConstIterator &textListEnd) { int len = str.length(); // hyphenated '-' must be at the end of a word, so hyphenation means // we have a '-' just followed by a '\n' character // check if the string contains a '-' character // if the '-' is the last entry if ( str.endsWith( QLatin1Char('-') ) ) { // validity chek of it + 1 if ( ( it + 1 ) != textListEnd ) { // 1. if the next character is '\n' const QString &lookahedStr = (*(it+1))->text(); if (lookahedStr.startsWith(QLatin1Char('\n'))) { len -= 1; } else { // 2. if the next word is in a different line or not const NormalizedRect& hyphenArea = (*it)->area; const NormalizedRect& lookaheadArea = (*(it + 1))->area; // lookahead to check whether both the '-' rect and next character rect overlap if( !doesConsumeY( hyphenArea, lookaheadArea, 70 ) ) { len -= 1; } } } } // else if it is the second last entry - for example in pdf format else if (str.endsWith(QLatin1String("-\n"))) { len -= 2; } return len; } RegularAreaRect* TextPagePrivate::searchPointToArea(const SearchPoint* sp) { - const QTransform matrix = m_page ? m_page->rotationMatrix() : QTransform(); + PagePrivate *pagePrivate = PagePrivate::get(m_page); + const QTransform matrix = pagePrivate ? 
pagePrivate->rotationMatrix() : QTransform(); RegularAreaRect* ret=new RegularAreaRect; for (TextList::ConstIterator it = sp->it_begin; ; it++) { const TinyTextEntity* curEntity = *it; ret->append( curEntity->transformedArea( matrix ) ); if (it == sp->it_end) { break; } } ret->simplify(); return ret; } RegularAreaRect* TextPagePrivate::findTextInternalForward( int searchID, const QString &_query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end) { // normalize query search all unicode (including glyphs) const QString query = _query.normalized(QString::NormalizationForm_KC); // j is the current position in our query // len is the length of the string in TextEntity // queryLeft is the length of the query we have left int j=0, queryLeft=query.length(); TextList::ConstIterator it = start; int offset = start_offset; TextList::ConstIterator it_begin = TextList::ConstIterator(); int offset_begin = 0; //dummy initial value to suppress compiler warnings while ( it != end ) { const TinyTextEntity* curEntity = *it; const QString& str = curEntity->text(); int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd()); if (offset >= len) { it++; offset = 0; continue; } if ( it_begin == TextList::ConstIterator() ) { it_begin = it; offset_begin = offset; } int min=qMin(queryLeft,len-offset); { #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << str.midRef(offset, min) << ":" << _query.midRef(j, min); #endif // we have equal (or less than) area of the query left as the length of the current // entity if ( !comparer( str.midRef( offset, min ), query.midRef( j, min ) ) ) { // we have not matched // this means we do not have a complete match // we need to get back to query start // and continue the search from this place #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << "\tnot matched"; #endif j = 0; queryLeft=query.length(); it = it_begin; offset = offset_begin+1; it_begin = TextList::ConstIterator(); } 
else { // we have a match // move the current position in the query // to the position after the length of this string // we matched // subtract the length of the current entity from // the left length of the query #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << "\tmatched"; #endif j += min; queryLeft -= min; if (queryLeft==0) { // save or update the search point for the current searchID QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID ); if ( sIt == m_searchPoints.end() ) { sIt = m_searchPoints.insert( searchID, new SearchPoint ); } SearchPoint* sp = *sIt; sp->it_begin = it_begin; sp->it_end = it; sp->offset_begin = offset_begin; sp->offset_end = offset + min; return searchPointToArea(sp); } it++; offset = 0; } } } // end of loop - it means that we've ended the textentities const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID ); if ( sIt != m_searchPoints.end() ) { SearchPoint* sp = *sIt; m_searchPoints.erase( sIt ); delete sp; } return nullptr; } RegularAreaRect* TextPagePrivate::findTextInternalBackward( int searchID, const QString &_query, TextComparisonFunction comparer, const TextList::ConstIterator &start, int start_offset, const TextList::ConstIterator &end) { // normalize query to search all unicode (including glyphs) const QString query = _query.normalized(QString::NormalizationForm_KC); // j is the current position in our query // len is the length of the string in TextEntity // queryLeft is the length of the query we have left int j=query.length(), queryLeft=query.length(); TextList::ConstIterator it = start; int offset = start_offset; TextList::ConstIterator it_begin = TextList::ConstIterator(); int offset_begin = 0; //dummy initial value to suppress compiler warnings while ( true ) { if (offset <= 0) { if ( it == end ) { break; } it--; } const TinyTextEntity* curEntity = *it; const QString& str = curEntity->text(); int len = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd()); if (offset <= 0) { 
offset = len; } if ( it_begin == TextList::ConstIterator() ) { it_begin = it; offset_begin = offset; } int min=qMin(queryLeft,offset); { #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << str.midRef(offset-min, min) << " : " << _query.midRef(j-min, min); #endif // we have equal (or less than) area of the query left as the length of the current // entity // Note len is not str.length() so we can't use rightRef here if ( !comparer( str.midRef(offset-min, min ), query.midRef( j - min, min ) ) ) { // we have not matched // this means we do not have a complete match // we need to get back to query start // and continue the search from this place #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << "\tnot matched"; #endif j = query.length(); queryLeft = query.length(); it = it_begin; offset = offset_begin-1; it_begin = TextList::ConstIterator(); } else { // we have a match // move the current position in the query // to the position after the length of this string // we matched // subtract the length of the current entity from // the left length of the query #ifdef DEBUG_TEXTPAGE qCDebug(OkularCoreDebug) << "\tmatched"; #endif j -= min; queryLeft -= min; if ( queryLeft == 0 ) { // save or update the search point for the current searchID QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID ); if ( sIt == m_searchPoints.end() ) { sIt = m_searchPoints.insert( searchID, new SearchPoint ); } SearchPoint* sp = *sIt; sp->it_begin = it; sp->it_end = it_begin; sp->offset_begin = offset - min; sp->offset_end = offset_begin; return searchPointToArea(sp); } offset = 0; } } } // end of loop - it means that we've ended the textentities const QMap< int, SearchPoint* >::iterator sIt = m_searchPoints.find( searchID ); if ( sIt != m_searchPoints.end() ) { SearchPoint* sp = *sIt; m_searchPoints.erase( sIt ); delete sp; } return nullptr; } QString TextPage::text(const RegularAreaRect *area) const { return text(area, AnyPixelTextAreaInclusionBehaviour); } QString 
TextPage::text(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const { if ( area && area->isNull() ) return QString(); TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd(); QString ret; if ( area ) { for ( ; it != itEnd; ++it ) { if (b == AnyPixelTextAreaInclusionBehaviour) { if ( area->intersects( (*it)->area ) ) { ret += (*it)->text(); } } else { NormalizedPoint center = (*it)->area.center(); if ( area->contains( center.x, center.y ) ) { ret += (*it)->text(); } } } } else { for ( ; it != itEnd; ++it ) ret += (*it)->text(); } return ret; } static bool compareTinyTextEntityX(const WordWithCharacters &first, const WordWithCharacters &second) { QRect firstArea = first.area().roundedGeometry(1000,1000); QRect secondArea = second.area().roundedGeometry(1000,1000); return firstArea.left() < secondArea.left(); } static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWithCharacters &second) { const QRect firstArea = first.area().roundedGeometry(1000,1000); const QRect secondArea = second.area().roundedGeometry(1000,1000); return firstArea.top() < secondArea.top(); } /** * Sets a new world list. Deleting the contents of the old one */ void TextPagePrivate::setWordList(const TextList &list) { qDeleteAll(m_words); m_words = list; } /** * Remove all the spaces in between texts. It will make all the generators * same, whether they save spaces(like pdf) or not(like djvu). */ static void removeSpace(TextList *words) { TextList::Iterator it = words->begin(); const QString str(QLatin1Char(' ')); while ( it != words->end() ) { if((*it)->text() == str) { it = words->erase(it); } else { ++it; } } } /** * We will read the TinyTextEntity from characters and try to create words from there. * Note: characters might be already characters for some generators, but we will keep * the nomenclature characters for the generator produced data. 
The resulting * WordsWithCharacters memory has to be managed by the caller, both the * WordWithCharacters::word and WordWithCharacters::characters contents */ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight) { /** * We will traverse characters and try to create words from the TinyTextEntities in it. * We will search TinyTextEntity blocks and merge them until we get a * space between two consecutive TinyTextEntities. When we get a space * we can take it as a end of word. Then we store the word as a TinyTextEntity * and keep it in newList. * We create a RegionText named regionWord that contains the word and the characters associated with it and * a rectangle area of the element in newList. */ WordsWithCharacters wordsWithCharacters; TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt; int newLeft,newRight,newTop,newBottom; int index = 0; for( ; it != itEnd ; it++) { QString textString = (*it)->text(); QString newString; QRect lineArea = (*it)->area.roundedGeometry(pageWidth,pageHeight),elementArea; TextList wordCharacters; tmpIt = it; int space = 0; while (!space) { if (textString.length()) { newString.append(textString); // when textString is the start of the word if (tmpIt == it) { NormalizedRect newRect(lineArea,pageWidth,pageHeight); wordCharacters.append(new TinyTextEntity(textString.normalized (QString::NormalizationForm_KC), newRect)); } else { NormalizedRect newRect(elementArea,pageWidth,pageHeight); wordCharacters.append(new TinyTextEntity(textString.normalized (QString::NormalizationForm_KC), newRect)); } } ++it; /* we must have to put this line before the if condition of it==itEnd otherwise the last character can be missed */ if (it == itEnd) break; elementArea = (*it)->area.roundedGeometry(pageWidth,pageHeight); if (!doesConsumeY(elementArea, lineArea, 60)) { --it; break; } const int text_y1 = elementArea.top() , text_x1 = elementArea.left(), text_y2 = elementArea.y() 
+ elementArea.height(), text_x2 = elementArea.x() + elementArea.width(); const int line_y1 = lineArea.top() ,line_x1 = lineArea.left(), line_y2 = lineArea.y() + lineArea.height(), line_x2 = lineArea.x() + lineArea.width(); space = elementArea.left() - lineArea.right(); if (space != 0) { it--; break; } newLeft = text_x1 < line_x1 ? text_x1 : line_x1; newRight = line_x2 > text_x2 ? line_x2 : text_x2; newTop = text_y1 > line_y1 ? line_y1 : text_y1; newBottom = text_y2 > line_y2 ? text_y2 : line_y2; lineArea.setLeft (newLeft); lineArea.setTop (newTop); lineArea.setWidth( newRight - newLeft ); lineArea.setHeight( newBottom - newTop ); textString = (*it)->text(); } // if newString is not empty, save it if (!newString.isEmpty()) { const NormalizedRect newRect(lineArea, pageWidth, pageHeight); TinyTextEntity *word = new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect); wordsWithCharacters.append(WordWithCharacters(word, wordCharacters)); index++; } if(it == itEnd) break; } return wordsWithCharacters; } /** * Create Lines from the words and sort them */ QList< QPair > makeAndSortLines(const WordsWithCharacters &wordsTmp, int pageWidth, int pageHeight) { /** * We cannot assume that the generator will give us texts in the right order. * We can only assume that we will get texts in the page and their bounding * rectangle. The texts can be character, word, half-word anything. * So, we need to: ** * 1. Sort rectangles/boxes containing texts by y0(top) * 2. Create textline where there is y overlap between TinyTextEntity 's * 3. Within each line sort the TinyTextEntity 's by x0(left) */ QList< QPair > lines; /* Make a new copy of the TextList in the words, so that the wordsTmp and lines do not contain same pointers for all the TinyTextEntity. 
*/ QList words = wordsTmp; // Step 1 qSort(words.begin(),words.end(),compareTinyTextEntityY); // Step 2 QList::Iterator it = words.begin(), itEnd = words.end(); //for every non-space texts(characters/words) in the textList for( ; it != itEnd ; it++) { const QRect elementArea = (*it).area().roundedGeometry(pageWidth,pageHeight); bool found = false; for( int i = 0 ; i < lines.length() ; i++) { /* the line area which will be expanded line_rects is only necessary to preserve the topmin and bottommax of all the texts in the line, left and right is not necessary at all */ QRect &lineArea = lines[i].second; const int text_y1 = elementArea.top() , text_y2 = elementArea.top() + elementArea.height() , text_x1 = elementArea.left(), text_x2 = elementArea.left() + elementArea.width(); const int line_y1 = lineArea.top() , line_y2 = lineArea.top() + lineArea.height(), line_x1 = lineArea.left(), line_x2 = lineArea.left() + lineArea.width(); /* if the new text and the line has y overlapping parts of more than 70%, the text will be added to this line */ if(doesConsumeY(elementArea,lineArea,70)) { WordsWithCharacters &line = lines[i].first; line.append(*it); const int newLeft = line_x1 < text_x1 ? line_x1 : text_x1; const int newRight = line_x2 > text_x2 ? line_x2 : text_x2; const int newTop = line_y1 < text_y1 ? line_y1 : text_y1; const int newBottom = text_y2 > line_y2 ? 
text_y2 : line_y2;
                lineArea = QRect( newLeft,newTop, newRight - newLeft, newBottom - newTop );
                found = true;
            }

            if(found) break;
        }

        /* when we have found a new line create a new TextList containing
           only one element and append it to the lines */
        if(!found)
        {
            WordsWithCharacters tmp;
            tmp.append((*it));
            lines.append(QPair(tmp, elementArea));
        }
    }

    // Step 3
    for(int i = 0 ; i < lines.length() ; i++)
    {
        WordsWithCharacters &list = lines[i].first;
        qSort(list.begin(), list.end(), compareTinyTextEntityX);
    }

    return lines;
}

/**
 * Calculate statistical information from the lines built by makeAndSortLines().
 *
 * @param words        the words of one region; they are grouped into lines
 *                     with makeAndSortLines()
 * @param pageWidth    page width, forwarded to makeAndSortLines() and
 *                     roundedGeometry()
 * @param pageHeight   page height, forwarded to makeAndSortLines() and
 *                     roundedGeometry()
 * @param word_spacing out: rounded weighted average of the positive
 *                     horizontal gaps left in hor_space_stat, or 0 when there
 *                     is none; overwritten with *col_spacing when the region
 *                     has exactly one line
 * @param line_spacing out: rounded average of the absolute vertical gaps
 *                     between consecutive line rectangles, or 0 when the
 *                     gaps sum to 0
 * @param col_spacing  out: the gap width that was the per-line maximum on the
 *                     largest number of lines
 *
 * The locals minSpace, before_max, after_max, space_rects and
 * max_hor_space_rects are filled in below but are not read again inside this
 * function.
 *
 * NOTE(review): several container types in this text appear without template
 * arguments (e.g. "QList &words", "QMap line_space_stat", "QPair"); they look
 * stripped by extraction -- confirm against the upstream file.
 */
static void calculateStatisticalInformation(const QList &words, int pageWidth, int pageHeight, int *word_spacing, int *line_spacing, int *col_spacing)
{
    /**
     * For the region, defined by line_rects and lines
     * 1. Make line statistical analysis to find the line spacing
     * 2. Make character statistical analysis to differentiate between
     *    word spacing and column spacing.
     */

    /**
     * Step 0: group the words into lines
     */
    const QList< QPair > sortedLines = makeAndSortLines(words, pageWidth, pageHeight);

    /**
     * Step 1: histogram of the vertical gap between each line and the next
     */
    QMap line_space_stat;
    for(int i = 0 ; i < sortedLines.length(); i++)
    {
        const QRect rectUpper = sortedLines.at(i).second;

        // the last line has no successor to measure against
        if(i+1 == sortedLines.length()) break;
        const QRect rectLower = sortedLines.at(i+1).second;

        // distance from the lower edge of this line to the top of the next,
        // taken as an absolute value
        int linespace = rectLower.top() - (rectUpper.top() + rectUpper.height());
        if(linespace < 0) linespace =-linespace;

        if(line_space_stat.contains(linespace))
            line_space_stat[linespace]++;
        else line_space_stat[linespace] = 1;
    }

    // weighted average of the histogram, rounded to the nearest integer
    *line_spacing = 0;
    int weighted_count = 0;
    QMapIterator iterate_linespace(line_space_stat);

    while(iterate_linespace.hasNext())
    {
        iterate_linespace.next();
        *line_spacing += iterate_linespace.value() * iterate_linespace.key();
        weighted_count += iterate_linespace.value();
    }
    // a zero sum also covers the empty histogram, so weighted_count is
    // non-zero whenever the division is performed
    if (*line_spacing != 0)
        *line_spacing = (int) ( (double)*line_spacing / (double) weighted_count + 0.5);

    /**
     * Step 2: histograms of the horizontal gaps between neighbouring words
     */
    // We would like to use QMap instead of QHash as it will keep the keys sorted
    QMap hor_space_stat;
    QMap col_space_stat;
    QList< QList > space_rects;
    QList max_hor_space_rects;

    // Space in every line
    for(int i = 0 ; i < sortedLines.length() ; i++)
    {
        const WordsWithCharacters list = sortedLines.at(i).first;
        QList line_space_rects;
        // maxSpace starts at 0, so only a strictly positive gap can become
        // the per-line maximum
        int maxSpace = 0, minSpace = pageWidth;

        // for every TinyTextEntity element in the line
        WordsWithCharacters::ConstIterator it = list.begin(), itEnd = list.end();
        QRect max_area1,max_area2;
        QString before_max, after_max;

        // walk over every pair of neighbouring entries of this line
        for( ; it != itEnd ; it++ )
        {
            const QRect area1 = (*it).area().roundedGeometry(pageWidth,pageHeight);
            // the last entry has no right-hand neighbour
            if( it+1 == itEnd ) break;

            const QRect area2 = (*(it+1)).area().roundedGeometry(pageWidth,pageHeight);
            int space = area2.left() - area1.right();

            // remember the widest gap of this line and the two entries around it
            if(space > maxSpace)
            {
                max_area1 = area1;
                max_area2 = area2;
                maxSpace = space;
                before_max = (*it).text();
                after_max = (*(it+1)).text();
            }

            if(space < minSpace && space != 0) minSpace = space;

            // if we found a real space, whose length is not zero and also not equal to the pageWidth
            if(space != 0 && space != pageWidth)
            {
                // increase the count of the space amount
                if(hor_space_stat.contains(space)) hor_space_stat[space]++;
                else hor_space_stat[space] = 1;

                // rectangle spanning the gap, as tall as both neighbours together
                int left,right,top,bottom;

                left = area1.right();
                right = area2.left();

                top = area2.top() < area1.top() ? area2.top() : area1.top();
                bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();

                QRect rect(left,top,right-left,bottom-top);
                line_space_rects.append(rect);
            }
        }

        space_rects.append(line_space_rects);

        // take one occurrence of this line's widest gap out of the word-gap
        // histogram; it is counted as a column-gap candidate below instead
        if(hor_space_stat.contains(maxSpace))
        {
            if(hor_space_stat[maxSpace] != 1)
                hor_space_stat[maxSpace]--;
            else hor_space_stat.remove(maxSpace);
        }

        if(maxSpace != 0)
        {
            if (col_space_stat.contains(maxSpace))
                col_space_stat[maxSpace]++;
            else col_space_stat[maxSpace] = 1;

            // store the max rect of each line
            const int left = max_area1.right();
            const int right = max_area2.left();
            const int top = (max_area1.top() > max_area2.top()) ? max_area2.top() : max_area1.top();
            const int bottom = (max_area1.bottom() < max_area2.bottom()) ? max_area2.bottom() : max_area1.bottom();

            const QRect rect(left,top,right-left,bottom-top);
            max_hor_space_rects.append(rect);
        }
        // lines without a positive gap get an empty placeholder rectangle
        else max_hor_space_rects.append(QRect(0,0,0,0));
    }

    // All the between word space counts are in hor_space_stat
    *word_spacing = 0;
    weighted_count = 0;
    QMapIterator iterate(hor_space_stat);

    while (iterate.hasNext())
    {
        iterate.next();

        // negative gaps are in the histogram too, but only positive ones
        // take part in the average
        if(iterate.key() > 0)
        {
            *word_spacing += iterate.value() * iterate.key();
            weighted_count += iterate.value();
        }
    }
    if(weighted_count)
        *word_spacing = (int) ((double)*word_spacing / (double)weighted_count + 0.5);

    // first find the highest count in col_space_stat ...
    *col_spacing = 0;
    QMapIterator iterate_col(col_space_stat);

    while (iterate_col.hasNext())
    {
        iterate_col.next();
        if(iterate_col.value() > *col_spacing) *col_spacing = iterate_col.value();
    }
    // ... then turn that count back into the gap width that has it
    // (per the Qt documentation QMap::key() returns the first matching key,
    // or a default-constructed key when nothing matches)
    *col_spacing = col_space_stat.key(*col_spacing);

    // if there is just one line in a region, there is no point in dividing it
    if(sortedLines.length() == 1)
        *word_spacing = *col_spacing;
}

/**
 * Implements the XY Cut algorithm for textpage segmentation
 * The resulting RegionTextList will contain RegionText whose WordsWithCharacters::word and
 * WordsWithCharacters::characters are reused from wordsWithCharacters (i.e. no new nor delete happens in this function)
 *
 * The list starts with a single region holding every word and the pixel
 * geometry of boundingBox. The region at index i is trimmed to its non-empty
 * projection range and then either
 *  - split in two at its widest gap: the first half replaces entry i, the
 *    second half is inserted at i+1 and i is not advanced, so the first half
 *    is examined again; or
 *  - kept as it is: its rectangle is replaced by the trimmed one and i
 *    advances.
 *
 * @param wordsWithCharacters the words to distribute over regions
 * @param boundingBox         normalized rectangle used as the root region
 * @param pageWidth           page width used to convert normalized areas
 * @param pageHeight          page height used to convert normalized areas
 * @return the list of regions once none of them can be cut any further
 */
static RegionTextList XYCutForBoundingBoxes(const QList &wordsWithCharacters, const NormalizedRect &boundingBox, int pageWidth, int pageHeight)
{
    RegionTextList tree;
    QRect contentRect(boundingBox.geometry(pageWidth,pageHeight));
    const RegionText root(wordsWithCharacters, contentRect);

    // start the tree with the root, it is our only region at the start
    tree.push_back(root);

    int i = 0;

    // loop until every region in the list has been examined
    while(i < tree.length())
    {
        const RegionText node = tree.at(i);
        QRect regionRect = node.area();

        /**
         * 1. calculation of projection profiles
         */
        // allocate the size of proj profiles and initialize with 0
        int size_proj_y = node.area().height();
        int size_proj_x = node.area().width();
        // dynamic memory allocation
        QVarLengthArray proj_on_xaxis(size_proj_x);
        QVarLengthArray proj_on_yaxis(size_proj_y);

        for( int j = 0 ; j < size_proj_y ; ++j ) proj_on_yaxis[j] = 0;
        for( int j = 0 ; j < size_proj_x ; ++j ) proj_on_xaxis[j] = 0;

        const QList list = node.text();

        // Calculate tcx and tcy locally for each new region
        int word_spacing, line_spacing, column_spacing;
        calculateStatisticalInformation(list, pageWidth, pageHeight, &word_spacing, &line_spacing, &column_spacing);

        // a gap has to reach twice the typical spacing before it justifies a cut
        const int tcx = word_spacing * 2;
        const int tcy = line_spacing * 2;

        int maxX = 0 , maxY = 0;
        int avgX = 0;
        int count;

        // for every text in the region
        for(int j = 0 ; j < list.length() ; ++j )
        {
            TinyTextEntity *ent = list.at(j).word;
            const QRect entRect = ent->area.geometry(pageWidth, pageHeight);

            // calculate the vertical projection profile proj_on_xaxis;
            // columns that fall outside the region are skipped
            for(int k = entRect.left() ; k <= entRect.left() + entRect.width() ; ++k)
            {
                if( ( k-regionRect.left() ) < size_proj_x && ( k-regionRect.left() ) >= 0 )
                    proj_on_xaxis[k - regionRect.left()] += entRect.height();
            }

            // calculate horizontal projection profile in the same way
            for(int k = entRect.top() ; k <= entRect.top() + entRect.height() ; ++k)
            {
                if( ( k-regionRect.top() ) < size_proj_y && ( k-regionRect.top() ) >= 0 )
                    proj_on_yaxis[k - regionRect.top()] += entRect.width();
            }
        }

        for( int j = 0 ; j < size_proj_y ; ++j )
        {
            if (proj_on_yaxis[j] > maxY)
                maxY = proj_on_yaxis[j];
        }

        // average of the non-zero entries of the x profile
        avgX = count = 0;
        for( int j = 0 ; j < size_proj_x ; ++j )
        {
            if(proj_on_xaxis[j] > maxX) maxX = proj_on_xaxis[j];
            if(proj_on_xaxis[j])
            {
                count++;
                avgX+= proj_on_xaxis[j];
            }
        }
        if(count) avgX /= count;

        /**
         * 2. Cleanup Boundary White Spaces and removal of noise
         */
        int xbegin = 0, xend = size_proj_x - 1;
        int ybegin = 0, yend = size_proj_y - 1;
        while(xbegin < size_proj_x && proj_on_xaxis[xbegin] <= 0)
            xbegin++;
        while(xend >= 0 && proj_on_xaxis[xend] <= 0)
            xend--;
        while(ybegin < size_proj_y && proj_on_yaxis[ybegin] <= 0)
            ybegin++;
        while(yend >= 0 && proj_on_yaxis[yend] <= 0)
            yend--;

        // update the regionRect
        int old_left = regionRect.left(), old_top = regionRect.top();
        regionRect.setLeft(old_left + xbegin);
        regionRect.setRight(old_left + xend);
        regionRect.setTop(old_top + ybegin);
        regionRect.setBottom(old_top + yend);

        // noise threshold: 10% of the average x profile; none for the y profile
        int tnx = (int)((double)avgX * 10.0 / 100.0 + 0.5), tny = 0;
        for( int j = 0 ; j < size_proj_x ; ++j )
            proj_on_xaxis[j] -= tnx;
        for( int j = 0 ; j < size_proj_y ; ++j )
            proj_on_yaxis[j] -= tny;

        /**
         * 3. Find the Widest gap
         */
        int gap_hor = -1, pos_hor = -1;
        int begin = -1, end = -1;

        // find all hor_gaps and find the maximum between them
        for(int j = 1 ; j < size_proj_y ; ++j)
        {
            // transition from white to black
            if(begin >= 0 && proj_on_yaxis[j-1] <= 0 && proj_on_yaxis[j] > 0)
                end = j;

            // transition from black to white
            if(proj_on_yaxis[j-1] > 0 && proj_on_yaxis[j] <= 0)
                begin = j;

            // a complete gap that is wider than the best one so far
            if(begin > 0 && end > 0 && end-begin > gap_hor)
            {
                gap_hor = end - begin;
                pos_hor = (end + begin) / 2;
                begin = -1;
                end = -1;
            }
        }

        begin = -1, end = -1;
        int gap_ver = -1, pos_ver = -1;

        // find all the ver_gaps and find the maximum between them
        for(int j = 1 ; j < size_proj_x ; ++j)
        {
            // transition from white to black
            if(begin >= 0 && proj_on_xaxis[j-1] <= 0 && proj_on_xaxis[j] > 0){
                end = j;
            }

            // transition from black to white
            if(proj_on_xaxis[j-1] > 0 && proj_on_xaxis[j] <= 0)
                begin = j;

            if(begin > 0 && end > 0 && end-begin > gap_ver)
            {
                gap_ver = end - begin;
                pos_ver = (end + begin) / 2;
                begin = -1;
                end = -1;
            }
        }

        int cut_pos_x = pos_ver, cut_pos_y = pos_hor;
        int gap_x = gap_ver, gap_y = gap_hor;

        /**
         * 4. Cut the region and make nodes (left,right) or (up,down)
         */
        bool cut_hor = false, cut_ver = false;

        // For horizontal cut
        // (the cut position is relative to the untrimmed region, hence the
        // correction by the amount trimmed off the top)
        const int topHeight = cut_pos_y - (regionRect.top() - old_top);
        const QRect topRect(regionRect.left(),
                            regionRect.top(),
                            regionRect.width(),
                            topHeight);
        const QRect bottomRect(regionRect.left(),
                               regionRect.top() + topHeight,
                               regionRect.width(),
                               regionRect.height() - topHeight );

        // For vertical Cut
        const int leftWidth = cut_pos_x - (regionRect.left() - old_left);
        const QRect leftRect(regionRect.left(),
                             regionRect.top(),
                             leftWidth,
                             regionRect.height());
        const QRect rightRect(regionRect.left() + leftWidth,
                              regionRect.top(),
                              regionRect.width() - leftWidth,
                              regionRect.height());

        // prefer the direction with the wider gap; fall back to the other
        // direction when only that one reaches its threshold
        if(gap_y >= gap_x && gap_y >= tcy)
            cut_hor = true;
        else if(gap_y >= gap_x && gap_y <= tcy && gap_x >= tcx)
            cut_ver = true;
        else if(gap_x >= gap_y && gap_x >= tcx)
            cut_ver = true;
        else if(gap_x >= gap_y && gap_x <= tcx && gap_y >= tcy)
            cut_hor = true;
        // no cut possible
        else
        {
            // we can now update the node rectangle with the shrunk rectangle
            RegionText tmpNode = tree.at(i);
            tmpNode.setArea(regionRect);
            tree.replace(i,tmpNode);
            i++;
            continue;
        }

        WordsWithCharacters list1,list2;

        // horizontal cut, topRect and bottomRect
        if(cut_hor)
        {
            for( int j = 0 ; j < list.length() ; ++j )
            {
                const WordWithCharacters word = list.at(j);
                const QRect wordRect = word.area().geometry(pageWidth,pageHeight);

                // words touching the top rectangle go up, all others go down
                if(topRect.intersects(wordRect))
                    list1.append(word);
                else
                    list2.append(word);
            }

            RegionText node1(list1,topRect);
            RegionText node2(list2,bottomRect);

            tree.replace(i,node1);
            tree.insert(i+1,node2);
        }

        // vertical cut, leftRect and rightRect
        else if(cut_ver)
        {
            for( int j = 0 ; j < list.length() ; ++j )
            {
                const WordWithCharacters word = list.at(j);
                const QRect wordRect = word.area().geometry(pageWidth,pageHeight);

                // words touching the left rectangle go left, all others go right
                if(leftRect.intersects(wordRect))
                    list1.append(word);
                else
                    list2.append(word);
            }

            RegionText node1(list1,leftRect);
            RegionText node2(list2,rightRect);

            tree.replace(i,node1);
            tree.insert(i+1,node2);
        }
    }

    return tree;
}

/**
 * Add spaces in between words in a line. It reuses the pointers passed in tree and might add new ones. You will need to take care of deleting them if needed
 *
 * @param tree       the regions to process; taken by value, so the changes
 *                   made to the regions here stay local to this function
 * @param pageWidth  page width used for makeAndSortLines() and roundedGeometry()
 * @param pageHeight page height used for makeAndSortLines() and roundedGeometry()
 * @return the words of all regions, region after region and line after line,
 *         with the inserted space entries in between
 */
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
{
    /**
     * 1. Call makeAndSortLines before adding spaces in between words in a line
     * 2. Now add spaces between every two words in a line
     * 3. Finally, extract all the space separated texts from each region and return it
     */

    // Only change the texts under RegionTexts, not the area
    for(int j = 0 ; j < tree.length() ; j++)
    {
        RegionText &tmpRegion = tree[j];

        // Step 01
        QList< QPair > sortedLines = makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);

        // Step 02
        for(int i = 0 ; i < sortedLines.length() ; i++)
        {
            WordsWithCharacters &list = sortedLines[i].first;
            for(int k = 0 ; k < list.length() ; k++ )
            {
                const QRect area1 = list.at(k).area().roundedGeometry(pageWidth,pageHeight);
                // the last word of the line has no right-hand neighbour
                if( k+1 >= list.length() ) break;

                const QRect area2 = list.at(k+1).area().roundedGeometry(pageWidth,pageHeight);
                const int space = area2.left() - area1.right();

                // any non-zero distance gets a space entry, a negative one included
                if(space != 0)
                {
                    // Make a TinyTextEntity of string space and push it between it and it+1
                    const int left = area1.right();
                    const int right = area2.left();
                    const int top = area2.top() < area1.top() ? area2.top() : area1.top();
                    const int bottom = area2.bottom() > area1.bottom() ? area2.bottom() : area1.bottom();

                    const QString spaceStr(QStringLiteral(" "));
                    const QRect rect(QPoint(left,top),QPoint(right,bottom));
                    const NormalizedRect entRect(rect,pageWidth,pageHeight);
                    // one entity for the word itself and one for its only
                    // character; both are allocated here
                    TinyTextEntity *ent1 = new TinyTextEntity(spaceStr, entRect);
                    TinyTextEntity *ent2 = new TinyTextEntity(spaceStr, entRect);
                    WordWithCharacters word(ent1, QList() << ent2);

                    list.insert(k+1, word);

                    // Skip the space
                    k++;
                }
            }
        }

        // flatten the lines back into the word list of the region
        WordsWithCharacters tmpList;
        for(int i = 0 ; i < sortedLines.length() ; i++)
        {
            tmpList += sortedLines.at(i).first;
        }
        tmpRegion.setText(tmpList);
    }

    // Step 03
    WordsWithCharacters tmp;
    for(int i = 0 ; i < tree.length() ; i++)
    {
        tmp += tree.at(i).text();
    }
    return tmp;
}

/**
 * Correct the textOrder, all layout recognition works here
 *
 * Works on a copy of m_words: removes the spaces, builds words from the
 * characters, segments the words into regions with XYCutForBoundingBoxes(),
 * inserts spaces again with addNecessarySpace() and hands the resulting
 * characters to setWordList(). The word-level entity of every word is
 * deleted on the way; only the character entities are passed on.
 */
void TextPagePrivate::correctTextOrder()
{
    // The page's width() and height() are in pixels at
    // 100% zoom level, and thus depend on display DPI. We scale pageWidth and
    // pageHeight to remove the dependence. Otherwise bugs would be more difficult
    // to reproduce and Okular could fail in extreme cases like a large TV with low DPI.
    // NOTE(review): the division assumes width() + height() is non-zero -- confirm.
-    const double scalingFactor = 2000.0 / (m_page->m_page->width() + m_page->m_page->height());
-    const int pageWidth = (int) (scalingFactor * m_page->m_page->width() );
-    const int pageHeight = (int) (scalingFactor * m_page->m_page->height());
+    const double scalingFactor = 2000.0 / (m_page->width() + m_page->height());
+    const int pageWidth = (int) (scalingFactor * m_page->width() );
+    const int pageHeight = (int) (scalingFactor * m_page->height());

    TextList characters = m_words;

    /**
     * Remove spaces from the text
     */
    removeSpace(&characters);

    /**
     * Construct words from characters
     */
    const QList wordsWithCharacters = makeWordFromCharacters(characters, pageWidth, pageHeight);

    /**
     * Make a XY Cut tree for segmentation of the texts
     */
-    const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, m_page->m_page->boundingBox(), pageWidth, pageHeight);
+    const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, m_page->boundingBox(), pageWidth, pageHeight);

    /**
     * Add spaces to the word
     */
    const WordsWithCharacters listWithWordsAndSpaces = addNecessarySpace(tree, pageWidth, pageHeight);

    /**
     * Break the words into characters
     */
    TextList listOfCharacters;
    foreach(const WordWithCharacters &word, listWithWordsAndSpaces)
    {
        // the word entity itself is no longer needed, its characters are kept
        delete word.word;
        listOfCharacters.append(word.characters);
    }
    setWordList(listOfCharacters);
}

/**
 * Returns the text entities of the page, optionally restricted to an area.
 *
 * @param area when it is a null pointer every word is returned; when it
 *             points to an area for which isNull() is true the result is
 *             empty; otherwise only the words selected by @p b are returned
 * @param b    with AnyPixelTextAreaInclusionBehaviour a word is selected when
 *             its area intersects @p area; with any other value it is
 *             selected when the center of its area lies inside @p area
 * @return a list of TextEntity objects allocated with new, each carrying a
 *         NormalizedRect that is also allocated with new
 */
TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionBehaviour b) const
{
    if ( area && area->isNull() )
        return TextEntity::List();

    TextEntity::List ret;
    if ( area )
    {
        foreach (TinyTextEntity *te, d->m_words)
        {
            if (b == AnyPixelTextAreaInclusionBehaviour)
            {
                if ( area->intersects( te->area ) )
                {
                    ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
                }
            }
            else
            {
                const NormalizedPoint center = te->area.center();
                if ( area->contains( center.x, center.y ) )
                {
                    ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
                }
            }
        }
    }
    else
    {
        // no area given: every word is part of the result
        foreach (TinyTextEntity *te, d->m_words)
        {
            ret.append( new TextEntity( te->text(), new Okular::NormalizedRect( te->area) ) );
        }
    }
    return ret;
}

/**
 * Returns the area of the word located at a point.
 *
 * The first entity whose area contains @p p is looked up. From there the
 * list is walked backwards to the start of the word and then forwards to
 * its end; an entity whose text ends with whitespace terminates a word,
 * except where the text indicates a hyphenated line break ("-" followed by
 * a newline).
 *
 * @param p    the point to look at
 * @param word when not null, receives the concatenated text of the entities
 *             that make up the word
 * @return a RegularAreaRect allocated with new that holds the areas of those
 *         entities, or nullptr when no entity contains @p p or the entity
 *         found there consists of whitespace only
 */
RegularAreaRect * TextPage::wordAt( const NormalizedPoint &p, QString *word ) const
{
    TextList::ConstIterator itBegin = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
    TextList::ConstIterator it = itBegin;
    TextList::ConstIterator posIt = itEnd;
    // locate the first entity under the point
    for ( ; it != itEnd; ++it )
    {
        if ( (*it)->area.contains( p.x, p.y ) )
        {
            posIt = it;
            break;
        }
    }
    QString text;
    if ( posIt != itEnd )
    {
        if ( (*posIt)->text().simplified().isEmpty() )
        {
            return nullptr;
        }
        // Find the first TinyTextEntity of the word
        while ( posIt != itBegin )
        {
            --posIt;
            const QString itText = (*posIt)->text();
            // NOTE(review): right(1).at(0) assumes the text is not empty --
            // confirm that an entity with empty text cannot occur here
            if ( itText.right(1).at(0).isSpace() )
            {
                if (itText.endsWith(QLatin1String("-\n")))
                {
                    // Is a hyphenated word
                    // continue searching the start of the word back
                    continue;
                }

                if (itText == QLatin1String("\n") && posIt != itBegin )
                {
                    // a lone newline: look at the entity before it
                    --posIt;
                    if ((*posIt)->text().endsWith(QLatin1String("-"))) {
                        // Is a hyphenated word
                        // continue searching the start of the word back
                        continue;
                    }
                    ++posIt;
                }

                // the previous word ends here, so the word starts one further
                ++posIt;
                break;
            }
        }
        RegularAreaRect *ret = new RegularAreaRect();
        // collect entities until the end of the word
        for ( ; posIt != itEnd; ++posIt )
        {
            const QString itText = (*posIt)->text();
            if ( itText.simplified().isEmpty() )
            {
                break;
            }

            ret->appendShape( (*posIt)->area );
            text += (*posIt)->text();
            if (itText.right(1).at(0).isSpace())
            {
                // trailing whitespace ends the word unless it is a
                // hyphenated line break
                if (!text.endsWith(QLatin1String("-\n")))
                {
                    break;
                }
            }
        }

        if (word)
        {
            *word = text;
        }
        return ret;
    }
    else
    {
        return nullptr;
    }
}
diff --git a/core/textpage_p.h b/core/textpage_p.h
index 486d5cb2b..6b68b51b3 100644
--- a/core/textpage_p.h
+++ b/core/textpage_p.h
@@ -1,79 +1,79 @@
/***************************************************************************
 *   Copyright (C) 2006 by Tobias Koenig                                   *
 *   Copyright (C) 2007 by Pino Toscano                                    *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 ***************************************************************************/

#ifndef _OKULAR_TEXTPAGE_P_H_
#define _OKULAR_TEXTPAGE_P_H_

// NOTE(review): the targets of the four #include directives below are missing
// in this text (they look stripped by extraction) -- restore them from the
// upstream header.
#include
#include
#include
#include

class SearchPoint;
class TinyTextEntity;
class RegionText;

namespace Okular
{

class PagePrivate;
typedef QList< TinyTextEntity* > TextList;

/**
 * Returns whether the two strings match.
 * Satisfies the condition that if two strings match then their lengths are equal.
 */
typedef bool ( *TextComparisonFunction )( const QStringRef & from, const QStringRef & to );

/**
 * A list of RegionText. It keeps a bunch of TextList with their bounding rectangles
 */
typedef QList RegionTextList;

/**
 * Private data of a text page: the list of its text entities, the search
 * points keyed by an integer and the page it belongs to, plus the internal
 * search helpers.
 */
class TextPagePrivate
{
    public:
        TextPagePrivate();
        ~TextPagePrivate();

        RegularAreaRect * findTextInternalForward( int searchID, const QString &query,
                                                   TextComparisonFunction comparer,
                                                   const TextList::ConstIterator &start,
                                                   int start_offset,
                                                   const TextList::ConstIterator &end);
        RegularAreaRect * findTextInternalBackward( int searchID, const QString &query,
                                                    TextComparisonFunction comparer,
                                                    const TextList::ConstIterator &start,
                                                    int start_offset,
                                                    const TextList::ConstIterator &end );

        /**
         * Copy a TextList to m_words, the pointers of list are adopted
         */
        void setWordList(const TextList &list);

        /**
         * Make necessary modifications in the TextList to make the text order correct, so
         * that textselection works fine
         */
        void correctTextOrder();

        // variables that can be accessed directly from TextPage
        TextList m_words;
        QMap< int, SearchPoint* > m_searchPoints;
        // NOTE(review): only PagePrivate is forward-declared in this header;
        // the Page type used by the added line has to be declared by whatever
        // is included before this header -- confirm.
-        PagePrivate *m_page;
+        Page *m_page;

    private:
        RegularAreaRect * searchPointToArea(const SearchPoint* sp);
};

}

#endif