Changeset View
Changeset View
Standalone View
Standalone View
core/textpage.h
Show All 23 Lines | |||||
24 | class NormalizedRect; | 24 | class NormalizedRect; | ||
25 | class Page; | 25 | class Page; | ||
26 | class PagePrivate; | 26 | class PagePrivate; | ||
27 | class TextPagePrivate; | 27 | class TextPagePrivate; | ||
28 | class TextSelection; | 28 | class TextSelection; | ||
29 | class RegularAreaRect; | 29 | class RegularAreaRect; | ||
30 | 30 | | |||
31 | /*! @class TextEntity | 31 | /*! @class TextEntity | ||
32 | * @short Abstract textentity of Okular | 32 | * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box. | ||
33 | * @par The context | | |||
34 | * A document can provide different forms of information about textual representation | | |||
35 | * of its contents. It can include information about positions of every character on the | | |||
36 | * page, this is the best possibility. | | |||
37 | * | 33 | * | ||
38 | * But also it can provide information only about positions of every word on the page (not the character). | 34 | * To enable searching and text selection, a generator can give information about the textual | ||
39 | * Furthermore it can provide information only about the position of the whole page's text on the page. | 35 | * content of a Page using a TextPage. | ||
36 | * A TextPage is created using TextEntity objects. | ||||
37 | * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page. | ||||
40 | * | 38 | * | ||
41 | * Also some document types have glyphs - sets of characters rendered as one, so in search they should | 39 | * Ideally, every single glyph is represented by its own TextEntity. | ||
42 | * appear as a text but are only one character when drawn on screen. We need to allow this. | 40 | * If the textual representation of a graphical glyph contains more than one character, | ||
41 | * the TextEntity must contain the whole string which represents the glyph. | ||||
42 | * | ||||
43 | * When the Generator has created the TextPage, and it is added to a Page, | ||||
44 | * the text entities are reordered into words, lines, and paragraphs to optimize search and text selection. | ||||
45 | * This way, the Generator does not need to care about the logical order of lines or paragraphs. | ||||
46 | * | ||||
47 | * @par Text Selection/Highlighting | ||||
48 | * A TextEntity is the smallest piece of text that the user can select or that can be highlighted. | ||||
49 | * That is, if the TextEntity represents a word, only the whole word can be selected. | ||||
50 | * It would not be possible to select a single glyph of the word, because its bounding box is not known. | ||||
51 | * | ||||
52 | * @see TextPage, Generator | ||||
43 | */ | 53 | */ | ||
44 | class OKULARCORE_EXPORT TextEntity | 54 | class OKULARCORE_EXPORT TextEntity | ||
45 | { | 55 | { | ||
46 | public: | 56 | public: | ||
47 | typedef QList<TextEntity*> List; | 57 | typedef QList<TextEntity*> List; | ||
48 | 58 | | |||
49 | /** | 59 | /** | ||
50 | * Creates a new text entity with the given @p text and the | 60 | * Creates a new text entity with the given @p text and the | ||
51 | * given @p area. | 61 | * given @p boundingBox. | ||
52 | */ | 62 | */ | ||
53 | TextEntity( const QString &text, NormalizedRect *area ); | 63 | TextEntity( const QString &text, NormalizedRect *boundingBox ); | ||
54 | 64 | | |||
55 | /** | 65 | /** | ||
56 | * Destroys the text entity. | 66 | * Destroys the text entity. | ||
57 | */ | 67 | */ | ||
58 | ~TextEntity(); | 68 | ~TextEntity(); | ||
59 | 69 | | |||
60 | /** | 70 | /** | ||
61 | * Returns the text of the text entity. | 71 | * Returns the text of the text entity. | ||
Show All 16 Lines | 85 | private: | |||
78 | 88 | | |||
79 | class Private; | 89 | class Private; | ||
80 | const Private *d; | 90 | const Private *d; | ||
81 | 91 | | |||
82 | Q_DISABLE_COPY( TextEntity ) | 92 | Q_DISABLE_COPY( TextEntity ) | ||
83 | }; | 93 | }; | ||
84 | 94 | | |||
85 | /** | 95 | /** | ||
86 | * The TextPage class represents the text of a page by | 96 | * @short Represents the textual information of a Page. Makes search and text selection possible. | ||
87 | * providing @see TextEntity items for every word/character of | 97 | * | ||
88 | * the page. | 98 | * A Generator with text support should add a TextPage to every Page. | ||
99 | * For every piece of text, a TextEntity is added, holding the string representation and the bounding box. | ||||
100 | * | ||||
101 | * Ideally, every TextEntity describes only one glyph. | ||||
102 | * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers). | ||||
103 | * | ||||
104 | * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection. | ||||
105 | * | ||||
106 | * @see TextEntity | ||||
89 | */ | 107 | */ | ||
90 | class OKULARCORE_EXPORT TextPage | 108 | class OKULARCORE_EXPORT TextPage | ||
91 | { | 109 | { | ||
92 | /// @cond PRIVATE | 110 | /// @cond PRIVATE | ||
93 | friend class Page; | 111 | friend class Page; | ||
94 | friend class PagePrivate; | 112 | friend class PagePrivate; | ||
95 | /// @endcond | 113 | /// @endcond | ||
96 | 114 | | |||
Show All 40 Lines | 115 | public: | |||
137 | * the search is case insensitive. | 155 | * the search is case insensitive. | ||
138 | * @param lastRect If 0 the search starts at the beginning of the page, otherwise | 156 | * @param lastRect If 0 the search starts at the beginning of the page, otherwise | ||
139 | * right/below the coordinates of the given rect. | 157 | * right/below the coordinates of the given rect. | ||
140 | */ | 158 | */ | ||
141 | RegularAreaRect* findText( int id, const QString &text, SearchDirection direction, | 159 | RegularAreaRect* findText( int id, const QString &text, SearchDirection direction, | ||
142 | Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect ); | 160 | Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect ); | ||
143 | 161 | | |||
144 | /** | 162 | /** | ||
145 | * Text extraction function. | 163 | * Text extraction function. Looks for text in the given @p area. | ||
146 | * | 164 | * | ||
147 | * Returns: | 165 | * @return | ||
148 | * - a null string if @p rect is a valid pointer to a null area | 166 | * - If @p area points to a valid null area, a null string. | ||
149 | * - the whole page text if @p rect is a null pointer | 167 | * - If @p area is nullptr, the whole page text as a single string. | ||
150 | * - the text which is included by rectangular area @p rect otherwise | 168 | * - Otherwise, the text which is included by @p area, as a single string. | ||
151 | * Uses AnyPixelTextAreaInclusionBehaviour | 169 | * Uses AnyPixelTextAreaInclusionBehaviour | ||
152 | */ | 170 | */ | ||
153 | QString text( const RegularAreaRect *rect = nullptr ) const; | 171 | QString text( const RegularAreaRect *area = nullptr ) const; | ||
154 | 172 | | |||
155 | /** | 173 | /** | ||
156 | * Text extraction function. | 174 | * Text extraction function. Looks for text in the given @p area. | ||
157 | * | 175 | * | ||
158 | * Returns: | 176 | * @return | ||
159 | * - a null string if @p rect is a valid pointer to a null area | 177 | * - If @p area points to a valid null area, a null string. | ||
160 | * - the whole page text if @p rect is a null pointer | 178 | * - If @p area is nullptr, the whole page text as a single string. | ||
161 | * - the text which is included by rectangular area @p rect otherwise | 179 | * - Otherwise, the text which is included by @p area, as a single string. | ||
162 | * @since 0.10 (KDE 4.4) | 180 | * @since 0.10 (KDE 4.4) | ||
163 | */ | 181 | */ | ||
164 | QString text( const RegularAreaRect * rect, TextAreaInclusionBehaviour b ) const; | 182 | QString text( const RegularAreaRect * area, TextAreaInclusionBehaviour b ) const; | ||
165 | 183 | | |||
166 | /** | 184 | /** | ||
167 | * Text entity extraction function. Similar to text() but returns | 185 | * Text entity extraction function. Similar to text() but returns | ||
168 | * the words including their bounding rectangles. Note that | 186 | * the words including their bounding rectangles. Note that | ||
169 | * ownership of the contents of the returned list belongs to the | 187 | * ownership of the contents of the returned list belongs to the | ||
170 | * caller. | 188 | * caller. | ||
171 | * @since 0.14 (KDE 4.8) | 189 | * @since 0.14 (KDE 4.8) | ||
172 | */ | 190 | */ | ||
Show All 23 Lines |