Changeset View
Standalone View
core/textpage.h
Show All 23 Lines | |||||
24 | class NormalizedRect; | 24 | class NormalizedRect; | ||
25 | class Page; | 25 | class Page; | ||
26 | class PagePrivate; | 26 | class PagePrivate; | ||
27 | class TextPagePrivate; | 27 | class TextPagePrivate; | ||
28 | class TextSelection; | 28 | class TextSelection; | ||
29 | class RegularAreaRect; | 29 | class RegularAreaRect; | ||
30 | 30 | | |||
31 | /*! @class TextEntity | 31 | /*! @class TextEntity | ||
32 | * @short Abstract textentity of Okular | 32 | * @short Represents a piece of text on a TextPage, containing its textual representation and its bounding box. | ||
33 | * @par The context | | |||
34 | * A document can provide different forms of information about textual representation | | |||
35 | * of its contents. It can include information about positions of every character on the | | |||
36 | * page, this is the best possibility. | | |||
37 | * | 33 | * | ||
38 | * But also it can provide information only about positions of every word on the page (not the character). | 34 | * To enable searching and text selection, a generator can give information about the textual | ||
39 | * Furthermore it can provide information only about the position of the whole page's text on the page. | 35 | * content of a Page using a TextPage. | ||
36 | * A TextPage is created using TextEntity objects. | ||||
37 | * A TextEntity can represent a single character/glyph, a word, a line, or even the whole page. | ||||
40 | * | 38 | * | ||
41 | * Also some document types have glyphes - sets of characters rendered as one, so in search they should | 39 | * Ideally, every single glyph is represented by its own TextEntity. | ||
aacid: s/an/its ? | |||||
42 | * appear as a text but are only one character when drawn on screen. We need to allow this. | 40 | * If the textual representation of a graphical glyph contains more than one character, | ||
41 | * the TextEntity must contain the whole string which represents the glyph. | ||||
42 | * | ||||
43 | * When the Generator has created the TextPage, and it is added to a Page, | ||||
44 | * the text entities are reordered into words, lines, and paragraphs, to optimize search and text selection. | ||||
aacid: entities | |||||
45 | * This way, the Generator does not need to care about the logical order of lines or paragraphs. | ||||
46 | * | ||||
47 | * @par Text Selection/Highlighting | ||||
48 | * A TextEntity is the smallest piece of text that the user can select or that can be highlighted. | ||||
49 | * That is, if the TextEntity represents a word, only the whole word can be selected. | ||||
50 | * It would not be possible to select a single glyph of the word, because its bounding box is not known. | ||||
51 | * | ||||
52 | * @par Vertical Text | ||||
Documentation can’t fix https://bugs.kde.org/show_bug.cgi?id=407133. As soon as TextPagePrivate::correctTextOrder handles vertical text, this paragraph can be removed. davidhurka: Documentation can’t fix https://bugs.kde.org/show_bug.cgi?id=407133. As soon as TextPagePrivate… | |||||
53 | * Currently, the reordering mixes up TextEntity objects which represent glyphs or words of vertical text. | ||||
I'm not convinced we should mention this (and even less in this class), sure it's a bug, but hopefully it'll be fixed and when it does no one will remember to remove this since the bug is not even in this class. aacid: I'm not convinced we should mention this (and even less in this class), sure it's a bug, but… | |||||
davidhurka: Ok, then I will remove it. | |||||
54 | * If searching for or highlighting of vertical text is important for the document type, | ||||
55 | * the Generator should add them as a single TextEntity. | ||||
56 | * | ||||
57 | * @see TextPage, Generator | ||||
43 | */ | 58 | */ | ||
44 | class OKULARCORE_EXPORT TextEntity | 59 | class OKULARCORE_EXPORT TextEntity | ||
45 | { | 60 | { | ||
46 | public: | 61 | public: | ||
47 | typedef QList<TextEntity*> List; | 62 | typedef QList<TextEntity*> List; | ||
48 | 63 | | |||
49 | /** | 64 | /** | ||
50 | * Creates a new text entity with the given @p text and the | 65 | * Creates a new text entity with the given @p text and the | ||
51 | * given @p area. | 66 | * given @p boundingBox. | ||
52 | */ | 67 | */ | ||
53 | TextEntity( const QString &text, NormalizedRect *area ); | 68 | TextEntity( const QString &text, NormalizedRect *boundingBox ); | ||
54 | 69 | | |||
55 | /** | 70 | /** | ||
56 | * Destroys the text entity. | 71 | * Destroys the text entity. | ||
57 | */ | 72 | */ | ||
58 | ~TextEntity(); | 73 | ~TextEntity(); | ||
59 | 74 | | |||
60 | /** | 75 | /** | ||
61 | * Returns the text of the text entity. | 76 | * Returns the text of the text entity. | ||
Show All 16 Lines | 90 | private: | |||
78 | 93 | | |||
79 | class Private; | 94 | class Private; | ||
80 | const Private *d; | 95 | const Private *d; | ||
81 | 96 | | |||
82 | Q_DISABLE_COPY( TextEntity ) | 97 | Q_DISABLE_COPY( TextEntity ) | ||
83 | }; | 98 | }; | ||
84 | 99 | | |||
85 | /** | 100 | /** | ||
86 | * The TextPage class represents the text of a page by | 101 | * @short Represents the textual information of a Page. Makes search and text selection possible. | ||
87 | * providing @see TextEntity items for every word/character of | 102 | * | ||
88 | * the page. | 103 | * A Generator with text support should add a TextPage to every Page. | ||
104 | * For every piece of text, a TextEntity is added, holding the string representation and the bounding box. | ||||
105 | * | ||||
106 | * Ideally, every TextEntity describes only one glyph. | ||||
107 | * A "glyph" is one character in the graphical representation, but the textual representation may consist of multiple characters (like diacritic modifiers). | ||||
108 | * | ||||
109 | * When the TextPage is added to the Page, the TextEntity objects are restructured to optimize text selection. | ||||
110 | * | ||||
111 | * @see TextEntity | ||||
89 | */ | 112 | */ | ||
90 | class OKULARCORE_EXPORT TextPage | 113 | class OKULARCORE_EXPORT TextPage | ||
91 | { | 114 | { | ||
92 | /// @cond PRIVATE | 115 | /// @cond PRIVATE | ||
93 | friend class Page; | 116 | friend class Page; | ||
94 | friend class PagePrivate; | 117 | friend class PagePrivate; | ||
95 | /// @endcond | 118 | /// @endcond | ||
96 | 119 | | |||
Show All 40 Lines | 120 | public: | |||
137 | * the search is case insensitive. | 160 | * the search is case insensitive. | ||
138 | * @param lastRect If 0 the search starts at the beginning of the page, otherwise | 161 | * @param lastRect If 0 the search starts at the beginning of the page, otherwise | ||
139 | * right/below the coordinates of the given rect. | 162 | * right/below the coordinates of the given rect. | ||
140 | */ | 163 | */ | ||
141 | RegularAreaRect* findText( int id, const QString &text, SearchDirection direction, | 164 | RegularAreaRect* findText( int id, const QString &text, SearchDirection direction, | ||
142 | Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect ); | 165 | Qt::CaseSensitivity caseSensitivity, const RegularAreaRect *lastRect ); | ||
143 | 166 | | |||
144 | /** | 167 | /** | ||
145 | * Text extraction function. | 168 | * Text extraction function. Looks for text in the given @p area, and concatenates it to a string. | ||
i don't see the extra value added by this addition, the "Returns" below already says what this function does, but if you want to have a sentence here i'd prefer "and returns it as a string" over "and concatenates it to a string". aacid: i don't see the extra value added by this addition, the "Returns" below already says what this… | |||||
Agreed, will remove that sentence. This is related to textArea( TextSelection * ), I will mention that instead. davidhurka: Agreed, will remove that sentence.
This is related to textArea( TextSelection * ), I will… | |||||
concatenate is not the word you want here. Concatenate means "append" and there's no string passed in to get it appended to. "returns it as a string", which as pointed out is already below, but if you really want that sentence to exist please use return as not concatenate it to aacid: concatenate is not the word you want here.
Concatenate means "append" and there's no string… | |||||
How do you get from concatenate to append? I’m not the best english speaker, but I think concatenate (as verb) does not take that kind of object. Well, I will avoid it completely. davidhurka: How do you get from concatenate to append?
I’m not the best english speaker, but I think… | |||||
concatenate = to put things together as a connected series https://en.wikipedia.org/wiki/Concatenation says 'For example, the concatenation of "snow" and "ball" is "snowball".' That's why concatenate to me implies more than one thing, and here's there just one, the string itself. But i'm not a native english speaker either so may be totally wrong aacid: concatenate = to put things together as a connected series
https://en.wikipedia. | |||||
It was meant differently. It concatenates the TextEntities, which are kind of strings, and more than only one. The result is the one string which is returned. :) davidhurka: It was meant differently. It concatenates the TextEntities, which are kind of strings, and more… | |||||
146 | * | 169 | * | ||
147 | * Returns: | 170 | * Returns: | ||
148 | * - a null string if @p rect is a valid pointer to a null area | 171 | * - a null string if @p area is a valid pointer to a null area | ||
149 | * - the whole page text if @p rect is a null pointer | 172 | * - the whole page text if @p area is a null pointer | ||
150 | * - the text which is included by rectangular area @p rect otherwise | 173 | * - the text which is included by @p area otherwise | ||
151 | * Uses AnyPixelTextAreaInclusionBehaviour | 174 | * Uses AnyPixelTextAreaInclusionBehaviour | ||
152 | */ | 175 | */ | ||
153 | QString text( const RegularAreaRect *rect = nullptr ) const; | 176 | QString text( const RegularAreaRect *area = nullptr ) const; | ||
154 | 177 | | |||
155 | /** | 178 | /** | ||
156 | * Text extraction function. | 179 | * Text extraction function. Looks for text in the given @p area, and concatenates it to a string. | ||
Should be "@p area". yurchor: Should be "@p area".
http://www.doxygen.nl/manual/commands.html#cmdp | |||||
aacid: same as above | |||||
157 | * | 180 | * | ||
158 | * Returns: | 181 | * Returns: | ||
159 | * - a null string if @p rect is a valid pointer to a null area | 182 | * - a null string if @p area is a valid pointer to a null area | ||
160 | * - the whole page text if @p rect is a null pointer | 183 | * - the whole page text if @p area is a null pointer | ||
161 | * - the text which is included by rectangular area @p rect otherwise | 184 | * - the text which is included by rectangular area @p area otherwise | ||
162 | * @since 0.10 (KDE 4.4) | 185 | * @since 0.10 (KDE 4.4) | ||
163 | */ | 186 | */ | ||
164 | QString text( const RegularAreaRect * rect, TextAreaInclusionBehaviour b ) const; | 187 | QString text( const RegularAreaRect * area, TextAreaInclusionBehaviour b ) const; | ||
165 | 188 | | |||
166 | /** | 189 | /** | ||
167 | * Text entity extraction function. Similar to text() but returns | 190 | * Text entity extraction function. Similar to text() but returns | ||
168 | * the words including their bounding rectangles. Note that | 191 | * the words including their bounding rectangles. Note that | ||
169 | * ownership of the contents of the returned list belongs to the | 192 | * ownership of the contents of the returned list belongs to the | ||
170 | * caller. | 193 | * caller. | ||
171 | * @since 0.14 (KDE 4.8) | 194 | * @since 0.14 (KDE 4.8) | ||
172 | */ | 195 | */ | ||
Show All 23 Lines |
s/an/its ?