Changeset View
Changeset View
Standalone View
Standalone View
data/gentrigrams.cpp
Show All 19 Lines | |||||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | 20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA | ||
21 | * 02110-1301 USA | 21 | * 02110-1301 USA | ||
22 | */ | 22 | */ | ||
23 | 23 | | |||
24 | #include <QtCore/QFile> | 24 | #include <QtCore/QFile> | ||
25 | #include <QtCore/QHash> | 25 | #include <QtCore/QHash> | ||
26 | #include <QtCore/QString> | 26 | #include <QtCore/QString> | ||
27 | #include <QtCore/QDebug> | 27 | #include <QtCore/QDebug> | ||
28 | #include "guesslanguage.h" | ||||
28 | 29 | | |||
29 | int main(int argc, char *argv[]) | 30 | int main(int argc, char *argv[]) | ||
30 | { | 31 | { | ||
31 | if (argc < 3) { | 32 | if (argc < 3) { | ||
32 | qWarning() << argv[0] << "corpus.txt outfile.trigram"; | 33 | qWarning() << argv[0] << "corpus.txt outfile.trigram"; | ||
33 | return -1; | 34 | return -1; | ||
34 | } | 35 | } | ||
35 | 36 | | |||
Show All 34 Lines | 69 | for (const QString &key : model.keys()) { | |||
70 | bool hasTwoSpaces=(data[1].isSpace() && (data[0].isSpace() || data[2].isSpace())); | 71 | bool hasTwoSpaces=(data[1].isSpace() && (data[0].isSpace() || data[2].isSpace())); | ||
71 | 72 | | |||
72 | if (!hasTwoSpaces) orderedTrigrams.insertMulti(model[key], key); | 73 | if (!hasTwoSpaces) orderedTrigrams.insertMulti(model[key], key); | ||
73 | } | 74 | } | ||
74 | qDebug() << "Sorted!"; | 75 | qDebug() << "Sorted!"; | ||
75 | 76 | | |||
76 | qDebug() << "Weeding out..."; | 77 | qDebug() << "Weeding out..."; | ||
77 | QMap<int, QString>::iterator i = orderedTrigrams.begin(); | 78 | QMap<int, QString>::iterator i = orderedTrigrams.begin(); | ||
78 | while (orderedTrigrams.size() > 300) { | 79 | while (orderedTrigrams.size() > Sonnet::MAXGRAMS) { | ||
79 | orderedTrigrams.erase(i); | 80 | i = orderedTrigrams.erase(i); | ||
80 | i++; | | |||
81 | } | 81 | } | ||
82 | qDebug() << "Weeded!"; | 82 | qDebug() << "Weeded!"; | ||
83 | 83 | | |||
84 | qDebug() << "Storing..."; | 84 | qDebug() << "Storing..."; | ||
85 | i = orderedTrigrams.end(); | 85 | i = orderedTrigrams.end(); | ||
86 | int count=0; | 86 | int count=0; | ||
87 | QTextStream outStream(&outFile); | 87 | QTextStream outStream(&outFile); | ||
88 | outStream.setCodec("UTF-8"); | 88 | outStream.setCodec("UTF-8"); | ||
89 | while (i != orderedTrigrams.begin()) { | 89 | while (i != orderedTrigrams.begin()) { | ||
90 | --i; | 90 | --i; | ||
91 | outStream << *i << "\t\t\t" << count++ << '\n'; | 91 | outStream << *i << "\t\t\t" << count++ << '\n'; | ||
92 | } | 92 | } | ||
93 | } | 93 | } | ||
94 | 94 | |