Differential D14738 Diff 39436 src/lib/3rdparty/markdown-tokens.cpp

Changeset View

Standalone View

src/lib/3rdparty/markdown-tokens.cpp

This file was added.

		1
		2		/*
		3		Copyright (c) 2009 by Chad Nelson
		4		Released under the MIT License.
		5		See the provided LICENSE.TXT file for details.
		6		*/
		7
		8		#include "markdown-tokens.h"
		9
		10		#include <stack>
		11
		12		#include <boost/lexical_cast.hpp>
		13		#include <boost/regex.hpp>
		14		#include <boost/unordered_set.hpp>
		15
		16		using std::cerr;
		17		using std::endl;
		18
		19		namespace markdown {
		20		namespace token {
		21
		22		namespace {
		23
		24		const std::string cEscapedCharacters("\\`*_{}[]()#+-.!>");
		25
		26		optional<size_t> isEscapedCharacter(char c) {
		27		std::string::const_iterator i=std::find(cEscapedCharacters.begin(),
		28		cEscapedCharacters.end(), c);
		29		if (i!=cEscapedCharacters.end())
		30		return std::distance(cEscapedCharacters.begin(), i);
		31		else return none;
		32		}
		33
		34		char escapedCharacter(size_t index) {
		35		return cEscapedCharacters[index];
		36		}
		37
		38		std::string encodeString(const std::string& src, int encodingFlags) {
		39		bool amps=(encodingFlags & cAmps)!=0,
		40		doubleAmps=(encodingFlags & cDoubleAmps)!=0,
		41		angleBrackets=(encodingFlags & cAngles)!=0,
		42		quotes=(encodingFlags & cQuotes)!=0;
		43
		44		std::string tgt;
		45		for (std::string::const_iterator i=src.begin(), ie=src.end(); i!=ie; ++i) {
		46		if (*i=='&' && amps) {
		47		static const boost::regex cIgnore("^(&)\|(&#[0-9]{1,3};)\|(&#[xX][0-9a-fA-F]{1,2};)");
		48		if (boost::regex_search(i, ie, cIgnore)) {
		49		tgt.push_back(*i);
		50		} else {
		51		tgt+="&";
		52		}
		53		}
		54		else if (*i=='&' && doubleAmps) tgt+="&";
		55		else if (*i=='<' && angleBrackets) tgt+="<";
		56		else if (*i=='>' && angleBrackets) tgt+=">";
		57		else if (*i=='\"' && quotes) tgt+=""";
		58		else tgt.push_back(*i);
		59		}
		60		return tgt;
		61		}
		62
		// Returns true when `str` starts with one of the prefixes that mark an
		// automatic link ("http://", "https://", "ftp://", "ftps://", "file://",
		// "www." or "ftp."). The comparison is case-sensitive.
		bool looksLikeUrl(const std::string& str) {
			const char *schemes[]={ "http://", "https://", "ftp://", "ftps://",
				"file://", "www.", "ftp.", 0 };
			for (size_t x=0; schemes[x]!=0; ++x) {
				// Walk both strings in step while they agree.
				const char *s=str.c_str(), *t=schemes[x];
				while (*s!=0 && *t!=0 && *s==*t) { ++s; ++t; }
				// The whole prefix was consumed, so `str` begins with it.
				if (*t==0) return true;
			}
			return false;
		}
		73
		// Predicate for std::find_if: true when `c` may NOT appear in the name
		// (local) part of an e-mail address. Allowed are alphanumerics and
		// the characters . _ % - +
		bool notValidNameCharacter(char c) {
			// isalnum() is undefined for negative arguments, which a plain char
			// holding a UTF-8 byte can be, so go through unsigned char first.
			const bool alphanumeric=isalnum(static_cast<unsigned char>(c))!=0;
			return !(alphanumeric || c=='.' || c=='_' || c=='%' || c=='-' || c=='+');
		}
		77
		// Predicate for std::find_if: true when `c` may NOT appear in the site
		// (domain) part of an e-mail address.
		bool notValidSiteCharacter(char c) {
			// NOTE: Kludge alert! The official spec for site characters is only
			// "a-zA-Z._%-". However, MDTest supports "international domain names,"
			// which use characters other than that; I'm kind of cheating here, handling
			// those by allowing all utf8-encoded characters too.
			if (c & 0x80) return false;

			// Only 7-bit values get here; the cast keeps isalnum() well-defined
			// regardless of whether plain char is signed.
			return !(isalnum(static_cast<unsigned char>(c)) || c=='.' || c=='_' || c=='%' || c=='-');
		}
		85
		// Predicate for std::find_if: true when `c` is not an alphabetic character.
		bool isNotAlpha(char c) {
			// isalpha() is undefined for negative arguments; convert via unsigned
			// char so bytes with the high bit set are handled safely.
			return !isalpha(static_cast<unsigned char>(c));
		}
		89
		// Obfuscates `src` by writing each 7-bit character as a numeric character
		// reference, alternating between decimal ("&#97;") and hexadecimal
		// ("&#x62;") form. Bytes with the high bit set are copied through
		// unchanged, but still advance the decimal/hex alternation.
		std::string emailEncode(const std::string& src) {
			std::ostringstream out;
			bool inHex=false;
			for (std::string::const_iterator i=src.begin(), ie=src.end(); i!=ie;
				++i)
			{
				if (*i & 0x80) out << *i;
				else if (inHex) {
					out << "&#x" << std::hex << static_cast<int>(*i) << ';';
				} else {
					out << "&#" << std::dec << static_cast<int>(*i) << ';';
				}
				inHex=!inHex;
			}
			return out.str();
		}
		106
		107		bool looksLikeEmailAddress(const std::string& str) {
		108		typedef std::string::const_iterator Iter;
		109		typedef std::string::const_reverse_iterator RIter;
		110		Iter i=std::find_if(str.begin(), str.end(), notValidNameCharacter);
		111		if (i!=str.end() && *i=='@' && i!=str.begin()) {
		112		// The name part is valid.
		113		i=std::find_if(i+1, str.end(), notValidSiteCharacter);
		114		if (i==str.end()) {
		115		// The site part doesn't contain any invalid characters.
		116		RIter ri=std::find_if(str.rbegin(), str.rend(), isNotAlpha);
		117		if (ri!=str.rend() && *ri=='.') {
		118		// It ends with a dot and only alphabetic characters.
		119		size_t d=std::distance(ri.base(), str.end());
		120		if (d>=2 && d<=4) {
		121		// There are two-to-four of them. It's valid.
		122		return true;
		123		}
		124		}
		125		}
		126		}
		127		return false;
		128		}
		129
		130		// From <http://en.wikipedia.org/wiki/HTML_element>
		131
		// Null-terminated table of the recognised non-block HTML tags. A trailing
		// '/' marks a tag that can have a closing tag; initTag() strips it before
		// storing the name.
		const char *cOtherTagInit[]={
			// Header tags
			"title/", "base", "link", "basefont",
			"script/", "style/", "object/", "meta",

			// Inline tags
			"em/", "strong/", "q/", "cite/", "dfn/",
			"abbr/", "acronym/", "code/", "samp/", "kbd/",
			"var/", "sub/", "sup/", "del/", "ins/",
			"isindex", "a/", "img", "br", "map/",
			"area", "object/", "param", "applet/", "span/",

			0 };
		144
		// Null-terminated table of the recognised block-level HTML tags. A
		// trailing '/' marks a tag that can have a closing tag; initTag() strips
		// it before storing the name. The "s/" entry is deliberately commented out.
		const char *cBlockTagInit[]={ "p/", "blockquote/", "hr", "h1/", "h2/",
			"h3/", "h4/", "h5/", "h6/", "dl/", "dt/", "dd/", "ol/", "ul/",
			"li/", "dir/", "menu/", "table/", "tr/", "th/", "td/", "col",
			"colgroup/", "caption/", "thead/", "tbody/", "tfoot/", "form/",
			"select/", "option", "input", "label/", "textarea/", "div/", "pre/",
			"address/", "iframe/", "frame/", "frameset/", "noframes/",
			"center/", "b/", "i/", "big/", "small/", /*"s/",*/ "strike/", "tt/",
			"u/", "font/", "ins/", "del/", 0 };
		153
		154		// Other official ones (not presently in use in this code)
		155		//"!doctype", "bdo", "body", "button", "fieldset", "head", "html",
		156		//"legend", "noscript", "optgroup", "xmp",
		157
		158		boost::unordered_set<std::string> otherTags, blockTags;
		159
		160		void initTag(boost::unordered_set<std::string> &set, const char *init[]) {
		161		for (size_t x=0; init[x]!=0; ++x) {
		162		std::string str=init[x];
		163		if (*str.rbegin()=='/') {
		164		// Means it can have a closing tag
		165		str=str.substr(0, str.length()-1);
		166		}
		167		set.insert(str);
		168		}
		169		}
		170
		// Returns `ref` with every run of consecutive spaces collapsed to a single
		// space. Leading and trailing runs are collapsed too, not removed. Only
		// the ' ' character is treated as a space.
		std::string cleanTextLinkRef(const std::string& ref) {
			std::string r;
			r.reserve(ref.size());
			for (std::string::const_iterator i=ref.begin(), ie=ref.end(); i!=ie;
				++i)
			{
				if (*i==' ') {
					// Emit a space only if the previous output character was not one.
					if (r.empty() || *r.rbegin()!=' ') r.push_back(' ');
				} else r.push_back(*i);
			}
			return r;
		}
		182
		183		} // namespace
		184
		185
		186
		187		size_t isValidTag(const std::string& tag, bool nonBlockFirst) {
		188		if (blockTags.empty()) {
		189		initTag(otherTags, cOtherTagInit);
		190		initTag(blockTags, cBlockTagInit);
		191		}
		192
		193		if (nonBlockFirst) {
		194		if (otherTags.find(tag)!=otherTags.end()) return 1;
		195		if (blockTags.find(tag)!=blockTags.end()) return 2;
		196		} else {
		197		if (blockTags.find(tag)!=blockTags.end()) return 2;
		198		if (otherTags.find(tag)!=otherTags.end()) return 1;
		199		}
		200		return 0;
		201		}
		202
		203
		204
		205		void TextHolder::writeAsHtml(std::ostream& out) const {
		206		preWrite(out);
		207		if (mEncodingFlags!=0) {
		208		out << encodeString(mText, mEncodingFlags);
		209		} else {
		210		out << mText;
		211		}
		212		postWrite(out);
		213		}
		214
		215		optional<TokenGroup> RawText::processSpanElements(const LinkIds& idTable) {
		216		if (!canContainMarkup()) return none;
		217
		218		ReplacementTable replacements;
		219		std::string str=_processHtmlTagAttributes(*text(), replacements);
		220		str=_processCodeSpans(str, replacements);
		221		str=_processEscapedCharacters(str);
		222		str=_processLinksImagesAndTags(str, replacements, idTable);
		223		return _processBoldAndItalicSpans(str, replacements);
		224		}
		225
		226		std::string RawText::_processHtmlTagAttributes(std::string src, ReplacementTable&
		227		replacements)
		228		{
		229		// Because "Attribute Content Is Not A Code Span"
		230		std::string tgt;
		231		std::string::const_iterator prev=src.begin(), end=src.end();
		232		while (1) {
		233		static const boost::regex cHtmlToken("<((/?)([a-zA-Z0-9]+)(?:( +[a-zA-Z0-9]+?(?: ?= ?(\"\|').?\\5))+? /? *))>");
		234		boost::smatch m;
		235		if (boost::regex_search(prev, end, m, cHtmlToken)) {
		236		// NOTE: Kludge alert! The `isValidTag` test is a cheat, only here
		237		// to handle some edge cases between the Markdown test suite and the
		238		// PHP-Markdown one, which seem to conflict.
		239		if (isValidTag(m[3])) {
		240		tgt+=std::string(prev, m[0].first);
		241
		242		std::string fulltag=m[0], tgttag;
		243		std::string::const_iterator prevtag=fulltag.begin(), endtag=fulltag.end();
		244		while (1) {
		245		static const boost::regex cAttributeStrings("= ?(\"\|').*?\\1");
		246		boost::smatch mtag;
		247		if (boost::regex_search(prevtag, endtag, mtag, cAttributeStrings)) {
		248		tgttag+=std::string(prevtag, mtag[0].first);
		249		tgttag+="\x01@"+boost::lexical_cast<std::string>(replacements.size())+"@htmlTagAttr\x01";
		250		prevtag=mtag[0].second;
		251
		252		replacements.push_back(TokenPtr(new TextHolder(std::string(mtag[0]), false, cAmps\|cAngles)));
		253		} else {
		254		tgttag+=std::string(prevtag, endtag);
		255		break;
		256		}
		257		}
		258		tgt+=tgttag;
		259		prev=m[0].second;
		260		} else {
		261		tgt+=std::string(prev, m[0].second);
		262		prev=m[0].second;
		263		}
		264		} else {
		265		tgt+=std::string(prev, end);
		266		break;
		267		}
		268		}
		269
		270		return tgt;
		271		}
		272
		273		std::string RawText::_processCodeSpans(std::string src, ReplacementTable&
		274		replacements)
		275		{
		276		static const boost::regex cCodeSpan[2]={
		277		boost::regex("(?:^\|(?<=[^\\\\]))`` (.+?) ``"),
		278		boost::regex("(?:^\|(?<=[^\\\\]))`(.+?)`")
		279		};
		280		for (int pass=0; pass<2; ++pass) {
		281		std::string tgt;
		282		std::string::const_iterator prev=src.begin(), end=src.end();
		283		while (1) {
		284		boost::smatch m;
		285		if (boost::regex_search(prev, end, m, cCodeSpan[pass])) {
		286		tgt+=std::string(prev, m[0].first);
		287		tgt+="\x01@"+boost::lexical_cast<std::string>(replacements.size())+"@codeSpan\x01";
		288		prev=m[0].second;
		289		replacements.push_back(TokenPtr(new CodeSpan(_restoreProcessedItems(m[1], replacements))));
		290		} else {
		291		tgt+=std::string(prev, end);
		292		break;
		293		}
		294		}
		295		src.swap(tgt);
		296		tgt.clear();
		297		}
		298		return src;
		299		}
		300
		301		std::string RawText::_processEscapedCharacters(const std::string& src) {
		302		std::string tgt;
		303		std::string::const_iterator prev=src.begin(), end=src.end();
		304		while (1) {
		305		std::string::const_iterator i=std::find(prev, end, '\\');
		306		if (i!=end) {
		307		tgt+=std::string(prev, i);
		308		++i;
		309		if (i!=end) {
		310		optional<size_t> e=isEscapedCharacter(*i);
		311		if (e) tgt+="\x01@#"+boost::lexical_cast<std::string>(*e)+"@escaped\x01";
		312		else tgt=tgt+'\\'+*i;
		313		prev=i+1;
		314		} else {
		315		tgt+='\\';
		316		break;
		317		}
		318		} else {
		319		tgt+=std::string(prev, end);
		320		break;
		321		}
		322		}
		323		return tgt;
		324		}
		325
		326		std::string RawText::_processSpaceBracketedGroupings(const std::string &src,
		327		ReplacementTable& replacements)
		328		{
		329		static const boost::regex cRemove("(?:(?: \\*+ )\|(?: _+ ))");
		330
		331		std::string tgt;
		332		std::string::const_iterator prev=src.begin(), end=src.end();
		333		while (1) {
		334		boost::smatch m;
		335		if (boost::regex_search(prev, end, m, cRemove)) {
		336		tgt+=std::string(prev, m[0].first);
		337		tgt+="\x01@"+boost::lexical_cast<std::string>(replacements.size())+"@spaceBracketed\x01";
		338		replacements.push_back(TokenPtr(new RawText(m[0])));
		339		prev=m[0].second;
		340		} else {
		341		tgt+=std::string(prev, end);
		342		break;
		343		}
		344		}
		345		return tgt;
		346		}
		347
		348		std::string RawText::_processLinksImagesAndTags(const std::string &src,
		349		ReplacementTable& replacements, const LinkIds& idTable)
		350		{
		351		// NOTE: Kludge alert! The "inline link or image" regex should be...
		352		//
		353		// "(?:(!?)\\[(.+?)\\] \$(.?)\$)"
		354		//
		355		// ...but that fails on the 'Images' test because it includes a "stupid URL"
		356		// that has parentheses within it. The proper way to deal with this would be
		357		// to match any nested parentheses, but regular expressions can't handle an
		358		// unknown number of nested items, so I'm cheating -- the regex for it
		359		// allows for one (and only one) pair of matched parentheses within the
		360		// URL. It makes the regex hard to follow (it was even harder to get right),
		361		// but it allows it to pass the test.
		362		//
		363		// The "reference link or image" one has a similar problem; it should be...
		364		//
		365		// "\|(?:(!?)\\[(.+?)\\](?: \\[(.?)\\])?)"
		366		//
		367		static const boost::regex cExpression(
		368		"(?:(!?)\\[([^\\]]+?)\\] \$([^\\(](?:\\(.?\$.?)*?)\\))" // Inline link or image
		369		"\|(?:(!?)\\[((?:[^]]?\\[.?\\].?)\|(?:.+?))\\](?: \\[(.*?)\\])?)" // Reference link or image
		370		"\|(?:<(/?([a-zA-Z0-9]+).*?)>)" // potential HTML tag or auto-link
		371		);
		372		// Important captures: 1/4=image indicator, 2/5=contents/alttext,
		373		// 3=URL/title, 6=optional link ID, 7=potential HTML tag or auto-link
		374		// contents, 8=actual tag from 7.
		375
		376		std::string tgt;
		377		std::string::const_iterator prev=src.begin(), end=src.end();
		378		while (1) {
		379		boost::smatch m;
		380		if (boost::regex_search(prev, end, m, cExpression)) {
		381		assert(m[0].matched);
		382		assert(m[0].length()!=0);
		383
		384		tgt+=std::string(prev, m[0].first);
		385		tgt+="\x01@"+boost::lexical_cast<std::string>(replacements.size())+"@links&Images1\x01";
		386		prev=m[0].second;
		387
		388		bool isImage=false, isLink=false, isReference=false;
		389		if (m[4].matched && m[4].length()) isImage=isReference=true;
		390		else if (m[1].matched && m[1].length()) isImage=true;
		391		else if (m[5].matched) isLink=isReference=true;
		392		else if (m[2].matched) isLink=true;
		393
		394		if (isImage \|\| isLink) {
		395		std::string contentsOrAlttext, url, title;
		396		bool resolved=false;
		397		if (isReference) {
		398		contentsOrAlttext=m[5];
		399		std::string linkId=(m[6].matched ? std::string(m[6]) : std::string());
		400		if (linkId.empty()) linkId=cleanTextLinkRef(contentsOrAlttext);
		401
		402		optional<markdown::LinkIds::Target> target=idTable.find(linkId);
		403		if (target) { url=target->url; title=target->title; resolved=true; };
		404		} else {
		405		static const boost::regex cReference("^<?([^ >])>?(?: (?:('\|\")(.)\\2)\|(?:\$(.)\$))? *$");
		406		// Useful captures: 1=url, 3/4=title
		407		contentsOrAlttext=m[2];
		408		std::string urlAndTitle=m[3];
		409		boost::smatch mm;
		410		if (boost::regex_match(urlAndTitle, mm, cReference)) {
		411		url=mm[1];
		412		if (mm[3].matched) title=mm[3];
		413		else if (mm[4].matched) title=mm[4];
		414		resolved=true;
		415		}
		416		}
		417
		418		if (!resolved) {
		419		// Just encode the first character as-is, and continue
		420		// searching after it.
		421		prev=m[0].first+1;
		422		replacements.push_back(TokenPtr(new RawText(std::string(m[0].first, prev))));
		423		} else if (isImage) {
		424		replacements.push_back(TokenPtr(new Image(contentsOrAlttext,
		425		url, title)));
		426		} else {
		427		replacements.push_back(TokenPtr(new HtmlAnchorTag(url, title)));
		428		tgt+=contentsOrAlttext;
		429		tgt+="\x01@"+boost::lexical_cast<std::string>(replacements.size())+"@links&Images2\x01";
		430		replacements.push_back(TokenPtr(new HtmlTag("/a")));
		431		}
		432		} else {
		433		// Otherwise it's an HTML tag or auto-link.
		434		std::string contents=m[7];
		435
		436		// cerr << "Evaluating potential HTML or auto-link: " << contents << endl;
		437		// cerr << "m[8]=" << m[8] << endl;
		438
		439		if (looksLikeUrl(contents)) {
		440		TokenGroup subgroup;
		441		subgroup.push_back(TokenPtr(new HtmlAnchorTag(contents)));
		442		subgroup.push_back(TokenPtr(new RawText(contents, false)));
		443		subgroup.push_back(TokenPtr(new HtmlTag("/a")));
		444		replacements.push_back(TokenPtr(new Container(subgroup)));
		445		} else if (looksLikeEmailAddress(contents)) {
		446		TokenGroup subgroup;
		447		subgroup.push_back(TokenPtr(new HtmlAnchorTag(emailEncode("mailto:"+contents))));
		448		subgroup.push_back(TokenPtr(new RawText(emailEncode(contents), false)));
		449		subgroup.push_back(TokenPtr(new HtmlTag("/a")));
		450		replacements.push_back(TokenPtr(new Container(subgroup)));
		451		} else if (isValidTag(m[8])) {
		452		replacements.push_back(TokenPtr(new HtmlTag(_restoreProcessedItems(contents, replacements))));
		453		} else {
		454		// Just encode it as-is
		455		replacements.push_back(TokenPtr(new RawText(m[0])));
		456		}
		457		}
		458		} else {
		459		tgt+=std::string(prev, end);
		460		break;
		461		}
		462		}
		463		return tgt;
		464		}
		465
		466		TokenGroup RawText::_processBoldAndItalicSpans(const std::string& src,
		467		ReplacementTable& replacements)
		468		{
		469		static const boost::regex cEmphasisExpression(
		470		"(?:(?<![_])([_]{1,3})([^_ ]+?)\\1(?![_]))" // Mid-word emphasis
		471		"\|((?:(?<!\\)\\{1,3}(?!\\*)\|(?<!_)_{1,3}(?!_))(?=.)(?! )(?![.,:;] )(?![.,:;]$))" // Open
		472		"\|((?<![* ])\\{1,3}(?!\\)\|(?<![ _])_{1,3}(?!_))" // Close
		473		);
		474
		475		TokenGroup tgt;
		476		std::string::const_iterator i=src.begin(), end=src.end(), prev=i;
		477
		478		while (1) {
		479		boost::smatch m;
		480		if (boost::regex_search(prev, end, m, cEmphasisExpression)) {
		481		if (prev!=m[0].first) tgt.push_back(TokenPtr(new
		482		RawText(std::string(prev, m[0].first))));
		483		if (m[3].matched) {
		484		std::string token=m[3];
		485		tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0],
		486		token.length())));
		487		prev=m[0].second;
		488		} else if (m[4].matched) {
		489		std::string token=m[4];
		490		tgt.push_back(TokenPtr(new BoldOrItalicMarker(false, token[0],
		491		token.length())));
		492		prev=m[0].second;
		493		} else {
		494		std::string token=m[1], contents=m[2];
		495		tgt.push_back(TokenPtr(new BoldOrItalicMarker(true, token[0],
		496		token.length())));
		497		tgt.push_back(TokenPtr(new RawText(std::string(contents))));
		498		tgt.push_back(TokenPtr(new BoldOrItalicMarker(false, token[0],
		499		token.length())));
		500		prev=m[0].second;
		501		}
		502		} else {
		503		if (prev!=end) tgt.push_back(TokenPtr(new RawText(std::string(prev,
		504		end))));
		505		break;
		506		}
		507		}
		508
		509		int id=0;
		510		for (TokenGroup::iterator ii=tgt.begin(), iie=tgt.end(); ii!=iie; ++ii) {
		511		if ((*ii)->isUnmatchedOpenMarker()) {
		512		BoldOrItalicMarker openToken=dynamic_cast<BoldOrItalicMarker>(ii->get());
		513
		514		// Find a matching close-marker, if it's there
		515		TokenGroup::iterator iii=ii;
		516		for (++iii; iii!=iie; ++iii) {
		517		if ((*iii)->isUnmatchedCloseMarker()) {
		518		BoldOrItalicMarker closeToken=dynamic_cast<BoldOrItalicMarker>(iii->get());
		519		if (closeToken->size()==3 && openToken->size()!=3) {
		520		// Split the close-token into a match for the open-token
		521		// and a second for the leftovers.
		522		closeToken->disable();
		523		TokenGroup g;
		524		g.push_back(TokenPtr(new BoldOrItalicMarker(false,
		525		closeToken->tokenCharacter(), closeToken->size()-
		526		openToken->size())));
		527		g.push_back(TokenPtr(new BoldOrItalicMarker(false,
		528		closeToken->tokenCharacter(), openToken->size())));
		529		TokenGroup::iterator after=iii;
		530		++after;
		531		tgt.splice(after, g);
		532		continue;
		533		}
		534
		535		if (closeToken->tokenCharacter()==openToken->tokenCharacter()
		536		&& closeToken->size()==openToken->size())
		537		{
		538		openToken->matched(closeToken, id);
		539		closeToken->matched(openToken, id);
		540		++id;
		541		break;
		542		} else if (openToken->size()==3) {
		543		// Split the open-token into a match for the close-token
		544		// and a second for the leftovers.
		545		openToken->disable();
		546		TokenGroup g;
		547		g.push_back(TokenPtr(new BoldOrItalicMarker(true,
		548		openToken->tokenCharacter(), openToken->size()-
		549		closeToken->size())));
		550		g.push_back(TokenPtr(new BoldOrItalicMarker(true,
		551		openToken->tokenCharacter(), closeToken->size())));
		552		TokenGroup::iterator after=ii;
		553		++after;
		554		tgt.splice(after, g);
		555		break;
		556		}
		557		}
		558		}
		559		}
		560		}
		561
		562		// "Unmatch" invalidly-nested matches.
		563		std::stack<BoldOrItalicMarker*> openMatches;
		564		for (TokenGroup::iterator ii=tgt.begin(), iie=tgt.end(); ii!=iie; ++ii) {
		565		if ((*ii)->isMatchedOpenMarker()) {
		566		BoldOrItalicMarker open=dynamic_cast<BoldOrItalicMarker>(ii->get());
		567		openMatches.push(open);
		568		} else if ((*ii)->isMatchedCloseMarker()) {
		569		BoldOrItalicMarker close=dynamic_cast<BoldOrItalicMarker>(ii->get());
		570
		571		if (close->id() != openMatches.top()->id()) {
		572		close->matchedTo()->matched(0);
		573		close->matched(0);
		574		} else {
		575		openMatches.pop();
		576		while (!openMatches.empty() && openMatches.top()->matchedTo()==0)
		577		openMatches.pop();
		578		}
		579		}
		580		}
		581
		582		TokenGroup r;
		583		for (TokenGroup::iterator ii=tgt.begin(), iie=tgt.end(); ii!=iie; ++ii) {
		584		if ((ii)->text() && (ii)->canContainMarkup()) {
		585		TokenGroup t=_encodeProcessedItems((ii)->text(), replacements);
		586		r.splice(r.end(), t);
		587		} else r.push_back(*ii);
		588		}
		589
		590		return r;
		591		}
		592
		593		TokenGroup RawText::_encodeProcessedItems(const std::string &src,
		594		ReplacementTable& replacements)
		595		{
		596		static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
		597
		598		TokenGroup r;
		599		std::string::const_iterator prev=src.begin();
		600		while (1) {
		601		boost::smatch m;
		602		if (boost::regex_search(prev, src.end(), m, cReplaced)) {
		603		std::string pre=std::string(prev, m[0].first);
		604		if (!pre.empty()) r.push_back(TokenPtr(new RawText(pre)));
		605		prev=m[0].second;
		606
		607		std::string ref=m[1];
		608		if (ref[0]=='#') {
		609		size_t n=boost::lexical_cast<size_t>(ref.substr(1));
		610		r.push_back(TokenPtr(new EscapedCharacter(escapedCharacter(n))));
		611		} else if (!ref.empty()) {
		612		size_t n=boost::lexical_cast<size_t>(ref);
		613
		614		assert(n<replacements.size());
		615		r.push_back(replacements[n]);
		616		} // Otherwise just eat it
		617		} else {
		618		std::string pre=std::string(prev, src.end());
		619		if (!pre.empty()) r.push_back(TokenPtr(new RawText(pre)));
		620		break;
		621		}
		622		}
		623		return r;
		624		}
		625
		626		std::string RawText::_restoreProcessedItems(const std::string &src,
		627		ReplacementTable& replacements)
		628		{
		629		static const boost::regex cReplaced("\x01@(#?[0-9]*)@.+?\x01");
		630
		631		std::ostringstream r;
		632		std::string::const_iterator prev=src.begin();
		633		while (1) {
		634		boost::smatch m;
		635		if (boost::regex_search(prev, src.end(), m, cReplaced)) {
		636		std::string pre=std::string(prev, m[0].first);
		637		if (!pre.empty()) r << pre;
		638		prev=m[0].second;
		639
		640		std::string ref=m[1];
		641		if (ref[0]=='#') {
		642		size_t n=boost::lexical_cast<size_t>(ref.substr(1));
		643		r << '\\' << escapedCharacter(n);
		644		} else if (!ref.empty()) {
		645		size_t n=boost::lexical_cast<size_t>(ref);
		646
		647		assert(n<replacements.size());
		648		replacements[n]->writeAsOriginal(r);
		649		} // Otherwise just eat it
		650		} else {
		651		std::string pre=std::string(prev, src.end());
		652		if (!pre.empty()) r << pre;
		653		break;
		654		}
		655		}
		656		return r.str();
		657		}
		658
		659		HtmlAnchorTag::HtmlAnchorTag(const std::string& url, const std::string& title):
		660		TextHolder("<a href=\""+encodeString(url, cQuotes\|cAmps)+"\""
		661		+(title.empty() ? std::string() : " title=\""+encodeString(title, cQuotes\|cAmps)+"\"")
		662		+">", false, 0)
		663		{
		664		// This space deliberately blank. ;-)
		665		}
		666
		667		void CodeBlock::writeAsHtml(std::ostream& out) const {
		668		out << "<pre><code>";
		669		TextHolder::writeAsHtml(out);
		670		out << "</code></pre>\n\n";
		671		}
		672
		673		void CodeSpan::writeAsHtml(std::ostream& out) const {
		674		out << "<code>";
		675		TextHolder::writeAsHtml(out);
		676		out << "</code>";
		677		}
		678
		679		void CodeSpan::writeAsOriginal(std::ostream& out) const {
		680		out << '`' << *text() << '`';
		681		}
		682
		683
		684
		685		void Container::writeAsHtml(std::ostream& out) const {
		686		preWrite(out);
		687		for (CTokenGroupIter i=mSubTokens.begin(), ie=mSubTokens.end(); i!=ie; ++i)
		688		(*i)->writeAsHtml(out);
		689		postWrite(out);
		690		}
		691
		692		void Container::writeToken(size_t indent, std::ostream& out) const {
		693		out << std::string(indent*2, ' ') << containerName() << endl;
		694		for (CTokenGroupIter ii=mSubTokens.begin(), iie=mSubTokens.end(); ii!=iie;
		695		++ii)
		696		(*ii)->writeToken(indent+1, out);
		697		}
		698
		699		optional<TokenGroup> Container::processSpanElements(const LinkIds& idTable) {
		700		TokenGroup t;
		701		for (CTokenGroupIter ii=mSubTokens.begin(), iie=mSubTokens.end(); ii!=iie;
		702		++ii)
		703		{
		704		if ((*ii)->text()) {
		705		optional<TokenGroup> subt=(*ii)->processSpanElements(idTable);
		706		if (subt) {
		707		if (subt->size()>1) t.push_back(TokenPtr(new Container(*subt)));
		708		else if (!subt->empty()) t.push_back(*subt->begin());
		709		} else t.push_back(*ii);
		710		} else {
		711		optional<TokenGroup> subt=(*ii)->processSpanElements(idTable);
		712		if (subt) {
		713		const Container c=dynamic_cast<const Container>((*ii).get());
		714		assert(c!=0);
		715		t.push_back(c->clone(*subt));
		716		} else t.push_back(*ii);
		717		}
		718		}
		719		swapSubtokens(t);
		720		return none;
		721		}
		722
		723		UnorderedList::UnorderedList(const TokenGroup& contents, bool paragraphMode) {
		724		if (paragraphMode) {
		725		// Change each of the text items into paragraphs
		726		for (CTokenGroupIter i=contents.begin(), ie=contents.end(); i!=ie; ++i) {
		727		token::ListItem item=dynamic_cast<token::ListItem>((*i).get());
		728		assert(item!=0);
		729		item->inhibitParagraphs(false);
		730		mSubTokens.push_back(*i);
		731		}
		732		} else mSubTokens=contents;
		733		}
		734
		735
		736
		737		void BoldOrItalicMarker::writeAsHtml(std::ostream& out) const {
		738		if (!mDisabled) {
		739		if (mMatch!=0) {
		740		assert(mSize>=1 && mSize<=3);
		741		if (mOpenMarker) {
		742		out << (mSize==1 ? "<em>" : mSize==2 ? "<strong>" : "<strong><em>");
		743		} else {
		744		out << (mSize==1 ? "</em>" : mSize==2 ? "</strong>" : "</em></strong>");
		745		}
		746		} else out << std::string(mSize, mTokenCharacter);
		747		}
		748		}
		749
		750		void BoldOrItalicMarker::writeToken(std::ostream& out) const {
		751		if (!mDisabled) {
		752		if (mMatch!=0) {
		753		std::string type=(mSize==1 ? "italic" : mSize==2 ? "bold" : "italic&bold");
		754		if (mOpenMarker) {
		755		out << "Matched open-" << type << " marker" << endl;
		756		} else {
		757		out << "Matched close-" << type << " marker" << endl;
		758		}
		759		} else {
		760		if (mOpenMarker) out << "Unmatched bold/italic open marker: " <<
		761		std::string(mSize, mTokenCharacter) << endl;
		762		else out << "Unmatched bold/italic close marker: " <<
		763		std::string(mSize, mTokenCharacter) << endl;
		764		}
		765		}
		766		}
		767
		768		void Image::writeAsHtml(std::ostream& out) const {
		769		out << "<img src=\"" << mUrl << "\" alt=\"" << mAltText << "\"";
		770		if (!mTitle.empty()) out << " title=\"" << mTitle << "\"";
		771		out << "/>";
		772		}
		773
		774		} // namespace token
		775		} // namespace markdown