XML 格式轉換的宣告式語意

從〈查詢的宣告式語意〉提到的實作中,我們可以發現有關資料搜尋的一種設計抽象概念,可以用相同的模式應用在不同的資料結構上,例如從 XML 文件中搜尋特定的資料節點。同人曾經用過宣告式語意實作轉換期貨交易 Span 檔案,將 XML 資料格式轉換成某種特定格式的資料檔案,像下面這段程式碼所示,程式碼的寫法比傳統的命令式語意寫法更為精簡而直覺。

// Load the SPAN XML file; the second argument is the encoding name handed
// to the XML parser.
xml::XmlDocument xmlDoc(spanFile, "ISO-8859-1");

// Every statement below has the same shape:
//   1. xmlDoc.find(xpath) selects the nodes matching the XPath expression,
//   2. XmlTransform<T>(...) wraps that node list,
//   3. calling it with a span::WriteXxx functor maps each node to one T.
// The node list returned by find() is a temporary, so the transform is
// invoked inside the same full expression, while that list is still alive.
// Keep each statement as one expression.

// One EnvMessage for the root element.
std::list<span::EnvMessage> envMsgs =
	xml::XmlTransform<span::EnvMessage>(xmlDoc.find(
	"/spanFile"))(span::WriteEnv());

// One CurMessage per currencyDef node.
std::list<span::CurMessage> curMsgs =
	xml::XmlTransform<span::CurMessage>(xmlDoc.find(
	"/spanFile/definitions/currencyDef"))(span::WriteCur());

// One ChrMessage per curConv node.
std::list<span::ChrMessage> chrMsgs =
	xml::XmlTransform<span::ChrMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/curConv"))(
	span::WriteChr());

// One SccMessage per acctTypeDef node.
std::list<span::SccMessage> sccMsgs =
	xml::XmlTransform<span::SccMessage>(xmlDoc.find(
	"/spanFile/definitions/acctTypeDef"))(span::WriteScc());

// One PbrMessage per pbRateDef node.
std::list<span::PbrMessage> pbrMsgs =
	xml::XmlTransform<span::PbrMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/pbRateDef"))(
	span::WritePbr());

// One FutMessage per fut node under futPf.
std::list<span::FutMessage> futMsgs =
	xml::XmlTransform<span::FutMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/futPf/fut"))(
	span::WriteFut());

// One OptMessage per opt node under oopPf; the writer is given the
// portfolio element name.
std::list<span::OptMessage> optMsg1s =
	xml::XmlTransform<span::OptMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/"
	"oopPf/series/opt"))(
	span::WriteOpt("oopPf"));

// Same mapping for opt nodes under oofPf.
std::list<span::OptMessage> optMsg2s =
	xml::XmlTransform<span::OptMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/"
	"oofPf/series/opt"))(
	span::WriteOpt("oofPf"));

// One CcdMessage per ccDef node.
std::list<span::CcdMessage> ccdMsgs =
	xml::XmlTransform<span::CcdMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef"))(
	span::WriteCcd());

// One SdjMessage per adjRate node under ccDef.
std::list<span::SdjMessage> sdjMsgs =
	xml::XmlTransform<span::SdjMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef/adjRate"))(
	span::WriteSdj());

// One PflMessage per pfLink node under ccDef.
std::list<span::PflMessage> pflMsgs =
	xml::XmlTransform<span::PflMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef/pfLink"))(
	span::WritePfl());

// One ItsMessage per dSpread node under interSpreads.
std::list<span::ItsMessage> itsMsgs =
	xml::XmlTransform<span::ItsMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/interSpreads/dSpread"))(
	span::WriteIts());

// Emit the collected messages, one list after another, in this fixed order.
writeMsgLines(envMsgs);
writeMsgLines(curMsgs);
writeMsgLines(chrMsgs);
writeMsgLines(sccMsgs);
writeMsgLines(pbrMsgs);
writeMsgLines(futMsgs);
writeMsgLines(optMsg1s);
writeMsgLines(optMsg2s);
writeMsgLines(ccdMsgs);
writeMsgLines(sdjMsgs);
writeMsgLines(pflMsgs);
writeMsgLines(itsMsgs);

上面這段程式碼明白地表達 XML 格式轉換的意圖,首先從 XML 文件中找出符合某些搜尋條件的資料節點,將其轉換成特定格式的訊息,並且收集到訊息佇列之中,最後再把每一個訊息佇列輸出成多行的資料檔案。這很清楚地表示格式轉換包括按照條件搜尋和資料映射的兩個抽象過程,就跟資料表格資料查詢的 where() 以及 select() 一樣,其實說穿了就是 functional programming 對處理資料串列的過濾(filter)與對映(map)的兩大手段,這意味著不管資料來源在現實的實作有多麼大的差異,其搜尋資料的操作過程之抽象概念卻是殊途同歸,可以用同樣的方式來表述問題。

在這邊同人定義了 XmlDocument 代表 XML 文件,其定義如下所示:

#ifndef XMLDOCUMENT_H_
#define XMLDOCUMENT_H_

#include <algorithm>
#include <iterator>
#include <list>
#include <memory>
#include <string>
#include <vector>

#include "libxml/parser.h"
#include "libxml/xpath.h"

using namespace std;
namespace xml {

class XmlNode;

/**
 * A parsed XML document that can be queried with XPath expressions.
 */
class XmlDocument {
private:
	// Owning handle to the libxml2 document; may be empty, in which case
	// find() returns an empty list.
	shared_ptr<xmlDoc> _doc;

private:
	friend class XmlNode;
	// Wraps an already parsed libxml2 document; reachable only from this
	// class and its friend XmlNode.
	XmlDocument(xmlDocPtr doc);

public:
	/**
	 * Reads and parses the XML file.
	 * @param filename path of the XML file to read.
	 * @param encoding encoding name passed to the parser.
	 */
	XmlDocument(const char* filename, const char* encoding);
	virtual ~XmlDocument();

public:
	/**
	 * Evaluates an XPath expression against the document.
	 * @param xpath XPath expression.
	 * @return the matching nodes; empty when nothing matches or no
	 *         document is loaded. Each returned XmlNode keeps a plain
	 *         pointer to this document.
	 */
	list<XmlNode> find(const char* xpath);

};

/**
 * Lightweight handle to one node of an XmlDocument. A handle may be
 * "null" (no underlying node); test it through operator bool.
 */
class XmlNode {
protected:
	// Underlying libxml2 node; NULL for a null handle.
	xmlNodePtr _node;
	// Document this handle was created from (plain, non-owning pointer).
	XmlDocument* _doc;

protected:
	friend class XmlDocument;
	// Handles are created by XmlDocument and by XmlNode itself.
	XmlNode(XmlDocument* doc, xmlNodePtr node);

private:
	// Splits a '/'-separated path into its segments, appended to nodeNames.
	static void parseFoundPath(const char* path,
		vector<string>& nodeNames);

public:
	XmlNode(const XmlNode&);
	virtual ~XmlNode();

public:
	// True when the handle refers to an actual node.
	operator bool() const;

public:
	// Element name; empty for a null handle or a non-element node.
	string getName() const;
	// Content of the node's first child; empty when there is none.
	string getContent() const;

	// Navigation helpers. Each returns a null handle when the requested
	// relative does not exist or when this handle is itself null.
	XmlNode getFirstChildren() const;
	XmlNode getLastChildren() const;
	XmlNode getNext() const;
	XmlNode getPrevious() const;
	XmlNode getParent() const;

	// Document pointer this handle was created with.
	XmlDocument* getDocument() const;

public:
	/**
	 * Resolves a '/'-separated path of element names relative to this
	 * node; "." means the current node and ".." the parent.
	 * @return the matching node, or a null handle when nothing matches.
	 */
	XmlNode find(const char* path) const;

};

/**
 * Function object that maps every XmlNode of a node list to a T.
 *
 * Typical use: XmlTransform<Msg>(doc.find(xpath))(mapper), where mapper is
 * a function or functor callable as T(const XmlNode&).
 *
 * @tparam T element type of the resulting list.
 */
template <typename T>
class XmlTransform {
private:
	// Held by value on purpose: callers usually pass the temporary list
	// returned by XmlDocument::find(), and a stored reference would dangle
	// as soon as the transform object outlives that expression. XmlNode is
	// a pair of pointers, so the copy is cheap.
	const list<XmlNode> _nodes;

public:
	/** @param nodes nodes to be mapped; copied into the transform. */
	XmlTransform(const list<XmlNode>& nodes) : _nodes(nodes) {
	}

	~XmlTransform() {
	}

public:
	/**
	 * Applies op to every node, in list order.
	 * @param op unary callable taking a const XmlNode& and returning a
	 *           value convertible to T.
	 * @return one T per node, in the same order as the node list.
	 */
	template <typename UnaryOperation>
	list<T> operator() (UnaryOperation op) {
		list<T> result;

		// Append instead of resize()+overwrite so that T does not need a
		// default constructor and no throw-away elements are built.
		transform(_nodes.begin(), _nodes.end(),
			back_inserter(result), op);
		return result;
	}
};

/**
 * Abstract builder that fills a caller-owned T from XML nodes, one node
 * per build() call.
 *
 * @tparam T type of the object being populated.
 */
template <typename T>
class XmlTransformationBuilder {
public:
	/** @param result object to populate; only a reference is kept. */
	XmlTransformationBuilder(T& result) : _result(result) {}

	virtual ~XmlTransformationBuilder() {}

	/** Consumes one node and updates the target object accordingly. */
	virtual void build(XmlNode& node) = 0;

protected:
	// Target object shared with derived builders.
	T& _result;
};

template <typename T>
class XmlTransformationEnumerator {
private:
	XmlTransformationBuilder<T>* _builder;

public:
	XmlTransformationEnumerator(
		XmlTransformationBuilder<T>* builder) :
		_builder(builder) {
	}

	virtual ~XmlTransformationEnumerator() {
	}

public:
	void enumerate(const XmlNode& node) {
		xml::XmlNode children = node.getFirstChildren();
		while (children) {
			_builder->build(children);
			children = children.getNext();
		}
	}

};

}
#endif //XMLDOCUMENT_H_

以上 XmlDocument.h 的定義,除了與 XML 節點資料映射有關的 class template 之外,我們看到對 XmlDocument 進行 find() 操作可以得到符合 XPath 搜尋條件的 XmlNode 清單,而對 XmlNode 可以進行循訪或搜尋特定節點的操作而傳回其它的 XmlNode。這些操作是用 Libxml2 來實作,如下面這段程式碼所示:

#include <cstring>
#include <stdexcept>
#include <vector>
#include <stack>

#include "libxml/xpath.h"
#include "XmlDocument.h"

#define PARENT_NODENAME		".."
#define ANY_NODENAME		"*"
#define CURRENT_NODENAME	"."

using namespace std;

namespace xml {

// Takes ownership of an already parsed document; it is released with
// xmlFreeDoc when the last owner goes away.
XmlDocument::XmlDocument(xmlDocPtr doc)
	: _doc(doc, xmlFreeDoc) {
}

// Parses `filename` using `encoding` with default parser options. The
// stored pointer is null when parsing fails.
XmlDocument::XmlDocument(const char* filename, const char* encoding)
	: _doc(xmlReadFile(filename, encoding, 0), xmlFreeDoc) {
}

// Nothing to do explicitly: the _doc member releases the document.
XmlDocument::~XmlDocument() {}

/**
 * Evaluates an XPath expression against the document.
 *
 * @param xpath XPath expression; a NULL pointer yields an empty list.
 * @return the matching nodes in the order libxml2 reports them. The list
 *         is empty when no document is loaded, when evaluation fails, or
 *         when the result is not a non-empty node set.
 */
list<XmlNode> XmlDocument::find(const char* xpath) {
	list<XmlNode> nodelist;

	if (!_doc || xpath == NULL) {
		return nodelist;
	}

	// Both libxml2 handles are released automatically on every exit path.
	shared_ptr<xmlXPathContext> context(
		xmlXPathNewContext(_doc.get()), xmlXPathFreeContext);
	if (!context) {
		return nodelist;
	}

	shared_ptr<xmlXPathObject> xpathobj(
		xmlXPathEvalExpression(
			reinterpret_cast<const xmlChar*>(xpath), context.get()),
		xmlXPathFreeObject);
	if (!xpathobj || xpathobj->type != XPATH_NODESET) {
		return nodelist;
	}

	// nodesetval may be NULL even for a node-set result, so it must be
	// checked before it is dereferenced.
	xmlNodeSetPtr nodeset = xpathobj->nodesetval;
	if (xmlXPathNodeSetIsEmpty(nodeset)) {
		return nodelist;
	}

	for (int i = 0; i < nodeset->nodeNr; ++i) {
		nodelist.push_back(XmlNode(this, nodeset->nodeTab[i]));
	}
	return nodelist;
}

// Builds a handle for `node` (NULL gives a null handle) belonging to `doc`.
XmlNode::XmlNode(XmlDocument* doc, xmlNodePtr node)
	: _node(node)
	, _doc(doc) {
}

// Copies the two pointers; both handles then refer to the same node.
XmlNode::XmlNode(const XmlNode& other)
	: _node(other._node)
	, _doc(other._doc) {
}

// A handle frees nothing: neither the node nor the document is released.
XmlNode::~XmlNode() {}

// A handle converts to true only when it refers to an actual node.
XmlNode::operator bool() const {
	if (_node == NULL) {
		return false;
	}
	return true;
}

// Returns the element name, or an empty string for a null handle or a
// node that is not an element.
string XmlNode::getName() const {
	if (_node == NULL || _node->type != XML_ELEMENT_NODE) {
		return string();
	}
	return string(reinterpret_cast<const char*>(_node->name));
}

// Returns the content stored on the node's first child. The string is
// empty for a null handle, a childless node, or a first child that
// carries no content.
string XmlNode::getContent() const {
	if (_node == NULL || _node->children == NULL) {
		return string();
	}
	const xmlChar* text = _node->children->content;
	if (text == NULL) {
		return string();
	}
	return string(reinterpret_cast<const char*>(text));
}

// First element child, or a null handle when there is none.
XmlNode XmlNode::getFirstChildren() const {
	xmlNodePtr child = NULL;
	if (_node != NULL) {
		child = xmlFirstElementChild(_node);
	}
	return XmlNode(_doc, child);
}

// Last element child, or a null handle when there is none.
XmlNode XmlNode::getLastChildren() const {
	xmlNodePtr child = NULL;
	if (_node != NULL) {
		child = xmlLastElementChild(_node);
	}
	return XmlNode(_doc, child);
}

// Next element sibling, or a null handle when there is none.
XmlNode XmlNode::getNext() const {
	xmlNodePtr sibling = NULL;
	if (_node != NULL) {
		sibling = xmlNextElementSibling(_node);
	}
	return XmlNode(_doc, sibling);
}

// Previous element sibling, or a null handle when there is none.
XmlNode XmlNode::getPrevious() const {
	xmlNodePtr sibling = NULL;
	if (_node != NULL) {
		sibling = xmlPreviousElementSibling(_node);
	}
	return XmlNode(_doc, sibling);
}

// Parent node as recorded by libxml2, or a null handle for a null handle.
XmlNode XmlNode::getParent() const {
	xmlNodePtr parent = NULL;
	if (_node != NULL) {
		parent = _node->parent;
	}
	return XmlNode(_doc, parent);
}

// Returns the document pointer this handle was created with.
XmlDocument* XmlNode::getDocument() const {
	return this->_doc;
}

void XmlNode::parseFoundPath(const char* path,
	vector<string>& nodeNames) {
	string s = path;
	size_t pos = 0, found;
	while ((found = s.find('/', pos)) != string::npos) {
		nodeNames.push_back(s.substr(pos, found - pos));
		pos = (found + 1);
	}
	string lastNode = s.substr(pos);
	if (!lastNode.empty()) {
		nodeNames.push_back(lastNode);
	}
}

XmlNode XmlNode::find(const char* path) const {
	vector<string> nodeNames;
	parseFoundPath(path, nodeNames);
	stack<XmlNode> nodeStack;

	XmlNode node = *this;
	for (int i = 0; i < nodeNames.size(); i ++) {
		if (nodeNames[i].empty() || nodeNames[i] ==
			CURRENT_NODENAME) {
			continue;
		}
		else if (nodeNames[i] == PARENT_NODENAME) {
			node = node.getParent();
			continue;
		}
		else {
			node = node.getFirstChildren();
			bool found = false;
			do {
				while (node && !found) {
					if (node.getName() == nodeNames[i]) {
						nodeStack.push(node);
						found = true;
					}
					else {
						node = node.getNext();
					}
				}
				if (!found) {
					if (!nodeStack.empty()) {
						node = nodeStack.top();
						nodeStack.pop();
						node = node.getNext();
						i --;
					}
					else {
						return XmlNode(_doc, NULL);
					}
				}
			} while (node && !found);
		}
	}
	return node;
}

}

關於 XML 節點資料映射的程式,其中 XmlTransform<T> 是以一個 list<XmlNode> 來建構的 function object,它的函式呼叫運算子傳入的是資料映射的函式或仿函式,用以回傳資料型態為 list<T> 的轉換結果,是透過 std::transform() 來實作。而 XmlTransformationEnumerator<T> 和 XmlTransformationBuilder<T> 則是應用 GOF Builder Pattern 來抽象化映射整個 XmlNode 的建造過程,這是考量資料節點包含順序及階層的結構,實際的使用方式可以參看下面的範例:

/**
 * Functor that renders a decimal number given as text into a zero-padded
 * digit string without a decimal point.
 */
class FixPointParse {
private:
	// Number of digits reserved for the integer part.
	int _integerSize;
	// Number of digits kept after the decimal point.
	int _decimalSize;

public:
	FixPointParse(int integerSize, int decimalSize);
	virtual ~FixPointParse();

public:
	// Converts `text`; an empty text yields a string of '0' characters of
	// length integerSize + decimalSize.
	string operator() (string text);
};

/**
 * Functor that extracts a substring and pads it with spaces on the right
 * to a fixed width.
 */
class SubStringParse {
private:
	// Offset of the first character to extract.
	size_t _pos;
	// Maximum number of characters to extract; also the output width.
	size_t _len;
public:
	SubStringParse(size_t pos, size_t len);
	virtual ~SubStringParse();

public:
	// Returns text[pos, pos+len) left-aligned in a field of `len` spaces;
	// a string of `len` spaces when pos is beyond the end of the text.
	string operator() (string text);

};

/**
 * Builder that fills a FutMessage from the element children of a node.
 */
class FutTransformationBuilder :
	public xml::XmlTransformationBuilder<FutMessage> {
public:
	/** @param futMessage message to populate; only referenced. */
	FutTransformationBuilder(FutMessage& futMessage) :
		xml::XmlTransformationBuilder<FutMessage>(futMessage) {}

	virtual ~FutTransformationBuilder() {}

	/** Maps one child node onto the matching FutMessage field. */
	void build(xml::XmlNode& node);

private:
	/**
	 * Nested builder for the children of an "ra" node; it writes into the
	 * same FutMessage and counts the "a" entries it has seen.
	 */
	class DataRaBuilder :
		public xml::XmlTransformationBuilder<FutMessage> {
	public:
		DataRaBuilder(FutMessage& futMessage) :
			xml::XmlTransformationBuilder<FutMessage>(futMessage),
			_riskCount(0) {}

		virtual ~DataRaBuilder() {}

		void build(xml::XmlNode& node);

	private:
		// Number of "a" children processed so far.
		int _riskCount;
	};
};

// Stores the digit counts of the integer and the fractional part.
FixPointParse::FixPointParse(int integerSize, int decimalSize)
	: _integerSize(integerSize)
	, _decimalSize(decimalSize) {
}

// No resources to release.
FixPointParse::~FixPointParse() {}

// Formats `text` as a fixed-point number with _decimalSize fractional
// digits, zero-filled to the configured width, then drops the decimal
// point. An empty text gives a run of '0' of length
// _integerSize + _decimalSize.
string FixPointParse::operator() (string text) {
	string digits(_integerSize + _decimalSize, '0');
	if (text.empty()) {
		return digits;
	}

	// One extra column is reserved for the decimal point when a
	// fractional part is requested; it is removed again below.
	const int pointColumn = (_decimalSize > 0) ? 1 : 0;

	ostringstream formatter;
	formatter.fill('0');
	formatter.precision(_decimalSize);
	formatter.width(_integerSize + _decimalSize + pointColumn);
	formatter << fixed << internal << atof(text.c_str());

	digits = formatter.str();
	const size_t pointAt = digits.find('.');
	if (pointAt != string::npos) {
		digits.erase(pointAt, 1);
	}
	return digits;
}

// Stores the start offset and the field width.
SubStringParse::SubStringParse(size_t pos, size_t len)
	: _pos(pos)
	, _len(len) {
}

// No resources to release.
SubStringParse::~SubStringParse() {}

// Returns up to _len characters of `text` starting at _pos, left-aligned
// and space-padded to _len columns. When _pos lies at or beyond the end
// of the text the result is _len spaces.
string SubStringParse::operator() (string text) {
	if (_pos >= text.size()) {
		return string(_len, ' ');
	}

	ostringstream field;
	field.fill(' ');
	field.width(_len);
	field << std::left << text.substr(_pos, _len);
	return field.str();
}

// Dispatches on the element name of `node` and stores the formatted value
// in the matching FutMessage field. Unknown element names are ignored.
void FutTransformationBuilder::build(xml::XmlNode& node) {
	const string name = node.getName();
	const string content = node.getContent();

	if (name == "cId") {
		_result.cid = FixPointParse(4, 0)(content);
		return;
	}
	if (name == "pe") {
		_result.pe = SubStringParse(0, 6)(content);
		return;
	}
	if (name == "setlDate") {
		_result.setldate = FixPointParse(8, 0)(content);
		return;
	}
	if (name == "p") {
		_result.p = FixPointParse(8, 3)(content);
		return;
	}
	if (name == "sc") {
		_result.sc = FixPointParse(2, 6)(content);
		return;
	}
	if (name == "scanRate") {
		// The value lives in the nested priceScan element.
		const string priceScan = node.find("priceScan").getContent();
		_result.pricescan = FixPointParse(8, 6)(priceScan);
		return;
	}
	if (name == "ra") {
		// The children of "ra" are handled by the nested builder, which
		// writes into the same message.
		DataRaBuilder raBuilder(_result);
		xml::XmlTransformationEnumerator<FutMessage> walker(&raBuilder);
		walker.enumerate(node);
	}
}

// Handles one child of an "ra" node: "d" sets the delta field, and each
// "a" appends a formatted value to the risk field until
// FUT_RISK_MAX_COUNT values have been appended. Every "a" is counted,
// including those beyond the limit. Other elements are ignored.
void FutTransformationBuilder::DataRaBuilder::build(
	xml::XmlNode& node) {
	const string name = node.getName();
	const string content = node.getContent();

	if (name == "d") {
		_result.delta = FixPointParse(3, 6)(content);
		return;
	}
	if (name != "a") {
		return;
	}

	if (_riskCount < FUT_RISK_MAX_COUNT) {
		_result.risk += FixPointParse(8, 2)(content);
	}
	++_riskCount;
}

// Maps one fut node to a FutMessage. The portfolio-level fields (pfId,
// pfCode, currency) are read from the parent node; the remaining fields
// come from the node's own element children via FutTransformationBuilder.
FutMessage WriteFut::operator() (const xml::XmlNode& node) {
	FutMessage message;

	const xml::XmlNode portfolio = node.getParent();

	const string pfId = portfolio.find("pfId").getContent();
	message.pfid = FixPointParse(4, 0)(pfId);

	const string pfCode = portfolio.find("pfCode").getContent();
	message.pfcode = SubStringParse(0, 3)(pfCode);

	const string currency = portfolio.find("currency").getContent();
	message.currency = SubStringParse(0, 3)(currency);

	FutTransformationBuilder builder(message);
	xml::XmlTransformationEnumerator<FutMessage> walker(&builder);
	walker.enumerate(node);

	return message;
}

希望從以上及前幾篇文章提到程式碼的示範,可以讓大家感受一點宣告式語意和命令式語意表達方式的差別。當然宣告式語意不見得會比命令式語意的寫法更好,但依照同人程式開發經驗的體會,宣告式語意常常能帶來一些全新的思維,讓我們用更簡單而更有創意的做法來解決問題。讓程式開發更輕裝而行,有時候用物件導向的手法適時搭配泛函編程的抽象化,反而會讓看起來複雜的問題,不可思議地迎刃而解。

《易經繫辭》曰:「通其變,使民不倦,神而化之,使民宜之。易窮則變,變則通,通則久。」所以當我們習慣的命令式語意寫法讓程式碼變得複雜時,不妨嘗試看看用宣告式語意的寫法能不能讓解決問題更簡單。改變問題思考方向不但不會讓人倦怠,反而會發現更有趣的桃花源地,程式開發的創意又怎會有窮盡枯竭的時候呢?

Please follow and like us:
分類: 分析設計建模, 學習, 易經思維, 編程技巧。這篇內容的永久連結

在〈XML 格式轉換的宣告式語意〉中有 1 則留言

  1. 自動引用通知: 以泛函編程增進功能的可測性 « 同人的生活派對

發佈留言

發佈留言必須填寫的電子郵件地址不會公開。 必填欄位標示為 *