jim yeh on 四月 22nd, 2014

從〈查詢的宣告式語意〉提到的實作中,我們可以發現有關資料搜尋的一種設計抽象概念,可以用相同的模式應用在不同的資料結構上,例如從 XML 文件中搜尋特定的資料節點。同人曾經用過宣告式語意實作轉換期貨交易 Span 檔案,將 XML 資料格式轉換成某種特定格式的資料檔案,像下面這段程式碼所示,程式碼的寫法比傳統的命令式語意寫法更為精簡而直覺。

// Usage example: convert a SPAN XML file into line-oriented messages.
// The document is opened from `spanFile` with the encoding name "ISO-8859-1".
xml::XmlDocument xmlDoc(spanFile, "ISO-8859-1");

// Each statement below follows the same shape:
//   1. xmlDoc.find(xpath) selects the nodes matching the XPath expression;
//   2. xml::XmlTransform<Message>(nodes) wraps that node list;
//   3. calling the transform with a span::WriteXxx functor maps every node
//      to one Message and collects the results in a std::list.
// The transform is invoked in the same expression that calls find().
std::list<span::EnvMessage> envMsgs =
	xml::XmlTransform<span::EnvMessage>(xmlDoc.find(
	"/spanFile"))(span::WriteEnv());

std::list<span::CurMessage> curMsgs =
	xml::XmlTransform<span::CurMessage>(xmlDoc.find(
	"/spanFile/definitions/currencyDef"))(span::WriteCur());

std::list<span::ChrMessage> chrMsgs =
	xml::XmlTransform<span::ChrMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/curConv"))(
	span::WriteChr());

std::list<span::SccMessage> sccMsgs =
	xml::XmlTransform<span::SccMessage>(xmlDoc.find(
	"/spanFile/definitions/acctTypeDef"))(span::WriteScc());

std::list<span::PbrMessage> pbrMsgs =
	xml::XmlTransform<span::PbrMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/pbRateDef"))(
	span::WritePbr());

std::list<span::FutMessage> futMsgs =
	xml::XmlTransform<span::FutMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/futPf/fut"))(
	span::WriteFut());

// The two option queries differ only in the portfolio element name
// ("oopPf" vs "oofPf"), which is also handed to span::WriteOpt.
std::list<span::OptMessage> optMsg1s =
	xml::XmlTransform<span::OptMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/"
	"oopPf/series/opt"))(
	span::WriteOpt("oopPf"));

std::list<span::OptMessage> optMsg2s =
	xml::XmlTransform<span::OptMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/exchange/"
	"oofPf/series/opt"))(
	span::WriteOpt("oofPf"));

std::list<span::CcdMessage> ccdMsgs =
	xml::XmlTransform<span::CcdMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef"))(
	span::WriteCcd());

std::list<span::SdjMessage> sdjMsgs =
	xml::XmlTransform<span::SdjMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef/adjRate"))(
	span::WriteSdj());

std::list<span::PflMessage> pflMsgs =
	xml::XmlTransform<span::PflMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/ccDef/pfLink"))(
	span::WritePfl());

std::list<span::ItsMessage> itsMsgs =
	xml::XmlTransform<span::ItsMessage>(xmlDoc.find(
	"/spanFile/pointInTime/clearingOrg/interSpreads/dSpread"))(
	span::WriteIts());

// Emit the collected messages, one list at a time, in a fixed order.
// NOTE(review): writeMsgLines is defined elsewhere; presumably it writes one
// output line per message -- confirm against its definition.
writeMsgLines(envMsgs);
writeMsgLines(curMsgs);
writeMsgLines(chrMsgs);
writeMsgLines(sccMsgs);
writeMsgLines(pbrMsgs);
writeMsgLines(futMsgs);
writeMsgLines(optMsg1s);
writeMsgLines(optMsg2s);
writeMsgLines(ccdMsgs);
writeMsgLines(sdjMsgs);
writeMsgLines(pflMsgs);
writeMsgLines(itsMsgs);

上面這段程式碼明白地表達 XML 格式轉換的意圖,首先從 XML 文件中找出符合某些搜尋條件的資料節點,轉換成特定格式的訊息,並且收集到訊息佇列之中,最後再把每一個訊息佇列輸出成多行的資料檔案。這很清楚地表示格式轉換包括按照條件搜尋和資料映射的兩個抽象過程,就跟資料表格資料查詢的 where()、以及 select() 一樣,其實說穿了就是 functional programming 對處理資料串列的過濾(filter)與對映(map)的兩大手段,這意味著不管資料來源在現實的實作有多麼大的差異,但其搜尋資料的操作過程之抽象概念卻是殊途同歸,可以用同樣的方式來表述問題。

在這邊同人定義了 XmlDocument 代表 XML 文件,其定義如下所示:

#ifndef XMLDOCUMENT_H_
#define XMLDOCUMENT_H_

#include <algorithm>
#include <memory>
#include <list>
#include <string>
#include <vector>

#include "libxml/parser.h"
#include "libxml/xpath.h"

using namespace std;
namespace xml {

class XmlNode;

// Wraps a libxml2 document and answers XPath queries against it.
class XmlDocument {
private:
	// Shared handle to the underlying libxml2 document.
	shared_ptr<xmlDoc> _doc;

private:
	friend class XmlNode;
	// Wraps an existing libxml2 document pointer; private, so only this
	// class and its friend XmlNode can use it.
	XmlDocument(xmlDocPtr doc);

public:
	// Creates a document from a file name and an encoding name.
	XmlDocument(const char* filename, const char* encoding);
	virtual ~XmlDocument();

public:
	// Returns the nodes selected by the XPath expression `xpath`.
	list<XmlNode> find(const char* xpath);

};

// Lightweight, copyable handle to one node of an XmlDocument.
// A handle may be "null" (no underlying node); operator bool reports that.
class XmlNode {
protected:
	// Underlying libxml2 node; NULL for a null handle.
	xmlNodePtr _node;
	// Document this handle was obtained from (not owned by the handle).
	XmlDocument* _doc;

protected:
	friend class XmlDocument;
	// Only XmlDocument and XmlNode itself create handles from raw pointers.
	XmlNode(XmlDocument* doc, xmlNodePtr node);

private:
	// Splits a '/'-separated path into its segments, appending to nodeNames.
	static void parseFoundPath(const char* path,
		vector<string>& nodeNames);

public:
	XmlNode(const XmlNode&);
	virtual ~XmlNode();

public:
	// True when the handle refers to an actual node.
	operator bool() const;

public:
	// Element name, or an empty string for null / non-element nodes.
	string getName() const;
	// Content of the node's first child, or an empty string if there is none.
	string getContent() const;

	// Element-wise navigation; each returns a null handle when there is no
	// such node or when this handle is itself null.
	XmlNode getFirstChildren() const;
	XmlNode getLastChildren() const;
	XmlNode getNext() const;
	XmlNode getPrevious() const;
	XmlNode getParent() const;

	XmlDocument* getDocument() const;

public:
	// Resolves a '/'-separated relative path ("." and ".." are recognised)
	// starting at this node; returns a null handle when nothing matches.
	XmlNode find(const char* path) const;

};

/**
 * Function object that maps a list of XmlNode to a list<T>.
 *
 * Construct it from the nodes to convert, then call it with a unary
 * operation (function or functor) taking a `const XmlNode&` and returning
 * something convertible to T. The results are returned in node order.
 *
 * The node list is copied at construction. XmlNode is a small handle, so the
 * copy is cheap, and it keeps a named XmlTransform valid even when it was
 * built from a temporary such as the return value of XmlDocument::find().
 */
template <typename T>
class XmlTransform {
private:
	// Own copy of the nodes; a stored reference would dangle when the
	// constructor argument is a temporary.
	const list<XmlNode> _nodes;

public:
	XmlTransform(const list<XmlNode>& nodes) : _nodes(nodes) {
	}

	~XmlTransform() {
	}

public:
	/**
	 * Applies `op` to every node and collects the results.
	 *
	 * @param op unary operation invoked once per node, in list order.
	 * @return list with one converted element per node.
	 */
	template <typename UnaryOperation>
	list<T> operator() (UnaryOperation op) {
		list<T> result;

		// Append instead of resize-then-assign so that T does not have to be
		// default-constructible and each element is built only once.
		for (const XmlNode& node : _nodes) {
			result.push_back(op(node));
		}
		return result;
	}
};

// Abstract builder that fills a caller-owned T from XML nodes, one node per
// build() call.
template <typename T>
class XmlTransformationBuilder {
protected:
	// Object being populated; it is referenced, not copied.
	T& _result;

public:
	XmlTransformationBuilder(T& result) : _result(result) {
	}

	virtual ~XmlTransformationBuilder() {
	}

public:
	// Consumes one node and updates _result accordingly.
	virtual void build(XmlNode& node) = 0;
};

template <typename T>
class XmlTransformationEnumerator {
private:
	XmlTransformationBuilder<T>* _builder;

public:
	XmlTransformationEnumerator(
		XmlTransformationBuilder<T>* builder) :
		_builder(builder) {
	}

	virtual ~XmlTransformationEnumerator() {
	}

public:
	void enumerate(const XmlNode& node) {
		xml::XmlNode children = node.getFirstChildren();
		while (children) {
			_builder->build(children);
			children = children.getNext();
		}
	}

};

}
#endif //XMLDOCUMENT_H_

以上 XmlDocument.h 的定義,除了與 XML 節點資料映射有關的 class template 之外,我們看到對 XmlDocument 進行 find() 操作可以得到符合 XPath 搜尋條件的 XmlNode 清單,而對 XmlNode 可以進行循訪或搜尋特定節點的操作而傳回其它的 XmlNode。這些操作是用 Libxml2 來實作,如下面這段程式碼所示:

#include <cstring>
#include <stdexcept>
#include <vector>
#include <stack>

#include "libxml/xpath.h"
#include "XmlDocument.h"

#define PARENT_NODENAME		".."
#define ANY_NODENAME		"*"
#define CURRENT_NODENAME	"."

using namespace std;

namespace xml {

// Adopts an existing libxml2 document pointer; xmlFreeDoc is registered as
// the deleter of the shared handle.
XmlDocument::XmlDocument(xmlDocPtr doc) :
	_doc(doc, xmlFreeDoc) {
}

// Reads `filename` through xmlReadFile with the given encoding name and
// options value 0, and stores the result with xmlFreeDoc as its deleter.
XmlDocument::XmlDocument(const char* filename,
	const char* encoding) {
	xmlDocPtr parsed = xmlReadFile(filename, encoding, 0);
	_doc.reset(parsed, xmlFreeDoc);
}

// Nothing to do explicitly: _doc is a shared_ptr carrying its own deleter.
XmlDocument::~XmlDocument() {
}

/**
 * Evaluates an XPath expression against the document.
 *
 * @param xpath XPath expression; a NULL pointer yields an empty list.
 * @return the selected nodes in the order libxml2 reports them. The list is
 *         empty when the document is not loaded, the XPath context cannot
 *         be created, the evaluation fails, or the result carries no node
 *         set.
 */
list<XmlNode> XmlDocument::find(const char* xpath) {
	list<XmlNode> nodelist;

	if (!_doc || xpath == NULL) {
		return nodelist;
	}

	// The shared_ptr wrappers release the libxml2 objects on every exit path.
	shared_ptr<xmlXPathContext> context(
		xmlXPathNewContext(_doc.get()), xmlXPathFreeContext);
	if (!context) {
		return nodelist;
	}

	shared_ptr<xmlXPathObject> xpathobj(
		xmlXPathEvalExpression(
			reinterpret_cast<const xmlChar*>(xpath), context.get()),
		xmlXPathFreeObject);
	if (!xpathobj) {
		return nodelist;
	}

	// nodesetval may be NULL (for example when the expression does not
	// produce a node set); dereferencing it unchecked would crash.
	xmlNodeSetPtr nodeset = xpathobj->nodesetval;
	if (nodeset == NULL || nodeset->nodeTab == NULL) {
		return nodelist;
	}

	for (int i = 0; i < nodeset->nodeNr; ++i) {
		nodelist.push_back(XmlNode(this, nodeset->nodeTab[i]));
	}
	return nodelist;
}

// Builds a handle for `node` belonging to `doc`; `node` may be NULL to
// represent "no node".
XmlNode::XmlNode(XmlDocument* doc, xmlNodePtr node) :
	_node(node), _doc(doc) {
}

// Copies the two pointers; both handles then refer to the same node.
XmlNode::XmlNode(const XmlNode& other) :
	_node(other._node), _doc(other._doc) {
}

// The handle does not free the underlying node or document.
XmlNode::~XmlNode() {
}

// Reports whether this handle refers to an actual node.
XmlNode::operator bool() const {
	if (_node == NULL) {
		return false;
	}
	return true;
}

// Returns the element name, or an empty string when the handle is null or
// the node is not an element node.
string XmlNode::getName() const {
	if (_node == NULL || _node->type != XML_ELEMENT_NODE) {
		return string();
	}
	return string(reinterpret_cast<const char*>(_node->name));
}

// Returns the content of the node's first child, or an empty string when
// the handle is null, there is no child, or that child has no content.
string XmlNode::getContent() const {
	string text;
	const xmlNode* firstChild = (_node != NULL) ? _node->children : NULL;
	if (firstChild != NULL && firstChild->content != NULL) {
		text.assign(reinterpret_cast<const char*>(firstChild->content));
	}
	return text;
}

// First child element of this node; a null handle when this handle is null
// or xmlFirstElementChild finds nothing.
XmlNode XmlNode::getFirstChildren() const {
	xmlNodePtr firstElement = NULL;
	if (_node != NULL) {
		firstElement = xmlFirstElementChild(_node);
	}
	return XmlNode(_doc, firstElement);
}

// Last child element of this node; a null handle when this handle is null
// or xmlLastElementChild finds nothing.
XmlNode XmlNode::getLastChildren() const {
	xmlNodePtr lastElement = NULL;
	if (_node != NULL) {
		lastElement = xmlLastElementChild(_node);
	}
	return XmlNode(_doc, lastElement);
}

// Next sibling element; a null handle when this handle is null or
// xmlNextElementSibling finds nothing.
XmlNode XmlNode::getNext() const {
	xmlNodePtr nextElement = NULL;
	if (_node != NULL) {
		nextElement = xmlNextElementSibling(_node);
	}
	return XmlNode(_doc, nextElement);
}

// Previous sibling element; a null handle when this handle is null or
// xmlPreviousElementSibling finds nothing.
XmlNode XmlNode::getPrevious() const {
	xmlNodePtr previousElement = NULL;
	if (_node != NULL) {
		previousElement = xmlPreviousElementSibling(_node);
	}
	return XmlNode(_doc, previousElement);
}

// Parent of this node as recorded in the node's `parent` field; a null
// handle when this handle is null.
XmlNode XmlNode::getParent() const {
	xmlNodePtr parentNode = NULL;
	if (_node != NULL) {
		parentNode = _node->parent;
	}
	return XmlNode(_doc, parentNode);
}

// Returns the document pointer this handle was created with.
XmlDocument* XmlNode::getDocument() const {
	return _doc;
}

void XmlNode::parseFoundPath(const char* path,
	vector<string>& nodeNames) {
	string s = path;
	size_t pos = 0, found;
	while ((found = s.find('/', pos)) != string::npos) {
		nodeNames.push_back(s.substr(pos, found - pos));
		pos = (found + 1);
	}
	string lastNode = s.substr(pos);
	if (!lastNode.empty()) {
		nodeNames.push_back(lastNode);
	}
}

// Resolves the path segments names[index..] starting at `start`.
// Empty and "." segments stay on the current node, ".." moves to the parent,
// and any other segment selects a child element with that name. When several
// children share the name they are tried in document order, and the first
// one from which the rest of the path resolves wins. Returns a null handle
// when no combination matches.
static XmlNode resolvePathFrom(const XmlNode& start,
	const vector<string>& names, size_t index) {
	XmlNode current = start;
	while (index < names.size()) {
		const string& name = names[index];
		if (name.empty() || name == CURRENT_NODENAME) {
			++index;
			continue;
		}
		if (name == PARENT_NODENAME) {
			current = current.getParent();
			++index;
			continue;
		}

		// Named segment: depth-first search with backtracking. The recursion
		// keeps the segment index and the tree depth in step, which the
		// previous hand-rolled stack did not once it popped more than one
		// level or met "." / ".." segments.
		XmlNode child = current.getFirstChildren();
		for (; child; child = child.getNext()) {
			if (child.getName() != name) {
				continue;
			}
			XmlNode match = resolvePathFrom(child, names, index + 1);
			if (match) {
				return match;
			}
		}
		// Candidates exhausted: `child` is a null handle here.
		return child;
	}
	return current;
}

/**
 * Finds a node by a '/'-separated path relative to this node.
 *
 * @param path relative path such as "a/b", "./a" or "../a"; a NULL pointer
 *             yields a null handle.
 * @return the first node (in document order) reached by the whole path, or
 *         a null handle when nothing matches. A path made only of empty or
 *         "." segments returns this node itself.
 */
XmlNode XmlNode::find(const char* path) const {
	if (path == NULL) {
		return XmlNode(_doc, NULL);
	}

	vector<string> nodeNames;
	parseFoundPath(path, nodeNames);
	return resolvePathFrom(*this, nodeNames, 0);
}

}

關於 XML 節點資料映射的程式,其中 XmlTransform<T> 是用 list<XmlNode> 來建構的 function object,它的函式運作是傳入資料映射的函式或仿函式,用以回傳資料型態為 list<T> 的轉換結果,是透過 std::transform() 來實作。而 XmlTransformationEnumerator<T> 和 XmlTransformationBuilder<T> 則是應用 GOF Builder Pattern 來抽象化映射整個 XmlNode 的建造過程,這是考量資料節點包含順序及階層的結構,實際的使用方式可以參看下面的範例:

// Functor that formats a numeric text as a zero-padded fixed-point field
// with the decimal point removed.
class FixPointParse {
private:
	// Number of digits reserved for the integer part.
	int _integerSize;
	// Number of digits reserved for the fractional part.
	int _decimalSize;

public:
	FixPointParse(int integerSize, int decimalSize);
	virtual ~FixPointParse();

public:
	// Formats `text`; an empty text gives a field made entirely of '0'.
	string operator() (string text);
};

// Functor that extracts a substring and pads it with spaces to a fixed
// width.
class SubStringParse {
private:
	// Start offset of the substring within the input text.
	size_t _pos;
	// Maximum substring length, also the width of the padded result.
	size_t _len;
public:
	SubStringParse(size_t pos, size_t len);
	virtual ~SubStringParse();

public:
	// Returns the left-aligned, space-padded substring of `text`.
	string operator() (string text);

};

// Builder that fills a FutMessage from the child elements of a node.
class FutTransformationBuilder :
	public xml::XmlTransformationBuilder<FutMessage> {
private:
	// Nested builder used for the children of an "ra" element; it writes
	// into the same FutMessage.
	class DataRaBuilder :
		public xml::XmlTransformationBuilder<FutMessage> {
	private:
		// Number of "a" children seen so far by this builder instance.
		int _riskCount;

	public:
		DataRaBuilder(FutMessage& futMessage) :
			xml::XmlTransformationBuilder<FutMessage>(
			futMessage), _riskCount(0) {
		}

		virtual ~DataRaBuilder() {
		}

		// Handles one child element of an "ra" element.
		void build(xml::XmlNode& node);
	};	

public:
	FutTransformationBuilder(FutMessage& futMessage) :
		xml::XmlTransformationBuilder<FutMessage>(futMessage) {
	}

	virtual ~FutTransformationBuilder() {
	}

	// Handles one child element and stores its value in the message.
	void build(xml::XmlNode& node);

};

// Stores the digit counts of the integer and fractional parts.
FixPointParse::FixPointParse(int integerSize, int decimalSize) :
	_integerSize(integerSize), _decimalSize(decimalSize) {
}

// No resources to release.
FixPointParse::~FixPointParse() {
}

// Formats `text` as a fixed-point number with _decimalSize fractional
// digits, zero-padded to _integerSize + _decimalSize digits, and removes the
// decimal point. An empty `text` yields a field consisting only of '0'.
// The value is parsed with atof().
string FixPointParse::operator() (string text) {
	const int digits = _integerSize + _decimalSize;
	string result(digits, '0');
	if (text.empty()) {
		return result;
	}

	ostringstream formatter;
	formatter.fill('0');
	formatter.precision(_decimalSize);
	// Reserve one extra column for the decimal point when there is one.
	formatter.width(digits + ((_decimalSize > 0) ? 1 : 0));
	formatter << fixed << internal << atof(text.c_str());

	result = formatter.str();
	const size_t pointAt = result.find('.');
	if (pointAt != string::npos) {
		result.erase(pointAt, 1);
	}
	return result;
}

// Stores the start offset and the field length.
SubStringParse::SubStringParse(size_t pos, size_t len) :
	_pos(pos), _len(len) {
}

// No resources to release.
SubStringParse::~SubStringParse() {
}

// Returns up to _len characters of `text` starting at _pos, left-aligned
// and padded with spaces to a width of _len. When _pos is at or beyond the
// end of `text`, the result is _len spaces.
string SubStringParse::operator() (string text) {
	if (_pos >= text.size()) {
		return string(_len, ' ');
	}

	ostringstream padded;
	padded.fill(' ');
	padded.width(_len);
	padded << std::left << text.substr(_pos, _len);
	return padded.str();
}

// Dispatches on the element name of `node` and stores the formatted value
// in the matching field of the message. Unknown element names are ignored.
void FutTransformationBuilder::build(xml::XmlNode& node) {
	const string name = node.getName();
	const string content = node.getContent();

	if (name == "cId") {
		_result.cid = FixPointParse(4, 0)(content);
		return;
	}
	if (name == "pe") {
		_result.pe = SubStringParse(0, 6)(content);
		return;
	}
	if (name == "setlDate") {
		_result.setldate = FixPointParse(8, 0)(content);
		return;
	}
	if (name == "p") {
		_result.p = FixPointParse(8, 3)(content);
		return;
	}
	if (name == "sc") {
		_result.sc = FixPointParse(2, 6)(content);
		return;
	}
	if (name == "scanRate") {
		// The value comes from the nested "priceScan" element.
		const string priceScan = node.find("priceScan").getContent();
		_result.pricescan = FixPointParse(8, 6)(priceScan);
		return;
	}
	if (name == "ra") {
		// Children of "ra" are handled by a fresh nested builder.
		DataRaBuilder raBuilder(_result);
		xml::XmlTransformationEnumerator<FutMessage> raChildren(
			&raBuilder);
		raChildren.enumerate(node);
	}
}

// Handles one child of an "ra" element: "d" sets the delta field, and each
// "a" appends a formatted value to the risk field until FUT_RISK_MAX_COUNT
// values have been appended by this builder. Other names are ignored.
// NOTE(review): _riskCount is per builder instance while the risk field is
// appended to -- confirm a message never sees more than one "ra" element.
void FutTransformationBuilder::DataRaBuilder::build(
	xml::XmlNode& node) {
	const string name = node.getName();
	const string content = node.getContent();

	if (name == "d") {
		_result.delta = FixPointParse(3, 6)(content);
		return;
	}
	if (name != "a") {
		return;
	}

	if (_riskCount < FUT_RISK_MAX_COUNT) {
		_result.risk += FixPointParse(8, 2)(content);
	}
	// Counted even when the value was dropped.
	++_riskCount;
}

// Converts one node into a FutMessage: the pfId, pfCode and currency fields
// are read from the node's parent, and the remaining fields are filled from
// the node's own child elements via FutTransformationBuilder.
FutMessage WriteFut::operator() (const xml::XmlNode& node) {
	FutMessage result;

	const xml::XmlNode parent = node.getParent();
	const string pfId = parent.find("pfId").getContent();
	const string pfCode = parent.find("pfCode").getContent();
	const string currency = parent.find("currency").getContent();

	result.pfid = FixPointParse(4, 0)(pfId);
	result.pfcode = SubStringParse(0, 3)(pfCode);
	result.currency = SubStringParse(0, 3)(currency);

	FutTransformationBuilder childBuilder(result);
	xml::XmlTransformationEnumerator<FutMessage> children(
		&childBuilder);
	children.enumerate(node);
	return result;
}

希望從以上及前幾篇文章提到程式碼的示範,可以讓大家感受一點宣告式語意和命令式語意表達方式的差別。當然宣告式語意不見得會比命令式語意的寫法更好,但依照同人程式開發經驗的體會,宣告式語意常常能帶來一些全新的思維,讓我們用更簡單而更有創意的做法來解決問題。讓程式開發更輕裝而行,有時候用物件導向的手法適時搭配泛函編程的抽象化,反而會讓看起來複雜的問題,不可思議地迎刃而解。

《易經繫辭》曰:「通其變,使民不倦,神而化之,使民宜之。易窮則變,變則通,通則久。」所以當我們習慣的命令式語意寫法讓程式碼變得複雜時,不妨嘗試看看用宣告式語意的寫法能不能讓解決問題更簡單。改變問題思考方向不但不會讓人倦怠,反而會發現更有趣的桃花源地,程式開發的創意又怎會有窮盡枯竭的一天呢?



     

One Response to “XML 格式轉換的宣告式語意”

  1. [...] 我們看到這樣程式碼更專注於它們各自的職責,而且 PartOfList 也可以重覆使用在其它適用的情況,更重要的是程式碼的驗證是可行的,連帶程式碼的可讀性、擴充性、以及彈性都得到加強。這又一次地讓人體會到泛函編程宣告式語意的簡潔有力,搭配與物件導向的思維的整合,無疑是提升系統抽象概念堪稱完美的組合。       Posted by jim yeh 分析設計建模, 問題解決, 生活感觸, 編程技巧, 職場, 設計原則 Subscribe to RSS feed [...]

Leave a Reply

You can use these tags: <a href="" title=""> <abbr title=""> <acronym title=""> <b> <blockquote cite=""> <cite> <code> <del datetime=""> <em> <i> <q cite=""> <strike> <strong> <pre class="">