起因
在工作中遇到一段有层次结构的数据被扁平化后,需要再次还原层次数据结构的问题。
原始数据
数据表达的是某个号码的各种费用,以月份为周期,然后展示了该月份下面的费用详情,该数据是有层次的数据:
Acct-ID XXXXXXXXX
PaymentFlag 0
Acc-Nbr XXXXXXXXXXX
Fee-Billing-Cycle 4
Billing-Cycle-ID 20191001
Sum_Charge 26538
ChargePayed 18
Acct_Item_Group 2
Acct_Item_Group-Id 0
Acct-Item-Type 5
Acct-Item-Type-NameA费用
Acct-Item-Charge 100
Acct-Item-Type-NameB费用
Acct-Item-Charge 11800
Acct-Item-Type-NameC费用
Acct-Item-Charge 5100
Acct-Item-Type-NameD费用
Acct-Item-Charge 982
Acct-Item-Type-NameE费用
Acct-Item-Charge 3000
Acct_Item_Group-Id 1
Acct-Item-Type 1
Acct-Item-Type-Name 滞纳金
Acct-Item-Charge 5538
Billing-Cycle-ID 20191101
Sum_Charge 24718
ChargePayed 0
Acct_Item_Group 2
Acct_Item_Group-Id 0
Acct-Item-Type 5
Acct-Item-Type-NameA费用
Acct-Item-Charge 100
Acct-Item-Type-NameB费用
Acct-Item-Charge 11800
Acct-Item-Type-NameC费用
Acct-Item-Charge 5100
Acct-Item-Type-NameD费用
Acct-Item-Charge 1000
Acct-Item-Type-NameE费用
Acct-Item-Charge 3000
Acct_Item_Group-Id 1
Acct-Item-Type 1
Acct-Item-Type-Name 滞纳金
Acct-Item-Charge 3718
Billing-Cycle-ID 20191201
Sum_Charge 22763
ChargePayed 0
Acct_Item_Group 2
Acct_Item_Group-Id 0
Acct-Item-Type 5
Acct-Item-Type-NameA费用
Acct-Item-Charge 100
Acct-Item-Type-NameB费用
Acct-Item-Charge 11800
Acct-Item-Type-NameC费用
Acct-Item-Charge 5100
Acct-Item-Type-NameD费用
Acct-Item-Charge 1000
Acct-Item-Type-NameE费用
Acct-Item-Charge 3000
Acct_Item_Group-Id 1
Acct-Item-Type 1
Acct-Item-Type-Name 滞纳金
Acct-Item-Charge 1763
Billing-Cycle-ID 20200101
Sum_Charge 21000
ChargePayed 0
Acct_Item_Group 2
Acct_Item_Group-Id 0
Acct-Item-Type 5
Acct-Item-Type-NameA费用
Acct-Item-Charge 100
Acct-Item-Type-NameB费用
Acct-Item-Charge 11800
Acct-Item-Type-NameC费用
Acct-Item-Charge 5100
Acct-Item-Type-NameD费用
Acct-Item-Charge 1000
Acct-Item-Type-NameE费用
Acct-Item-Charge 3000
Acct_Item_Group-Id 1
Acct-Item-Type 1
Acct-Item-Type-Name 滞纳金
Acct-Item-Charge 0
扁平化后的数据
XML格式的数据,别人返回的数据,现在的需求是根据这个XML数据推断出原始数据是什么
<?xml version="1.0" encoding="GBK"?>
<root>
<retCode>0</retCode>
<dataTable>
<list>
<form>
<resultcode>0</resultcode>
<resultmsg/>
<acctid>XXXXXXXXX</acctid>
<paymentmethod>0</paymentmethod>
<accnbr>XXXXXXXXXXX</accnbr>
<control101>4</control101>
<billingcycle>20191001</billingcycle>
<totalfee>26538</totalfee>
<billfee>18</billfee>
<control201>2</control201>
<ptyroleid>0</ptyroleid>
<control301>5</control301>
<acctitemtypename>A费用</acctitemtypename>
<dccowedigits>100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle>20191101</billingcycle>
<totalfee>24718</totalfee>
<billfee>0</billfee>
<control201>2</control201>
<ptyroleid>1</ptyroleid>
<control301>1</control301>
<acctitemtypename>B费用</acctitemtypename>
<dccowedigits>11800</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle>20191201</billingcycle>
<totalfee>22763</totalfee>
<billfee>0</billfee>
<control201>2</control201>
<ptyroleid>0</ptyroleid>
<control301>5</control301>
<acctitemtypename>C费用</acctitemtypename>
<dccowedigits>5100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle>20200101</billingcycle>
<totalfee>21000</totalfee>
<billfee>0</billfee>
<control201>2</control201>
<ptyroleid>1</ptyroleid>
<control301>1</control301>
<acctitemtypename>D费用</acctitemtypename>
<dccowedigits>982</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid>0</ptyroleid>
<control301>5</control301>
<acctitemtypename>E费用</acctitemtypename>
<dccowedigits>3000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid>1</ptyroleid>
<control301>1</control301>
<acctitemtypename>滞纳金</acctitemtypename>
<dccowedigits>5538</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid>0</ptyroleid>
<control301>5</control301>
<acctitemtypename>A费用</acctitemtypename>
<dccowedigits>100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid>1</ptyroleid>
<control301>1</control301>
<acctitemtypename>B费用</acctitemtypename>
<dccowedigits>11800</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>C费用</acctitemtypename>
<dccowedigits>5100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>D费用</acctitemtypename>
<dccowedigits>1000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>E费用</acctitemtypename>
<dccowedigits>3000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>滞纳金</acctitemtypename>
<dccowedigits>3718</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>A费用</acctitemtypename>
<dccowedigits>100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>B费用</acctitemtypename>
<dccowedigits>11800</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>C费用</acctitemtypename>
<dccowedigits>5100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>D费用</acctitemtypename>
<dccowedigits>1000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>E费用</acctitemtypename>
<dccowedigits>3000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>滞纳金</acctitemtypename>
<dccowedigits>1763</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>A费用</acctitemtypename>
<dccowedigits>100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>B费用</acctitemtypename>
<dccowedigits>11800</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>C费用</acctitemtypename>
<dccowedigits>5100</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>D费用</acctitemtypename>
<dccowedigits>1000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>E费用</acctitemtypename>
<dccowedigits>3000</dccowedigits>
</form>
<form>
<resultcode/>
<resultmsg/>
<acctid/>
<paymentmethod/>
<accnbr/>
<control101/>
<billingcycle/>
<totalfee/>
<billfee/>
<control201/>
<ptyroleid/>
<control301/>
<acctitemtypename>滞纳金</acctitemtypename>
<dccowedigits>0</dccowedigits>
</form>
</list>
</dataTable>
</root>
规律
要把下面扁平化后的数据转化成有层次的数据,需要观察XML数据和原始数据的规律,通过我观察了N个小时后,我得出了结论:
- control101只有第一行有数据,其他都没有数据,说明是第一层,结合原始数据看,第一层有四个有月份的数据
- control201只有前面四个form有数据,并且都是2,说明前面四条(第一层)数据每条数据下面都有两层
- 通过control201,再结合ptyroleid和control301的值推算出数据索引:
- 第一条第一层数据(billingcycle是20191001的这条),它的control201为2,说明第二层有两条
- 看第一个form的ptyroleid(0)和control301(5)得出第0类的三层数据为5条
- 看第二个form的ptyroleid(1)和control301(1)得出第1类的三层数据为1条
- 第三个form就不看了,因为control201为2,所以只看两条
- 后面的第一层数据就从当前的位置依次往下推,到最后你会发现ptyroleid和control301为空值的时候刚好满足control201的数据需要,这从侧面证明猜想是对的
- 下面的数据按照3的规律可以得出以下索引:
第一层索引 第二层(数据类型) 第三层索引 第四层数据个数
0 2 0 5
1 1
1 2 0 5
1 1
2 2 0 5
1 1
3 2 0 5
1 1
- 仔细看看这个索引,和原始数据是不是有点像,最外层四个数据,每个数据有两个第二层,第二层下面的第1个类型有5个细项,第2个类型有1个细项(滞纳金)
关键代码
通过以上规律写出以下代码(基于java),直接把XML字符串扔进去转化成带有层次的MAP返回
private List<Map<String,Object>> dealArrearageXml2Detail(String xml) {
List<Map<String,Object>> res = new ArrayList<>();
//xml转map
Map<String, Object> feeMap = XmlToMap.getMapData(xml);
//调用结果标识
String retCode = (String) feeMap.get("retCode");
//返费集合
List<Map<String, Object>> feeList = (List<Map<String, Object>>) feeMap.get("list");
//集合不为空进行赋值
if (feeList != null && !feeList.isEmpty()) {
final String control101 = "control101";//一层个数
final String control201 = "control201";//二层个数
final String control301 = "control301";//三层个数
/**
* 第一遍遍历索引
* Map<最外层索引,Map<第二层序号,三层数据个数>>
*/
Map<Integer,List<Integer>> index = new HashMap<>();
//第一级的个数
int iControl101 = Integer.valueOf(String.valueOf(feeList.get(0).get(control101)));
int lastPos = 0;//上次寻找的位置
for (int i = 0; i < iControl101; i++) {
Map<String, Object> cl101 = feeList.get(i);
int iControl201 = Integer.valueOf(String.valueOf(cl101.get(control201)));
List<Integer> list = new ArrayList<>();
//二层索引处理
for (int j = 0; j < iControl201; j++) {
Map<String, Object> cl201 = feeList.get(lastPos);
int iControl301 = Integer.valueOf(String.valueOf(cl201.get(control301)));
list.add(iControl301);
lastPos ++;
}
index.put(i,list);
//第一层数据封装
Map<String,Object> level1 = new HashMap<>();
level1.put("accnbr",cl101.get("accnbr"));
level1.put("billingcycle",cl101.get("billingcycle"));
level1.put("totalfee",cl101.get("totalfee"));
level1.put("billfee",cl101.get("billfee"));
res.add(level1);
}
//详情数据封装
int lastPosB = 0;//记录最后循环的位置
for (int i = 0; i < res.size(); i++) {
//获取索引
List<Integer> index201 = index.get(i);
int total = index201.stream().mapToInt(x -> x).sum();
List<Map<String,String>> details = new ArrayList<>();
//通过索引去原始列表里面挨个获取详情
for (int j = lastPosB; j < lastPosB + total; j++) {
Map<String,Object> orgDetail = feeList.get(j);
Map<String,String> detail = new HashMap<>();
detail.put("acctitemtypename",String.valueOf(orgDetail.get("acctitemtypename")));
detail.put("dccowedigits",String.valueOf(orgDetail.get("dccowedigits")));
details.add(detail);
}
//记录当前已经到了的位置
lastPosB += total;
res.get(i).put("details",details);
}
}
return res;
}
总结
在工作的中很多时候都要根据两堆数据的结果推断其转换规律,没有什么好的方法,但是千万不要灰心,先仔细观察两堆数据,然后一步一步的去找规律,多找样列去验证自己的猜想。