起因

在工作中遇到一段有层次结构的数据被扁平化后,需要再次还原层次数据结构的问题。

原始数据

数据表达的是某个号码的各种费用,以月份为周期,然后展示了该月份下面的费用详情,该数据是有层次的数据:

Acct-ID                                  XXXXXXXXX
PaymentFlag                                      0
Acc-Nbr                                XXXXXXXXXXX
Fee-Billing-Cycle                                4
    Billing-Cycle-ID                      20191001
    Sum_Charge                               26538
    ChargePayed                                 18
    Acct_Item_Group                              2
        Acct_Item_Group-Id                       0
        Acct-Item-Type                           5
            Acct-Item-Type-NameA费用
            Acct-Item-Charge                   100
            Acct-Item-Type-NameB费用
            Acct-Item-Charge                 11800
            Acct-Item-Type-NameC费用
            Acct-Item-Charge                  5100
            Acct-Item-Type-NameD费用
            Acct-Item-Charge                   982
            Acct-Item-Type-NameE费用
            Acct-Item-Charge                  3000
        Acct_Item_Group-Id                       1
        Acct-Item-Type                           1
            Acct-Item-Type-Name             滞纳金
            Acct-Item-Charge                  5538
    Billing-Cycle-ID                      20191101
    Sum_Charge                               24718
    ChargePayed                                  0
    Acct_Item_Group                              2
        Acct_Item_Group-Id                       0
        Acct-Item-Type                           5
            Acct-Item-Type-NameA费用
            Acct-Item-Charge                   100
            Acct-Item-Type-NameB费用
            Acct-Item-Charge                 11800
            Acct-Item-Type-NameC费用
            Acct-Item-Charge                  5100
            Acct-Item-Type-NameD费用
            Acct-Item-Charge                  1000
            Acct-Item-Type-NameE费用
            Acct-Item-Charge                  3000
        Acct_Item_Group-Id                       1
        Acct-Item-Type                           1
            Acct-Item-Type-Name             滞纳金
            Acct-Item-Charge                  3718
    Billing-Cycle-ID                      20191201
    Sum_Charge                               22763
    ChargePayed                                  0
    Acct_Item_Group                              2
        Acct_Item_Group-Id                       0
        Acct-Item-Type                           5
            Acct-Item-Type-NameA费用
            Acct-Item-Charge                   100
            Acct-Item-Type-NameB费用
            Acct-Item-Charge                 11800
            Acct-Item-Type-NameC费用
            Acct-Item-Charge                  5100
            Acct-Item-Type-NameD费用
            Acct-Item-Charge                  1000
            Acct-Item-Type-NameE费用
            Acct-Item-Charge                  3000
        Acct_Item_Group-Id                       1
        Acct-Item-Type                           1
            Acct-Item-Type-Name             滞纳金
            Acct-Item-Charge                  1763
    Billing-Cycle-ID                      20200101
    Sum_Charge                               21000
    ChargePayed                                  0
    Acct_Item_Group                              2
        Acct_Item_Group-Id                       0
        Acct-Item-Type                           5
            Acct-Item-Type-NameA费用
            Acct-Item-Charge                   100
            Acct-Item-Type-NameB费用
            Acct-Item-Charge                 11800
            Acct-Item-Type-NameC费用
            Acct-Item-Charge                  5100
            Acct-Item-Type-NameD费用
            Acct-Item-Charge                  1000
            Acct-Item-Type-NameE费用
            Acct-Item-Charge                  3000
        Acct_Item_Group-Id                       1
        Acct-Item-Type                           1
            Acct-Item-Type-Name             滞纳金
            Acct-Item-Charge                     0

扁平化后的数据

XML格式的数据,别人返回的数据,现在的需求是根据这个XML数据推断出原始数据是什么

<?xml version="1.0" encoding="GBK"?>
<root>
	<retCode>0</retCode>
	<dataTable>
		<list>
			<form>
				<resultcode>0</resultcode>
				<resultmsg/>
				<acctid>XXXXXXXXX</acctid>
				<paymentmethod>0</paymentmethod>
				<accnbr>XXXXXXXXXXX</accnbr>
				<control101>4</control101>
				<billingcycle>20191001</billingcycle>
				<totalfee>26538</totalfee>
				<billfee>18</billfee>
				<control201>2</control201>
				<ptyroleid>0</ptyroleid>
				<control301>5</control301>
				<acctitemtypename>A费用</acctitemtypename>
				<dccowedigits>100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle>20191101</billingcycle>
				<totalfee>24718</totalfee>
				<billfee>0</billfee>
				<control201>2</control201>
				<ptyroleid>1</ptyroleid>
				<control301>1</control301>
				<acctitemtypename>B费用</acctitemtypename>
				<dccowedigits>11800</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle>20191201</billingcycle>
				<totalfee>22763</totalfee>
				<billfee>0</billfee>
				<control201>2</control201>
				<ptyroleid>0</ptyroleid>
				<control301>5</control301>
				<acctitemtypename>C费用</acctitemtypename>
				<dccowedigits>5100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle>20200101</billingcycle>
				<totalfee>21000</totalfee>
				<billfee>0</billfee>
				<control201>2</control201>
				<ptyroleid>1</ptyroleid>
				<control301>1</control301>
				<acctitemtypename>D费用</acctitemtypename>
				<dccowedigits>982</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid>0</ptyroleid>
				<control301>5</control301>
				<acctitemtypename>E费用</acctitemtypename>
				<dccowedigits>3000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid>1</ptyroleid>
				<control301>1</control301>
				<acctitemtypename>滞纳金</acctitemtypename>
				<dccowedigits>5538</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid>0</ptyroleid>
				<control301>5</control301>
				<acctitemtypename>A费用</acctitemtypename>
				<dccowedigits>100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid>1</ptyroleid>
				<control301>1</control301>
				<acctitemtypename>B费用</acctitemtypename>
				<dccowedigits>11800</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>C费用</acctitemtypename>
				<dccowedigits>5100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>D费用</acctitemtypename>
				<dccowedigits>1000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>E费用</acctitemtypename>
				<dccowedigits>3000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>滞纳金</acctitemtypename>
				<dccowedigits>3718</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>A费用</acctitemtypename>
				<dccowedigits>100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>B费用</acctitemtypename>
				<dccowedigits>11800</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>C费用</acctitemtypename>
				<dccowedigits>5100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>D费用</acctitemtypename>
				<dccowedigits>1000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>E费用</acctitemtypename>
				<dccowedigits>3000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>滞纳金</acctitemtypename>
				<dccowedigits>1763</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>A费用</acctitemtypename>
				<dccowedigits>100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>B费用</acctitemtypename>
				<dccowedigits>11800</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>C费用</acctitemtypename>
				<dccowedigits>5100</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>D费用</acctitemtypename>
				<dccowedigits>1000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>E费用</acctitemtypename>
				<dccowedigits>3000</dccowedigits>
			</form>
			<form>
				<resultcode/>
				<resultmsg/>
				<acctid/>
				<paymentmethod/>
				<accnbr/>
				<control101/>
				<billingcycle/>
				<totalfee/>
				<billfee/>
				<control201/>
				<ptyroleid/>
				<control301/>
				<acctitemtypename>滞纳金</acctitemtypename>
				<dccowedigits>0</dccowedigits>
			</form>
		</list>
	</dataTable>
</root>

规律

要把下面扁平化后的数据转化成有层次的数据,需要观察XML数据和原始数据的规律,通过我观察了N个小时后,我得出了结论:

  1. control101只有第一行有数据,其他都没有数据,说明是第一层,结合原始数据看,第一层有四个有月份的数据
  2. control201只有前面四个form有数据,并且都是2,说明前面四条(第一层)数据每条数据下面都有两层
  3. 通过control201,再结合ptyroleid和control301的值推算出数据索引:
    • 第一条第一层数据(billingcycle是20191001的这条),它的control201为2,说明第二层有两条
    • 看第一个form的ptyroleid(0)和control301(5)得出第0类的三层数据为5条
    • 看第二个form的ptyroleid(1)和control301(1)得出第1类的三层数据为1条
    • 第三个form就不看了,因为control201为2,所以只看两条
    • 后面的第一层数据就从当前的位置依次往下推,到最后你会发现ptyroleid和control301为空值的时候刚好满足control201的数据需要,这从侧面证明猜想是对的
  4. 下面的数据按照3的规律可以得出以下索引:
第一层索引   第二层(数据类型)        第三层索引       第四层数据个数
    0             2                    0                 5
                                       1                 1
    1             2                    0                 5
                                       1                 1
    2             2                    0                 5
                                       1                 1
    3             2                    0                 5
                                       1                 1
  1. 仔细看看这个索引,和原始数据是不是有点像,最外层四个数据,每个数据有两个第二层,第二层下面的第1个类型有5个细项,第2个类型有1个细项(滞纳金)

关键代码

通过以上规律写出以下代码(基于java),直接把XML字符串扔进去转化成带有层次的MAP返回

private List<Map<String,Object>> dealArrearageXml2Detail(String xml) {
    List<Map<String,Object>> res = new ArrayList<>();
    //xml转map
    Map<String, Object> feeMap = XmlToMap.getMapData(xml);
    //调用结果标识
    String retCode = (String) feeMap.get("retCode");
    //返费集合
    List<Map<String, Object>> feeList = (List<Map<String, Object>>) feeMap.get("list");
    //集合不为空进行赋值
    if (feeList != null && !feeList.isEmpty()) {
        final String control101 = "control101";//一层个数
        final String control201 = "control201";//二层个数
        final String control301 = "control301";//三层个数
        /**
         * 第一遍遍历索引
         * Map<最外层索引,Map<第二层序号,三层数据个数>>
         */
        Map<Integer,List<Integer>> index = new HashMap<>();
        //第一级的个数
        int iControl101 = Integer.valueOf(String.valueOf(feeList.get(0).get(control101)));
        int lastPos = 0;//上次寻找的位置
        for (int i = 0; i < iControl101; i++) {
            Map<String, Object> cl101 = feeList.get(i);
            int iControl201 = Integer.valueOf(String.valueOf(cl101.get(control201)));
            List<Integer> list = new ArrayList<>();
            //二层索引处理
            for (int j = 0; j < iControl201; j++) {
                Map<String, Object> cl201 = feeList.get(lastPos);
                int iControl301 = Integer.valueOf(String.valueOf(cl201.get(control301)));
                list.add(iControl301);
                lastPos ++;
            }
            index.put(i,list);

            //第一层数据封装
            Map<String,Object> level1 = new HashMap<>();
            level1.put("accnbr",cl101.get("accnbr"));
            level1.put("billingcycle",cl101.get("billingcycle"));
            level1.put("totalfee",cl101.get("totalfee"));
            level1.put("billfee",cl101.get("billfee"));
            res.add(level1);
        }


        //详情数据封装
        int lastPosB = 0;//记录最后循环的位置
        for (int i = 0; i < res.size(); i++) {
            //获取索引
            List<Integer> index201 = index.get(i);
            int total = index201.stream().mapToInt(x -> x).sum();
            List<Map<String,String>> details = new ArrayList<>();
            //通过索引去原始列表里面挨个获取详情
            for (int j = lastPosB; j < lastPosB + total; j++) {
                Map<String,Object> orgDetail = feeList.get(j);
                Map<String,String> detail = new HashMap<>();
                detail.put("acctitemtypename",String.valueOf(orgDetail.get("acctitemtypename")));
                detail.put("dccowedigits",String.valueOf(orgDetail.get("dccowedigits")));
                details.add(detail);
            }
            //记录当前已经到了的位置
            lastPosB += total;
            res.get(i).put("details",details);
        }
    }
    return res;
}

总结

在工作的中很多时候都要根据两堆数据的结果推断其转换规律,没有什么好的方法,但是千万不要灰心,先仔细观察两堆数据,然后一步一步的去找规律,多找样列去验证自己的猜想。