dfParser.py 30 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. #横向合并贷款账户,设置为1行
  2. #账户编号 管理机构 账户标识 开立日期 到期日期 借款金额(本金) 账户币种 业务种类 担保方式 还款期数 还款频率 还款方式 共同借款标志 账户状态 五级分类 余额(本金) 剩余还款期数 本月应还款 应还款日 本月实还款 最近一次还款日期 当前逾期期数 当前逾期总额 逾期31-60天未还本金 逾期61-90天未还本金 逾期61-90天未还本金 逾期91-180天未还本金 逾期180天以上未还本金 账户关闭日期 管理机构类型(银行/小贷公司 从B字段解析出来)
  3. import pandas as pd
  4. import numpy as np;
  5. import utils;
  6. import traceback
  7. import sys
  8. import log
  9. logger = log.logger
  10. #贷款明细大表表头
  11. dfHeaderLoan = ['账户编号','管理机构','账户标识','开立日期','到期日期','借款金额(本金)','账户币种',
  12. '业务种类','担保方式','还款期数','还款频率','还款方式','共同借款标志',
  13. '账户状态','五级分类','余额(本金)','剩余还款期数','本月应还款','应还款日','本月实还款','最近一次还款日期',
  14. '当前逾期期数','当前逾期总额','逾期31-60天未还本金','逾期61-90天未还本金','逾期91-180天未还本金','逾期180天以上未还本金',
  15. '账户关闭日期','管理机构类型','截至日期','转出月份',"贷款负债"]
  16. #贷记卡表头
  17. dfHeaderCreditCard = ['账户编号','发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式',
  18. '账户状态','余额','已用额度','未出单的大额专项分期余额','剩余分期期数','最近6个月平均使用额度','最大使用额',
  19. '账单日','本月应还款','本月实还款','最近一次还款日期','当前逾期期数','当前逾期总额',
  20. '大额专项分期额度','分期额度生效日期','分期额度到期日期','已用分期金额']
  21. #准贷记卡表 考虑纳入贷记里,相同含义字段对上 TODO
  22. dfHeaderCreditCardZ = ['账户编号','发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式',
  23. '账户状态','透支余额','最近6个月平均透支余额','最大透支余额','账单日',
  24. '本月实还款','最近一次还款日期','透支180天以上未付余额']
  25. #还款记录表头
  26. dfHeaderLoanPayRecord=['账户编号','还款日期','还款状态','还款状态值']
  27. #特殊交易类型
  28. #严重程度 指标为加工出来
  29. dfHeaderLoanSpecialTrade=['账户编号','特殊交易类型','发生日期','变更月数','发生金额','明细记录','严重程度']
  30. #贷记卡记录表头
  31. dfHeaderCreditCardPayRecord=['账户编号','还款日期','还款状态','还款状态值']
  32. #贷记卡记录表头
  33. dfHeaderCreditCardPayRecordZ=['账户编号','还款日期','还款状态','还款状态值']
  34. #被追偿信息表头
  35. dfHeaderRecoveryInfo=['账户编号','管理机构','业务种类','债权接收日期','债权金额','债权转移时的还款状态',
  36. '账户状态','账户关闭日期','特殊交易类型','发生日期','变更月份','发生金额','明细记录','截至日期']
  37. #公积金
  38. dfHeaderHousingFundRcd=['账户编号','参缴地', '参缴日期', '初缴月份', '缴至月份', '缴费状态', '月缴存额', '个人缴存比例', '单位缴存比例','缴费单位','信息更新日期']
  39. #
  40. #截止日期
  41. def formatDateJz(str):
  42. return str[2:len(str)].replace('年','-').replace('月','-').replace('日','');
  43. #替换日期
  44. def formatDate(str):
  45. return str.replace('.','-');
  46. # 科学计数法转换
  47. def replaceAmt(value):
  48. if str(value)=="" or str(value)=="--" or str(value)=="nan":
  49. return value.replace(',', '')
  50. else:
  51. return float(value.replace(',', ''))
  52. #转整型
  53. def toInt(value):
  54. if str(value)=="" or str(value)=="--" or str(value)=="nan":
  55. return 0
  56. else:
  57. return int(value)
  58. #合并贷款记录dataframe为宽表
  59. def mergeLoanDf(dfObj,idx,reportTime):
  60. loanDf = pd.DataFrame(columns=dfHeaderLoan, index=[0])
  61. df = dfObj["df"]
  62. # print(df)
  63. df1 = df.loc[1, :].dropna().reset_index(drop=True)
  64. df3 = df.loc[3, :].dropna().reset_index(drop=True)
  65. df4 = df.loc[4, :].dropna().reset_index(drop=True)
  66. df6 = df.loc[6, :].dropna().reset_index(drop=True)
  67. loanDf.loc[0, '账户编号'] = idx+1;# 账户编号
  68. loanDf.loc[0, '管理机构'] = df1[0].replace('\n','');
  69. loanDf.loc[0, '账户标识'] = df1[1];
  70. loanDf.loc[0, '开立日期'] = formatDate(df1[2]);
  71. loanDf.loc[0, '到期日期'] = formatDate(df1[3]);
  72. loanDf.loc[0, '借款金额(本金)'] = replaceAmt(df1[4]);
  73. loanDf.loc[0, '账户币种'] = df1[5];
  74. loanDf.loc[0, '业务种类'] = df3[0];
  75. loanDf.loc[0, '担保方式'] = df3[1];
  76. loanDf.loc[0, '还款期数'] = toInt(df3[2]);
  77. loanDf.loc[0, '还款频率'] = df3[3];
  78. loanDf.loc[0, '还款方式'] = df3[4];
  79. loanDf.loc[0, '共同借款标志'] = df3[5];
  80. loanDf.loc[0, '截至日期'] = formatDateJz(df4[0]);
  81. accountStatus = df6[0]
  82. loanDf.loc[0, '账户状态'] = accountStatus;
  83. # print("状态之前")
  84. # print(accountStatus)
  85. if accountStatus != "结清" and accountStatus!="呆账" and accountStatus!="转出":
  86. df8 = df.loc[8, :].dropna().reset_index(drop=True)# 逾期信息
  87. loanDf.loc[0, '五级分类'] = df6[1]
  88. loanDf.loc[0, '余额(本金)'] = replaceAmt(df6[2])
  89. loanDf.loc[0, '剩余还款期数'] = toInt(df6[3])
  90. loanDf.loc[0, '本月应还款'] = replaceAmt(df6[4])
  91. loanDf.loc[0, '应还款日'] = formatDate(df6[5])
  92. loanDf.loc[0, '本月实还款'] = replaceAmt(df6[6])
  93. loanDf.loc[0, '最近一次还款日期'] = formatDate(df6[7])
  94. #'当前逾期期数','当前逾期总额','逾期31-60天未还本金','逾期61-90天未还本金','逾期91-180天未还本金','逾期180天以上未还本金'
  95. loanDf.loc[0, '当前逾期期数'] = toInt(df8[0])
  96. loanDf.loc[0, '当前逾期总额'] = replaceAmt(df8[1])
  97. loanDf.loc[0, '逾期31-60天未还本金'] = replaceAmt(df8[2])
  98. loanDf.loc[0, '逾期61-90天未还本金'] = replaceAmt(df8[3])
  99. loanDf.loc[0, '逾期91-180天未还本金'] = replaceAmt(df8[4])
  100. loanDf.loc[0, '逾期180天以上未还本金'] = replaceAmt(df8[5])
  101. loanDf.loc[0, '截至日期月份'] = utils.difMonthReportTime(formatDateJz(df4[0]), reportTime); # 截止日期离报告时间月份
  102. elif accountStatus == "结清":
  103. loanDf.loc[0, '账户关闭日期'] = formatDate(df6[1]);
  104. elif accountStatus == "转出":
  105. loanDf.loc[0, '转出月份'] = formatDate(df6[1]);
  106. elif accountStatus == "呆账":
  107. loanDf.loc[0, '余额(本金)'] = replaceAmt(df6[1]);
  108. loanDf.loc[0, '最近一次还款日期'] = formatDate(df6[2]);
  109. if df1[0].find("\"")>0:
  110. loanDf.loc[0, '管理机构类型'] = df1[0].split("\"")[0].replace('\n','');
  111. dfObj["loanDf"] = loanDf;
  112. # print(loanDf)
  113. #解析还款记录
  114. loanPayRecord = pd.DataFrame()
  115. #特殊交易
  116. specialTradeDf = pd.DataFrame()
  117. if accountStatus == "正常":#正常
  118. recordIndexBegin = 11;
  119. loanPayRecord = mergeLoanPayRecordDf(recordIndexBegin, df, idx,loanPayRecord)
  120. specialTradeDf = mergeLoanSpecialTrade(recordIndexBegin, df, idx,specialTradeDf)
  121. elif accountStatus == "逾期":#如果长度小于12后续无需处理
  122. #14 11 有提前最新还款记录的为14
  123. print("是逾期吗")
  124. recordIndexBegin = 11;
  125. loanPayRecord = mergeLoanPayRecordDf(recordIndexBegin, df, idx,loanPayRecord)
  126. print("是逾期吗1")
  127. specialTradeDf = mergeLoanSpecialTrade(recordIndexBegin, df, idx, specialTradeDf)
  128. print("是逾期吗2")
  129. elif accountStatus == "结清":
  130. recordIndexBegin = 9;
  131. loanPayRecord = mergeLoanPayRecordDf(recordIndexBegin, df, idx, loanPayRecord)
  132. specialTradeDf = mergeLoanSpecialTrade(recordIndexBegin, df, idx, specialTradeDf)
  133. elif accountStatus == "呆账" or accountStatus == "转出":
  134. recordIndexBegin = 9;
  135. loanPayRecord = mergeLoanPayRecordDf(recordIndexBegin,df,idx,loanPayRecord)
  136. specialTradeDf = mergeLoanSpecialTrade(recordIndexBegin, df, idx, specialTradeDf)
  137. else:
  138. print("===================还款状态不满足条件===================")
  139. dfObj["loanPayRecordDf"] = loanPayRecord;
  140. # print(loanPayRecord)
  141. print("声明")
  142. dfObj["specialTradeDf"] = specialTradeDf;
  143. # print(specialTradeDf)
  144. # print(dfObj)
  145. return dfObj;
  146. #计算特殊交易
  147. # 从“信贷交易信息明细”中提取,搜索所有发生有“特殊交易”的账户,统计所有“特殊交易类型”。交易的严重程度:
  148. # 取max
  149. # 说明特殊交易类型的代码。
  150. # 代码表如下:
  151. # 1-展期
  152. # 2-担保人(第三方)代偿
  153. # 3-以资抵债
  154. # 4-提前还款(包括提前归还部分本金、还款期限不变,以
  155. # 及缩短还款期限两种情况)
  156. # 5-提前结清
  157. # 6-强制平仓,未结清
  158. # 7-强制平仓,已结清
  159. # 8-司法追偿
  160. # 9-其他
  161. # 11-债务减免
  162. # 12-资产剥离
  163. # 13-资产转让
  164. # 14-信用卡个性化分期
  165. # 16-银行主动延期
  166. # 17-强制平仓
  167. # 0= tradetype in ("提前还款(部分)","提前还款(全部)");
  168. # 0 = tradetype in ("其他") and find(content,"提前")>0;
  169. # 1= tradetype in ("担保人代还");
  170. # 2 = tradetype in ("其他") and content not in ("担保人代还取消","合同展期");
  171. # 2 = tradetype in ("展期(延期)") and content in ("专升本或研究生入学展期");
  172. # 3 = tradetype in ("其他") and content in ("担保人代还取消","合同展期");
  173. # 3=tradetype in ("展期(延期)") and content not in ("专升本或研究生入学展期") ;
  174. # 4=tradetype in ("以资抵债")。
  175. def mergeLoanSpecialTrade(recordIndexBegin,df,idx,specialTradeDf):
  176. for i in range(recordIndexBegin, df.index.size):
  177. dfRecord = df.loc[i, :].dropna().reset_index(drop=True)
  178. # print(df.loc[i, :].dropna())
  179. if len(dfRecord) == 5: # 特殊交易
  180. # print(dfRecord)
  181. specialTradeDfTmp = pd.DataFrame(columns=dfHeaderLoanSpecialTrade, index=[0])
  182. if dfRecord[0]!='特殊交易类型':
  183. specialTradeDfTmp.loc[0, '账户编号'] = idx + 1; # 账户编号
  184. specialTradeDfTmp.loc[0,'特殊交易类型'] = dfRecord[0]
  185. specialTradeDfTmp.loc[0,'发生日期'] = formatDate(dfRecord[1])
  186. print("日期")
  187. # specialTradeDfTmp.loc[0,'变更月数'] = toInt(dfRecord[1]) # yuan
  188. print("变更")
  189. specialTradeDfTmp.loc[0, '发生金额'] = replaceAmt(dfRecord[3])
  190. specialTradeDfTmp.loc[0, '明细记录'] = dfRecord[3].replace("\n","")
  191. tradeType = dfRecord[0]
  192. content = dfRecord[3].replace("\n","")#明细记录
  193. #TODO 加工严重程度 逻辑待确认
  194. severity = None
  195. if tradeType == '提前还款' or tradeType == '提前结清':
  196. severity = 0
  197. elif tradeType == '其他' and content.find("提前")>=0:
  198. severity = 0
  199. elif tradeType == '担保人(第三方)代偿':
  200. severity= 1
  201. # elif tradeType == "其他" and content.find("担保人代还取消")<0 and content.find("合同展期")<0:
  202. # severity = 2
  203. #同时出现代还和取消
  204. elif tradeType == "其他" and (content.find("代还") < 0 or content.find("代偿") < 0) and content.find("取消") < 0 and content.find("合同展期") < 0:
  205. severity = 2
  206. elif (tradeType == "展期" or tradeType.find('延期')>=0) and content.find("专升本或研究生入学展期")>=0:
  207. severity = 2
  208. # elif tradeType == "其他" and (content.find("担保人代还取消")>=0 or content.find("合同展期")>=0):
  209. # severity = 3
  210. elif tradeType == "其他" and (( (content.find("代还") < 0 or content.find("代偿") < 0) and content.find("取消") >=0) or (content.find("合同展期") >= 0)):
  211. severity = 3
  212. elif (tradeType == "展期" or tradeType.find('延期')>=0) and content.find("专升本或研究生入学展期")<0:
  213. severity = 3
  214. elif tradeType == "以资抵债":
  215. severity = 4
  216. specialTradeDfTmp.loc[0, '严重程度'] = severity
  217. specialTradeDf = pd.concat([specialTradeDf, specialTradeDfTmp], axis=0, ignore_index=True);
  218. return specialTradeDf;
  219. # '特殊交易类型','发生日期','变更月数','发生金额','明细记录'
  220. #合并贷款还款记录明细
  221. def mergeLoanPayRecordDf(recordIndexBegin,df,idx,loanPayRecord):
  222. for i in range(recordIndexBegin, df.index.size):
  223. dfRecord = df.loc[i, :].dropna().reset_index(drop=True)
  224. if len(dfRecord) == 13:
  225. loanPayRecordTmp = pd.DataFrame(columns=dfHeaderLoanPayRecord, index=[0])
  226. loanPayRecordTmp.loc[0, '账户编号'] = idx + 1; # 账户编号
  227. payYear = dfRecord[0];
  228. # for j in range(0,12):
  229. # payMonth = payYear +"-" +str(j+1)+"-1"
  230. # loanPayRecordTmp.loc[0, '还款日期'] = payMonth;
  231. # loanPayRecordTmp.loc[0, '还款状态'] = dfRecord[j+1];
  232. elif len(dfRecord) == 12:
  233. for j in range(0, 12):
  234. if j < 9:
  235. payMonth = payYear + "-0" + str(j + 1) + "-01"
  236. else:
  237. payMonth = payYear + "-" + str(j + 1) + "-01"
  238. loanPayRecordTmp.loc[0, '还款日期'] = payMonth;
  239. loanPayRecordTmp.loc[0, '还款状态值'] = dfRecord[j];
  240. dfRecordPre = df.loc[i - 1, :].dropna().reset_index(drop=True)
  241. loanPayRecordTmp.loc[0, '还款状态'] = dfRecordPre[j + 1];
  242. # loanPayRecords.append(loanPayRecord)
  243. loanPayRecord = pd.concat([loanPayRecord, loanPayRecordTmp], axis=0, ignore_index=True);
  244. # elif len(dfRecord)==5:#特殊交易
  245. #'特殊交易类型','发生日期','变更月数','发生金额','明细记录'
  246. if not loanPayRecord.empty:
  247. loanPayRecord = loanPayRecord[loanPayRecord['还款状态']!=""];
  248. return loanPayRecord;
  249. #合并贷记卡为宽表
  250. # ['账户编号','发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式',
  251. # '账户状态','余额','已用额度','未出单的大额专项分期余额','剩余分期期数','最近6个月平均使用额度','最大使用额',
  252. # '账单日','本月应还款','本月实还款','最近一次还款日期','当前逾期期数','当前逾期总额',
  253. # '大额专项分期额度','分期额度生效日期','分期额度到期日期','已用分期金额']
  254. def mergeCreditCardDf(dfObj,idx,reportTime):
  255. creditCardDf = pd.DataFrame(columns=dfHeaderCreditCard, index=[0])
  256. df = dfObj["df"]
  257. df1 = df.loc[1, :].dropna().reset_index(drop=True)
  258. # print(df)
  259. if df.index.size==2:
  260. logger.info("账户:"+str(idx+1)+"数据问题,未激活分页")
  261. if df.index.size > 2:
  262. df2 = df.loc[2, :].dropna().reset_index(drop=True)
  263. creditCardDf.loc[0, '账户编号'] = idx + 1; # 账户编号
  264. creditCardDf.loc[0, '发卡机构'] = df1[0].replace('\n', '');
  265. creditCardDf.loc[0, '账户标识'] = df1[1]
  266. creditCardDf.loc[0, '开立日期'] = formatDate(df1[2])
  267. creditCardDf.loc[0, '账户授信额度'] = replaceAmt(df1[3])
  268. creditCardDf.loc[0, '共享授信额度'] = replaceAmt(df1[4])
  269. #排除美元027
  270. if df1[5].find('美元')>=0:
  271. return None
  272. creditCardDf.loc[0, '币种'] = df1[5]
  273. creditCardDf.loc[0, '业务种类'] = df1[6]
  274. creditCardDf.loc[0, '担保方式'] = df1[7]
  275. if df.index.size>2:
  276. creditCardDf.loc[0, '截至日期'] = formatDateJz(df2[0]);
  277. accountStatus = ""
  278. if df.index.size > 3 and df2[0].find('未激活')<0:#非未激活
  279. # print("非未激活")
  280. df4 = df.loc[4, :].dropna().reset_index(drop=True)
  281. if df4[0] == "呆账":
  282. # df6 = df.loc[6, :].dropna().reset_index(drop=True)
  283. accountStatus = df4[0]
  284. creditCardDf.loc[0, '账户状态'] = accountStatus
  285. creditCardDf.loc[0, '余额'] = replaceAmt(df4[1])
  286. creditCardDf.loc[0, '最近一次还款日期'] = formatDate(df4[2])
  287. creditCardDf.loc[0, '截至日期月份'] = utils.difMonthReportTime(formatDateJz(df2[0]), reportTime); # 截止日期离报告时间月份
  288. elif df4[0] != '销户':
  289. try:
  290. df6 = df.loc[6, :].dropna().reset_index(drop=True)
  291. accountStatus = df4[0]
  292. creditCardDf.loc[0, '账户状态'] = df4[0]
  293. creditCardDf.loc[0, '余额'] = replaceAmt(df4[1])
  294. creditCardDf.loc[0, '已用额度'] = replaceAmt(df4[2])
  295. dividePeriodBalance = df4[3] # 未出单的大额专项分期余额
  296. creditCardDf.loc[0, '未出单的大额专项分期余额'] = replaceAmt(df4[3])
  297. creditCardDf.loc[0, '剩余分期期数'] = toInt(df4[4])
  298. creditCardDf.loc[0, '最近6个月平均使用额度'] = replaceAmt(df4[5])
  299. creditCardDf.loc[0, '最大使用额'] = replaceAmt(df4[6])
  300. creditCardDf.loc[0, '账单日'] = formatDate(df6[0])
  301. creditCardDf.loc[0, '本月应还款'] = replaceAmt(df6[1])
  302. creditCardDf.loc[0, '本月实还款'] = replaceAmt(df6[2])
  303. creditCardDf.loc[0, '最近一次还款日期'] = formatDate(df6[3])
  304. creditCardDf.loc[0, '当前逾期期数'] = toInt(df6[4])
  305. creditCardDf.loc[0, '当前逾期总额'] = replaceAmt(df6[5])
  306. creditCardDf.loc[0, '截至日期月份'] = utils.difMonthReportTime(formatDateJz(df2[0]), reportTime); # 截止日期离报告时间月份
  307. # print("非销户")
  308. except Exception:
  309. logger.info("解析贷记卡异常-"+idx);
  310. info = sys.exc_info()
  311. logger.error(info[0])
  312. logger.error(info[1])
  313. logger.error(traceback.extract_tb(info[2]))
  314. if dividePeriodBalance !="--":
  315. #'大额专项分期额度','分期额度生效日期','分期额度到期日期','已用分期金额']
  316. df9 = df.loc[9, :].dropna().reset_index(drop=True)
  317. creditCardDf.loc[0, '大额专项分期额度'] = replaceAmt(df9[0])
  318. creditCardDf.loc[0, '分期额度生效日期'] = formatDate(df9[1])
  319. creditCardDf.loc[0, '分期额度到期日期'] = formatDate(df9[2])
  320. creditCardDf.loc[0, '已用分期金额'] = replaceAmt(df9[3])
  321. else:
  322. accountStatus = '销户'
  323. creditCardDf.loc[0, '账户状态'] = '销户'
  324. # if df.index.size > 3 and df4[0] == '销户':
  325. # creditCardDf.loc[0, '账户状态'] = '销户'
  326. # elif df.index.size > 3 and df4[0] != '销户':
  327. # creditCardDf.loc[0, '账户状态'] = df4[0]
  328. else:#未激活
  329. creditCardDf.loc[0, '账户状态'] = '未激活'
  330. accountStatus= '未激活'
  331. if df.index.size > 2:
  332. creditCardDf.loc[0, '截至日期'] = creditCardDf.loc[0, '截至日期'][0:10]#去掉未激活字样
  333. dfObj["creditCardDf"] = creditCardDf;
  334. # 解析还款记录
  335. creditCardPayRecord = pd.DataFrame()
  336. if accountStatus == "正常" or accountStatus=="冻结" or accountStatus=="止付": # 正常
  337. recordIndexBegin = 9;
  338. print("正常合并之前")
  339. creditCardPayRecord = mergeCreditCardPayRecordDf(recordIndexBegin, df, idx, creditCardPayRecord)
  340. # print("正常合并之后")
  341. elif accountStatus == "未激活":
  342. recordIndexBegin = 0;
  343. elif accountStatus == "销户" or accountStatus == "呆账":
  344. recordIndexBegin = 7;
  345. creditCardPayRecord = mergeCreditCardPayRecordDf(recordIndexBegin, df, idx, creditCardPayRecord)
  346. else:
  347. print("账户#"+str(idx+1)+"#"+accountStatus)
  348. print("===================还款状态不满足条件===================")
  349. dfObj["creditCardPayRecordDf"] = creditCardPayRecord;
  350. return dfObj;
  351. #合并贷记卡还款记录明细
  352. def mergeCreditCardPayRecordDf(recordIndexBegin,df,idx,creditCardPayRecord):
  353. for i in range(recordIndexBegin, df.index.size):
  354. dfRecord = df.loc[i, :].dropna().reset_index(drop=True)
  355. # print(len(dfRecord))
  356. if len(dfRecord) == 13:
  357. # print(dfRecord)
  358. if dfRecord[0]!='':
  359. # print("非空")
  360. recordTmp = pd.DataFrame(columns=dfHeaderCreditCardPayRecord, index=[0])
  361. recordTmp.loc[0, '账户编号'] = idx + 1; # 账户编号
  362. payYear = dfRecord[0];
  363. #如果为13条记录,年份为空,说明是被合并的记录,此条记录计算为还款状态
  364. if dfRecord[0] =='':
  365. print("是空")
  366. # ,这块逻辑有问题yuan
  367. for j in range(1, 13):
  368. if j < 10:
  369. payMonth = payYear + "-0" + str(j ) + "-01"
  370. else:
  371. payMonth = payYear + "-" + str(j) + "-01"
  372. recordTmp.loc[0, '还款日期'] = payMonth;
  373. recordTmp.loc[0, '还款状态值'] = dfRecord[j];
  374. dfRecordPre = df.loc[i - 1, :].dropna().reset_index(drop=True)
  375. recordTmp.loc[0, '还款状态'] = dfRecordPre[j];
  376. creditCardPayRecord = pd.concat([creditCardPayRecord, recordTmp], axis=0, ignore_index=True);
  377. elif len(dfRecord) == 12:
  378. for j in range(0, 12):
  379. if j < 9:
  380. payMonth = payYear + "-0" + str(j + 1) + "-01"
  381. else:
  382. payMonth = payYear + "-" + str(j + 1) + "-01"
  383. recordTmp.loc[0, '还款日期'] = payMonth;
  384. recordTmp.loc[0, '还款状态值'] = dfRecord[j];
  385. dfRecordPre = df.loc[i - 1, :].dropna().reset_index(drop=True)
  386. recordTmp.loc[0, '还款状态'] = dfRecordPre[j + 1];
  387. creditCardPayRecord = pd.concat([creditCardPayRecord, recordTmp], axis=0, ignore_index=True);
  388. if not creditCardPayRecord.empty:
  389. # print("空不空")
  390. creditCardPayRecord = creditCardPayRecord[creditCardPayRecord['还款状态'] != ""];
  391. return creditCardPayRecord;
  392. #合并被追偿信息
  393. def mergeRecoveryInfoDf(dfObj,idx,reportTime):
  394. recoveryInfoDf = pd.DataFrame(columns=dfHeaderCreditCard, index=[0])
  395. df = dfObj["df"]
  396. df1 = df.loc[1, :].dropna().reset_index(drop=True)
  397. df2 = df.loc[2, :].dropna().reset_index(drop=True)
  398. df3 = df.loc[4, :].dropna().reset_index(drop=True)
  399. recoveryInfoDf.loc[0, '账户编号'] = idx + 1; # 账户编号
  400. recoveryInfoDf.loc[0, '管理机构'] = df1[0].replace('\n', '');
  401. recoveryInfoDf.loc[0, '业务种类'] = df1[1]
  402. recoveryInfoDf.loc[0, '债权接收日期'] = formatDate(df1[2])
  403. recoveryInfoDf.loc[0, '债权金额'] = replaceAmt(df1[3])
  404. recoveryInfoDf.loc[0, '债权转移时的还款状态'] = df1[4]
  405. recoveryInfoDf.loc[0, '账户状态'] = df3[0]
  406. accountStatus = df3[0]#账户状态
  407. if df.index.size==7:
  408. df4 = df.loc[5, :].dropna().reset_index(drop=True)
  409. recoveryInfoDf.loc[0, '特殊交易类型'] = df4[0]
  410. recoveryInfoDf.loc[0, '发生日期'] = df4[1]
  411. recoveryInfoDf.loc[0, '变更月份'] = df4[2]
  412. recoveryInfoDf.loc[0, '发生金额'] = df4[3]
  413. recoveryInfoDf.loc[0, '明细记录'] = df4[4]
  414. if accountStatus == "催收":
  415. recoveryInfoDf.loc[0, '余额'] = replaceAmt(df3[1])
  416. recoveryInfoDf.loc[0, '最近一次还款日期'] = formatDate(df3[2])
  417. else:
  418. recoveryInfoDf.loc[0, '账户关闭日期'] = df3[1]
  419. recoveryInfoDf.loc[0, '截至日期'] = formatDateJz(df2[0]);
  420. dfObj["recoveryInfoDf"] = recoveryInfoDf
  421. return dfObj
  422. #合并准贷记卡账户
  423. # ['账户编号','发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式',
  424. # '账户状态','透支余额','最近6个月平均透支余额','最大透支余额','账单日',
  425. # '本月实还款','最近一次还款日期','透支180天以上未付余额']
  426. def mergeCreditCardDfZ(dfObj,idx,reportTime):
  427. creditCardDf = pd.DataFrame(columns=dfHeaderCreditCardZ, index=[0])
  428. df = dfObj["df"]
  429. # print(df)
  430. df1 = df.loc[1, :].dropna().reset_index(drop=True)
  431. if df.index.size==2:
  432. logger.info("账户:"+str(idx+1)+"数据问题")
  433. return None
  434. df2 = df.loc[2, :].dropna().reset_index(drop=True)
  435. creditCardDf.loc[0, '账户编号'] = idx + 1; # 账户编号
  436. creditCardDf.loc[0, '发卡机构'] = df1[0].replace('\n', '');
  437. creditCardDf.loc[0, '账户标识'] = df1[1]
  438. creditCardDf.loc[0, '开立日期'] = formatDate(df1[2])
  439. creditCardDf.loc[0, '账户授信额度'] = replaceAmt(df1[3])
  440. creditCardDf.loc[0, '共享授信额度'] = replaceAmt(df1[4])
  441. creditCardDf.loc[0, '币种'] = df1[5]
  442. creditCardDf.loc[0, '担保方式'] = df1[6]
  443. if df1[5].find('美元')>=0:
  444. return None
  445. creditCardDf.loc[0, '截至日期'] = formatDateJz(df2[0]);
  446. accountStatus = ""
  447. if df.index.size > 3 and df2[0].find('未激活')<0:#非未激活
  448. df4 = df.loc[4, :].dropna().reset_index(drop=True)
  449. if df4[0] == "呆账":
  450. # df6 = df.loc[6, :].dropna().reset_index(drop=True)
  451. accountStatus = df4[0]
  452. creditCardDf.loc[0, '账户状态'] = accountStatus
  453. creditCardDf.loc[0, '余额'] = replaceAmt(df4[1])
  454. creditCardDf.loc[0, '透支余额'] = replaceAmt(df4[1])
  455. creditCardDf.loc[0, '最近一次还款日期'] = formatDate(df4[2])
  456. creditCardDf.loc[0, '截至日期月份'] = utils.difMonthReportTime(formatDateJz(df2[0]), reportTime); # 截止日期离报告时间月份
  457. elif df4[0] != '销户':
  458. try:
  459. # df6 = df.loc[6, :].dropna().reset_index(drop=True)
  460. accountStatus = df4[0]
  461. creditCardDf.loc[0, '账户状态'] = df4[0]
  462. creditCardDf.loc[0, '透支余额'] = replaceAmt(df4[1])
  463. creditCardDf.loc[0, '最近6个月平均透支余额'] = replaceAmt(df4[2])
  464. creditCardDf.loc[0, '最大透支余额'] = replaceAmt(df4[3])
  465. creditCardDf.loc[0, '账单日'] = formatDate(df4[4])
  466. creditCardDf.loc[0, '本月实还款'] = replaceAmt(df4[5])
  467. creditCardDf.loc[0, '最近一次还款日期'] = formatDate(df4[6])
  468. creditCardDf.loc[0, '透支180天以上未付余额'] = replaceAmt(df4[7])
  469. creditCardDf.loc[0, '截至日期月份'] = utils.difMonthReportTime(formatDateJz(df2[0]), reportTime); # 截止日期离报告时间月份
  470. except:
  471. logger.info("解析准贷记卡异常-"+idx);
  472. else:
  473. accountStatus = '销户'
  474. creditCardDf.loc[0, '账户状态'] = '销户'
  475. creditCardDf.loc[0, '最近一次还款日期']=""
  476. # if df.index.size > 3 and df4[0] == '销户':
  477. # creditCardDf.loc[0, '账户状态'] = '销户'
  478. # elif df.index.size > 3 and df4[0] != '销户':
  479. # creditCardDf.loc[0, '账户状态'] = df4[0]
  480. else:#未激活
  481. creditCardDf.loc[0, '账户状态'] = '未激活'
  482. accountStatus= '未激活'
  483. creditCardDf.loc[0, '最近一次还款日期'] = ""
  484. creditCardDf.loc[0, '截至日期'] = creditCardDf.loc[0, '截至日期'][0:10]#去掉未激活字样
  485. dfObj["creditCardDfZ"] = creditCardDf;
  486. # 解析还款记录
  487. creditCardPayRecord = pd.DataFrame()
  488. if accountStatus == "正常" or accountStatus=="冻结" or accountStatus=="止付": # 正常
  489. recordIndexBegin = 9;
  490. creditCardPayRecord = mergeCreditCardPayRecordDf(recordIndexBegin, df, idx, creditCardPayRecord)
  491. elif accountStatus == "未激活":
  492. recordIndexBegin = 0;
  493. elif accountStatus == "销户" or accountStatus == "呆账":
  494. recordIndexBegin = 7;
  495. creditCardPayRecord = mergeCreditCardPayRecordDf(recordIndexBegin, df, idx, creditCardPayRecord)
  496. else:
  497. print("账户#"+str(idx+1)+"#"+accountStatus)
  498. print("===================还款状态不满足条件===================")
  499. dfObj["creditCardPayRecordDfZ"] = creditCardPayRecord;
  500. return dfObj;
  501. #合并公积金
  502. #['账户编号','参缴地', '参缴日期', '初缴月份', '缴至月份', '缴费状态', '月缴存额', '个人缴存比例', '单位缴存比例','缴费单位','信息更新日期']
  503. def mergeHousingFundRcdDf(dfObj,idx,reportTime):
  504. housingFundRcdDf = pd.DataFrame(columns=dfHeaderHousingFundRcd, index=[0])
  505. df = dfObj["df"]
  506. row1 = df.loc[1, :].dropna().reset_index(drop=True)
  507. row3 = df.loc[3, :].dropna().reset_index(drop=True)
  508. housingFundRcdDf.loc[0, '账户编号'] = idx + 1; # 账户编号
  509. housingFundRcdDf.loc[0, '参缴地'] = row1[0].replace("\n","")
  510. housingFundRcdDf.loc[0, '参缴日期'] = utils.formatDate(row1[1])
  511. housingFundRcdDf.loc[0, '初缴月份'] = utils.formatDate(row1[2])
  512. housingFundRcdDf.loc[0, '缴至月份'] = utils.formatDate(row1[3])
  513. housingFundRcdDf.loc[0, '缴费状态'] = row1[4]
  514. housingFundRcdDf.loc[0, '月缴存额'] = utils.replaceAmt(row1[5])
  515. housingFundRcdDf.loc[0, '个人缴存比例'] = row1[6]
  516. housingFundRcdDf.loc[0, '单位缴存比例'] = row1[7]
  517. housingFundRcdDf.loc[0, '缴费单位'] = row3[0]
  518. housingFundRcdDf.loc[0, '信息更新日期'] = utils.formatDate(row3[1])
  519. dfObj["housingFundRcdDf"] = housingFundRcdDf
  520. return dfObj