payRcdIndexParser.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import consts
  2. import numpy as np;# 近3月开户最高贷款本金
  3. import utils
  4. import time;
  5. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各笔贷款按转换为数字后的“24个月(账户)还款状态”的后3位数字中,取最大值即为该账户的近3月最大逾期期数数,然后max(每个账户的近3月最大逾期数)
  6. # 例如记录1最后3位数为136,记录2最后3位数为135;则近3月最大逾期期数数=6
  7. # “24个月(账户)还款状态”
  8. # 还款记录按日期排序最近3笔的最大逾期期数 TODO 规则可能会变
  9. def getPayRcdMaxOverdueNum(payRcdDf,month):
  10. # dateStr = utils.getLastMonthDate("",month)
  11. payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
  12. payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
  13. payRcdDf = payRcdDf[payRcdDf['还款状态']>0]
  14. payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数':'count'})
  15. maxOverdueNum = np.max(payRcdTimesDf['次数'])
  16. return maxOverdueNum;
  17. #近24月贷款最大逾期距离现在的月数 TODO 规则不对
  18. # 从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”提取,剔除状态为结清、
  19. # 转出、呆账,MAX(各账户24月内发生逾期的次数),逾期符号判断;数字1234567,字母"G"、"D"、"B"。
  20. # 统计存在最大逾期期数账户开立日期距报告日期月数,若存在多笔账户,选择开立日期距报告日期最近的。
  21. def getPayRcdMaxOverdueNumMonth(payRcdDf,df,reportTime,month):
  22. # payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
  23. # payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
  24. # if not payRcdDf.empty:
  25. # maxOverdueNum = np.argmax(payRcdDf['还款状态'])
  26. # # if maxOverdueNum !=0:
  27. # payDate = payRcdDf.loc[maxOverdueNum,:]['还款日期']
  28. # return utils.difMonth(payDate)
  29. # else:
  30. # return None;
  31. # if not df.empty:
  32. # payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
  33. # payRcdDf = payRcdDf.groupby(['账户编号']).head(month)#先取出近两年
  34. # payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款状态"], ascending=(True, False))
  35. # payRcdDf = payRcdDf.groupby(['账户编号']).head(1)#取各个账号的最大值
  36. # endDateDf = df[["账户编号", "截至日期月份"]];
  37. # endDateDf = endDateDf.set_index('账户编号')
  38. # payRcdDf = payRcdDf.join(endDateDf,on='账户编号')
  39. # maxOverdueNum = None;
  40. # if not payRcdDf.empty:
  41. # if np.max(payRcdDf['还款状态']) != 0: # 没有逾期不要与报告器计算
  42. # maxOverdueNum = int(np.max(payRcdDf['还款状态'] + payRcdDf['截至日期月份']))
  43. # return maxOverdueNum;
  44. # else:
  45. # return None;
  46. if not df.empty:
  47. payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  48. payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
  49. payRcdDf = payRcdDf[payRcdDf['还款状态'] > 0]
  50. # maxOverdueNum = np.max(payRcdDf['还款状态'])
  51. payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数': 'count'})
  52. if not payRcdTimesDf.empty:
  53. maxOverdueNumIndex = np.argmax(payRcdTimesDf['次数'])
  54. row = payRcdTimesDf.loc[maxOverdueNumIndex, :]
  55. accountNum = row[0]
  56. openDate = df[df['账户编号'] == (accountNum)].reset_index(drop=True).loc[0, '开立日期']
  57. return utils.difMonthReportTime(openDate, reportTime);
  58. return None;
  59. # 贷款账户近3月逾期期数大于或等于“1”的次数
  60. def getLoanOverdueTimes(payRcdDf, reportTime,times, month):
  61. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  62. payDate = utils.getLastMonthDate(reportTime,month)
  63. #汇算帐20210817
  64. #改为不包含起始区间的1号,如果报告期为6月,取4,5,6,如果报告期为5月取3,4,5,5月2号 -90天 02-01 不能包含
  65. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate)&(payRcdDfTmp['还款日期'] <= reportTime)]#1208改为区间,两种逻辑都符合
  66. payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
  67. overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times].index.size
  68. return overdueTimes;
  69. # 贷款账户近3月逾期期数大于或等于“1”的金额-汇算帐新增
  70. def getLoanOverdueAmt(payRcdDf, reportTime,times, month):
  71. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  72. payDate = utils.getLastMonthDate(reportTime,month)
  73. # 汇算帐20210817
  74. # 改为不包含起始区间的1号,如果报告期为6月,取4,5,6,如果报告期为5月取3,4,5,5月2号 -90天 02-01 不能包含
  75. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate)&(payRcdDfTmp['还款日期'] <= reportTime)]#1208改为区间,两种逻辑都符合
  76. payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
  77. overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times]
  78. return np.sum(overdueTimes["还款状态值"]);
  79. #贷款账户24个月是否出现G
  80. def isExistsInd(payRcdDf,reportTime,ind,month):
  81. # 过滤为G的还款状态
  82. payDate = utils.getLastMonthDate(reportTime, month)
  83. payRcdDfTmp = payRcdDf[payRcdDf['还款日期'] > payDate]
  84. payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款状态'].isin([ind])]
  85. loanGInd = "0"
  86. if payRcdDfTmp.index.size > 0:
  87. loanGInd = "1";
  88. return loanGInd;
  89. #历史上担保人代偿次数 汇算帐新增0630
  90. #近24个月担保人代偿次数 D Z
  91. def getDbPayCount(payRcdDf, reportTime, payInd,month):
  92. # 过滤为G的还款状态
  93. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  94. payDate = utils.getLastMonthDate(reportTime, month)
  95. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)]
  96. payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款状态'].isin([payInd])]
  97. return payRcdDfTmp.index.size;
  98. # 用户所有贷款账户过去24个月存在逾期的账户数目
  99. def getLoanOverdueCount(payRcdDf, reportTime, month):
  100. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  101. payDate = utils.getLastMonthDate(reportTime,month)
  102. # payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款日期']>=payDate]
  103. # 汇算帐20210817
  104. # 改为不包含起始区间的1号,如果报告期为6月,取4,5,6,如果报告期为5月取3,4,5,5月2号 -90天 02-01 不能包含
  105. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)]
  106. payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
  107. overdueCountDf = payRcdDfTmp[payRcdDfTmp['还款状态']>0]
  108. return overdueCountDf['账户编号'].unique().size;
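# Number of the user's loan accounts with NO overdue record in the look-back window (accounts with records minus accounts with overdue records)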
  110. def getLoanNormalCount(payRcdDf, reportTime, month):
  111. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  112. payDate = utils.getLastMonthDate(reportTime,month)
  113. # payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款日期']>=payDate]
  114. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)]
  115. payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
  116. overdueCountDf = payRcdDfTmp[payRcdDfTmp['还款状态']>0]
  117. payRcdDfAll = payRcdDf[payRcdDf['还款日期'] > payDate]
  118. return payRcdDfAll['账户编号'].unique().size-overdueCountDf['账户编号'].unique().size;
  119. #按报告期计算
  120. def getPayRcdMaxOverdueNumX(payRcdDf,reportTime,month):
  121. if payRcdDf.empty:
  122. return 0
  123. payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  124. payDate = utils.getLastMonthDate(reportTime, month)
  125. # payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款日期'] >= payDate]
  126. payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)]
  127. payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
  128. if payRcdDfTmp.empty:
  129. return 0
  130. maxOverdueNum = np.max(payRcdDfTmp['还款状态'])
  131. return maxOverdueNum;
  132. #用户过去3个月最大逾期期数
  133. def getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,month):
  134. loanOverdueMax = getPayRcdMaxOverdueNumX(loanPayRecordMergeDf, reportTime, month)
  135. creditCardOverdueMax = getPayRcdMaxOverdueNumX(creditCardPayRecordMergeDf, reportTime, month)
  136. creditCardOverdueMaxZ = getPayRcdMaxOverdueNumX(creditCardPayRecordMergeDfZ, reportTime, month)
  137. overdueNum = [loanOverdueMax, creditCardOverdueMax, creditCardOverdueMaxZ]
  138. return np.max(overdueNum)
  139. #近12个月最大逾期期数--不包括准贷记卡,汇算帐需求
  140. def getPayRcdMaxOverdueNumAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,reportTime,month):
  141. loanOverdueMax = getPayRcdMaxOverdueNumX(loanPayRecordMergeDf, reportTime, month)
  142. creditCardOverdueMax = getPayRcdMaxOverdueNumX(creditCardPayRecordMergeDf, reportTime, month)
  143. overdueNum = [loanOverdueMax, creditCardOverdueMax]
  144. return np.max(overdueNum)
  145. #贷款24期还款记录次数
  146. def getPayRcdCount(payRcdDf,normalDf,month):
  147. payStatus = ["G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
  148. # 贷款24期还款记录次数 剔除结清 转出 呆账
  149. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  150. payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  151. payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(month)
  152. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
  153. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  154. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  155. return np.max(payRcdTimes)
  156. #最近6个月有贷款还款记录的月份数
  157. def getPayRcdCountNew(payRcdDf,reportTime,month):
  158. # reportTime = str(np.datetime64(reportTime, "M")) + "-02"
  159. reportTime = utils.get_last_month_first_day_v2(reportTime)
  160. payDate = utils.getLastMonthDate(reportTime, month)
  161. # payRcdDfTmp = payRcdDf[payRcdDf['还款日期'] >= payDate]
  162. payRcdDfTmp = payRcdDf[(payRcdDf['还款日期'] > payDate) & (payRcdDf['还款日期'] <= reportTime)]
  163. payStatus = ["Z","G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
  164. payRcdTimesDf = payRcdDfTmp[payRcdDfTmp['还款状态'].isin(payStatus)]
  165. count = payRcdTimesDf["还款日期"].unique().size;
  166. return count