#从“贷记卡信息”中提取,人民币账户、状态不为呆账、未激活、销户,授信额度:SUM(各账户授信额度), # 如同一个机构有多个账户,只取额度最高的计算 import numpy as np;# import pandas as pd import utils import math; import log logger = log.logger #使用中贷记卡平均发卡天数 #三寿 def getAvgUseCreditCardDay(creditDf,creditCardDfZ,reportTime): activeDf = creditDf[(creditDf['币种'] == '人民币元') & (creditDf['账户状态'] == '正常')]; activeDfZ = creditCardDfZ[(creditCardDfZ['币种'] == '人民币元') & (creditCardDfZ['账户状态'] == '正常')]; openDateDf = activeDf["开立日期"] if not activeDfZ.empty: openDateDf.append(activeDfZ["开立日期"]) if not openDateDf.empty: result= math.ceil(np.mean(openDateDf.apply(lambda x: utils.difDateReportTime(reportTime,x )))) else: result= None return result #额度使用 >= 50 % 的贷记卡数 def getUseRateCount(creditCardDf,creditCardDfZ,rate): creditCardDf = creditCardDf[creditCardDf['账户授信额度'] > 0] creditCardDf = creditCardDf[creditCardDf['已用额度'] / creditCardDf['账户授信额度'] >= rate] creditCardDfZ = creditCardDfZ[creditCardDfZ['账户授信额度'] > 0] creditCardDfZ = creditCardDfZ[creditCardDfZ['透支余额'] / creditCardDfZ['账户授信额度'] >= rate] return creditCardDf.index.size+creditCardDfZ.index.size #开户1年以内历史逾期90+贷记卡和准贷记卡数 def getOverDueCount(creditCardDf,creditCardPayRecordMergeDf,reportTime,overValue,month): openAccountDfTmp = creditCardDf[creditCardDf['开立日期'] > utils.getLastMonthDate(reportTime, month)] overdueCreditPayRcdDfTmp = creditCardPayRecordMergeDf[ creditCardPayRecordMergeDf['账户编号'].isin(openAccountDfTmp['账户编号'].values)] overdueCreditPayRcdDfTmp = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDfTmp) overdueCreditPayRcdDfTmp = overdueCreditPayRcdDfTmp[overdueCreditPayRcdDfTmp['还款状态'] >= overValue] return overdueCreditPayRcdDfTmp.index.size #最大信用账户天数(贷款&贷记卡人民账户) def getMaxAmtAccountDay(loanDf,creditDf,creditCardDfZ,reportTime): activeDf = creditDf[(creditDf['币种'] == '人民币元')].reset_index(drop=True); activeDfZ = creditCardDfZ[(creditCardDfZ['币种'] == '人民币元')].reset_index(drop=True); maxLoanAmt = np.max(loanDf["借款金额(本金)"]) maxCreditAmt = np.max(activeDf["账户授信额度"]) maxCreditAmtZ = np.max(activeDfZ["账户授信额度"]) openDate = "" if str(maxLoanAmt)!="nan" and str(maxCreditAmt)!="nan": if maxLoanAmt>=maxCreditAmt: maxDateIndex = np.argmax(loanDf["借款金额(本金)"]) openDate = loanDf.loc[maxDateIndex, :].dropna()[3] elif maxLoanAmt=100%的机构数)/总机构数 # 机构数为去重后的机构代码的数量 def getUseRate(creditDf,df,rate): # creditDf = creditDf.sort_values(by=["开立日期"], ascending=(False)) # creditDf = creditDf.head(3) # avgCreditAmt = np.mean(creditDf['账户授信额度']) creditDf = creditDf[creditDf['账户授信额度']>0] creditDf = creditDf[creditDf['已用额度']/creditDf['账户授信额度']>=rate] if df['发卡机构'].unique().size>0: return round(creditDf['发卡机构'].unique().size / df['发卡机构'].unique().size,3) else: return None # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各笔贷款按转换为数字后的“24个月(账户)还款状态”的后3位数字中,取最大值即为该账户的近3月最大逾期期数数,然后max(每个账户的近3月最大逾期数) # 例如记录1最后3位数为136,记录2最后3位数为135;则近3月最大逾期期数数=6 # “24个月(账户)还款状态” # 还款记录按日期排序最近3笔的最大逾期期数 def getPayRcdMaxOverdueNum(payRcdDf,month): # dateStr = utils.getLastMonthDate("",month) payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False)) payRcdDf = payRcdDf.groupby(['账户编号']).head(month) payRcdDf = payRcdDf[payRcdDf['还款状态']>0] # maxOverdueNum = np.max(payRcdDf['还款状态']) payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数': 'count'}) maxOverdueNum = np.max(payRcdTimesDf['次数']) return maxOverdueNum; # 当前一共透支期数 # SUM(所有准贷记卡账户当前透支期数),单账户透支期数根据还款记录里当期账户状态,if 为 ("/","*","C","N","#","M") 转化为 "0";if 为("G") 转化为 "7";if为("B")转化为"7";if 为("D") 转化为 "1"。 def getCurOverdueNum(payRcdDf): # dateStr = utils.getLastMonthDate("",month) payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False)) payRcdDf = payRcdDf.groupby(['账户编号']).head(1) # payRcdDf = payRcdDf[payRcdDf['还款状态']>0] overdueSum = np.sum(payRcdDf['还款状态']) return overdueSum; ## 从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”提取,剔除状态为结清、 # 转出、呆账,MAX(各账户24月内发生逾期的次数),逾期符号判断;数字1234567,字母"G"、"D"、"B"。 # 统计存在最大逾期期数账户开立日期距报告日期月数,若存在多笔账户,选择开立日期距报告日期最近的。 def getPayRcdMaxOverdueNumMonth(payRcdDf,df,reportTime,month): # if not df.empty: # # payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False)) # # payRcdDf = payRcdDf.groupby(['账户编号']).head(month)#先取出近两年 # # payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款状态"], ascending=(True, False)) # # payRcdDf = payRcdDf.groupby(['账户编号']).head(1)#取各个账号的最大值 # # endDateDf = df[["账户编号", "截至日期月份"]]; # # # payRcdDf = payRcdDf.set_index('账户编号') # # endDateDf = endDateDf.set_index('账户编号') # # payRcdDf = payRcdDf.join(endDateDf,on='账户编号') # # maxOverdueNum = None; # # if not payRcdDf.empty: # # if np.max(payRcdDf['还款状态'])!=0:#没有逾期不要与报告器计算 # # maxOverdueNum = int(np.max(payRcdDf['还款状态'] + payRcdDf['截至日期月份'])) # # return maxOverdueNum; # # else: # # return None; # # return None; if not df.empty: payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False)) payRcdDf = payRcdDf.groupby(['账户编号']).head(month) payRcdDf = payRcdDf[payRcdDf['还款状态'] > 0] # maxOverdueNum = np.max(payRcdDf['还款状态']) payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数': 'count'}) if not payRcdTimesDf.empty: maxOverdueNumIndex = np.argmax(payRcdTimesDf['次数']) row = payRcdTimesDf.loc[maxOverdueNumIndex, :] accountNum = row[0] openDate = df[df['账户编号'] == (accountNum)].reset_index(drop=True).loc[0,'开立日期'] return utils.difMonthReportTime(openDate, reportTime); return None; #近3月开卡最高额度 def getLastMonthMaxCreditAmt(creditDf,reportTime,month): creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)] # creditDf = creditDf.sort_values(by=["发卡机构", "账户授信额度"], ascending=(True, False)) # creditDf = creditDf.groupby(['发卡机构']).head(1) maxCreditAmt = np.max(creditDf['账户授信额度']) return maxCreditAmt #近3月开卡最低额度 def getLastMonthMinCreditAmt(creditDf,reportTime,month): creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)] maxCreditAmt = np.min(creditDf['账户授信额度']) return maxCreditAmt #近3月开卡平均额度 def getLastMonthAvgCreditAmt(creditDf,reportTime,month): creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)] maxCreditAmt = np.mean(creditDf['账户授信额度']) return round(maxCreditAmt,2) #贷款最近一次还款日期距今时长 #从“贷款信息”中提取,取客户的正常还款行为,不取因为某种特定的行为而产生的还款, # 剔除转出、结清、呆账、呆帐后,取各贷款记录的“最近一次还款日期”的最小值,然后计算距离报告时间的天数,MIN(day(报告时间-最近一次还款日期)) def getLastPayDateMinDays(df,reportTime): return utils.difDateReportTime(reportTime,np.max(df['最近一次还款日期'])) #连续计算算法 def calcContinuos(x): max=0 tmp=0 for a in x: if int(a)>=1 : tmp=tmp+1 else: if tmp>max: max=tmp tmp=0 if max= utils.getLastMonthDate(reportTime, month)] return openAccountDf.index.size return 0; # 贷记卡账户近3月逾期期数大于或等于“1”的次数 def getLoanOverdueTimes(payRcdDf, reportTime,times, month): payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False)) payDate = utils.getLastMonthDate(reportTime,month) # payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款日期']>=payDate] # 汇算帐20210817 # 改为不包含起始区间的1号,如果报告期为6月,取4,5,6,如果报告期为5月取3,4,5,5月2号 -90天 02-01 不能包含 payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)] # 1208改为区间,两种逻辑都符合 payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp) overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times].index.size return overdueTimes; # 贷款账户近3月逾期期数大于或等于“1”的金额-汇算帐新增 def getLoanOverdueAmt(payRcdDf, reportTime,times, month): payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False)) payDate = utils.getLastMonthDate(reportTime,month) payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate)&(payRcdDfTmp['还款日期']<=reportTime)]#1208改为区间,两种逻辑都符合 payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp) overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times] return np.sum(overdueTimes["还款状态值"]); #贷记卡24期还款记录次数 def getPayRcdCount(payRcdDf,normalDf,month): payStatus = ["G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"] # 贷款24期还款记录次数 剔除结清 转出 呆账 payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)] payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False)) payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(month) # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数) payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)] payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count() return np.max(payRcdTimes) #最近6个月有贷款还款记录的月份数 def getPayRcdCountNew(payRcdDf,reportTime,month): # reportTime = str(np.datetime64(reportTime, "M")) + "-02" reportTime = utils.get_last_month_first_day_v2(reportTime) payDate = utils.getLastMonthDate(reportTime, month) # payRcdDfTmp = payRcdDf[payRcdDf['还款日期'] >= payDate] payRcdDfTmp = payRcdDf[(payRcdDf['还款日期'] > payDate) & (payRcdDf['还款日期'] <= reportTime)] payStatus = ["Z","G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"] payRcdTimesDf = payRcdDfTmp[payRcdDfTmp['还款状态'].isin(payStatus)] count = payRcdTimesDf["还款日期"].unique().size; return count