123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262 |
- #从“贷记卡信息”中提取,人民币账户、状态不为呆账、未激活、销户,授信额度:SUM(各账户授信额度),
- # 如同一个机构有多个账户,只取额度最高的计算
- import numpy as np;#
- import pandas as pd
- import utils
- import math;
- import log
- logger = log.logger
- #使用中贷记卡平均发卡天数
- #三寿
- def getAvgUseCreditCardDay(creditDf,creditCardDfZ,reportTime):
- activeDf = creditDf[(creditDf['币种'] == '人民币元') & (creditDf['账户状态'] == '正常')];
- activeDfZ = creditCardDfZ[(creditCardDfZ['币种'] == '人民币元') & (creditCardDfZ['账户状态'] == '正常')];
- openDateDf = activeDf["开立日期"]
- if not activeDfZ.empty:
- openDateDf.append(activeDfZ["开立日期"])
- if not openDateDf.empty:
- result= math.ceil(np.mean(openDateDf.apply(lambda x: utils.difDateReportTime(reportTime,x ))))
- else:
- result= None
- return result
- #额度使用 >= 50 % 的贷记卡数
- def getUseRateCount(creditCardDf,creditCardDfZ,rate):
- creditCardDf = creditCardDf[creditCardDf['账户授信额度'] > 0]
- creditCardDf = creditCardDf[creditCardDf['已用额度'] / creditCardDf['账户授信额度'] >= rate]
- creditCardDfZ = creditCardDfZ[creditCardDfZ['账户授信额度'] > 0]
- creditCardDfZ = creditCardDfZ[creditCardDfZ['透支余额'] / creditCardDfZ['账户授信额度'] >= rate]
- return creditCardDf.index.size+creditCardDfZ.index.size
- #开户1年以内历史逾期90+贷记卡和准贷记卡数
- def getOverDueCount(creditCardDf,creditCardPayRecordMergeDf,reportTime,overValue,month):
- openAccountDfTmp = creditCardDf[creditCardDf['开立日期'] > utils.getLastMonthDate(reportTime, month)]
- overdueCreditPayRcdDfTmp = creditCardPayRecordMergeDf[
- creditCardPayRecordMergeDf['账户编号'].isin(openAccountDfTmp['账户编号'].values)]
- overdueCreditPayRcdDfTmp = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDfTmp)
- overdueCreditPayRcdDfTmp = overdueCreditPayRcdDfTmp[overdueCreditPayRcdDfTmp['还款状态'] >= overValue]
- return overdueCreditPayRcdDfTmp.index.size
- #最大信用账户天数(贷款&贷记卡人民账户)
- def getMaxAmtAccountDay(loanDf,creditDf,creditCardDfZ,reportTime):
- activeDf = creditDf[(creditDf['币种'] == '人民币元')].reset_index(drop=True);
- activeDfZ = creditCardDfZ[(creditCardDfZ['币种'] == '人民币元')].reset_index(drop=True);
- maxLoanAmt = np.max(loanDf["借款金额(本金)"])
- maxCreditAmt = np.max(activeDf["账户授信额度"])
- maxCreditAmtZ = np.max(activeDfZ["账户授信额度"])
- openDate = ""
- if str(maxLoanAmt)!="nan" and str(maxCreditAmt)!="nan":
- if maxLoanAmt>=maxCreditAmt:
- maxDateIndex = np.argmax(loanDf["借款金额(本金)"])
- openDate = loanDf.loc[maxDateIndex, :].dropna()[3]
- elif maxLoanAmt<maxCreditAmt:
- maxDateIndex = np.argmax(activeDf["账户授信额度"])
- openDate = activeDf.loc[maxDateIndex, :].dropna()[3]
- elif str(maxLoanAmt)!="nan" and str(maxCreditAmt)=="nan":
- maxDateIndex = np.argmax(loanDf["借款金额(本金)"])
- openDate = loanDf.loc[maxDateIndex, :].dropna()[3]
- elif str(maxLoanAmt) == "nan" and str(maxCreditAmt) != "nan":
- maxDateIndex = np.argmax(activeDf["账户授信额度"])
- openDate = activeDf.loc[maxDateIndex, :].dropna()[3]
- elif str(maxCreditAmt) == "nan" and str(maxCreditAmtZ) != "nan":
- maxDateIndex = np.argmax(maxCreditAmtZ["账户授信额度"])
- openDate = maxCreditAmtZ.loc[maxDateIndex, :].dropna()[3]
- if openDate !="":
- return utils.difDateReportTime(reportTime,openDate)
- else:
- logger.error(str(maxLoanAmt)+"#"+str(maxCreditAmt)+str(maxCreditAmtZ))
- return None
- #最大授信额度
- def getMaxCreditAmt(creditDf):
- creditDf = creditDf.sort_values(by=["发卡机构", "账户授信额度"], ascending=(True, False))
- creditDf = creditDf.groupby(['发卡机构']).head(1)
- maxCreditAmt = np.sum(creditDf['账户授信额度'])
- return maxCreditAmt
- #最近新发放的3张贷记卡平均额度
- def getAvgCreditAmt(creditDf):
- creditDf = creditDf.sort_values(by=["开立日期"], ascending=(False))
- creditDf = creditDf.head(3)
- avgCreditAmt = np.mean(creditDf['账户授信额度'])
- return round(avgCreditAmt,2)
- #从“贷记卡信息”中提取,人民币账户、状态不为呆账、未激活、销户,统计(已用额度/授信额度>=100%的机构数)/总机构数
- # 机构数为去重后的机构代码的数量
- def getUseRate(creditDf,df,rate):
- # creditDf = creditDf.sort_values(by=["开立日期"], ascending=(False))
- # creditDf = creditDf.head(3)
- # avgCreditAmt = np.mean(creditDf['账户授信额度'])
- creditDf = creditDf[creditDf['账户授信额度']>0]
- creditDf = creditDf[creditDf['已用额度']/creditDf['账户授信额度']>=rate]
- if df['发卡机构'].unique().size>0:
- return round(creditDf['发卡机构'].unique().size / df['发卡机构'].unique().size,3)
- else:
- return None
- # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各笔贷款按转换为数字后的“24个月(账户)还款状态”的后3位数字中,取最大值即为该账户的近3月最大逾期期数数,然后max(每个账户的近3月最大逾期数)
- # 例如记录1最后3位数为136,记录2最后3位数为135;则近3月最大逾期期数数=6
- # “24个月(账户)还款状态”
- # 还款记录按日期排序最近3笔的最大逾期期数
- def getPayRcdMaxOverdueNum(payRcdDf,month):
- # dateStr = utils.getLastMonthDate("",month)
- payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
- payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
- payRcdDf = payRcdDf[payRcdDf['还款状态']>0]
- # maxOverdueNum = np.max(payRcdDf['还款状态'])
- payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数': 'count'})
- maxOverdueNum = np.max(payRcdTimesDf['次数'])
- return maxOverdueNum;
- # 当前一共透支期数
- # SUM(所有准贷记卡账户当前透支期数),单账户透支期数根据还款记录里当期账户状态,if 为 ("/","*","C","N","#","M") 转化为 "0";if 为("G") 转化为 "7";if为("B")转化为"7";if 为("D") 转化为 "1"。
- def getCurOverdueNum(payRcdDf):
- # dateStr = utils.getLastMonthDate("",month)
- payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
- payRcdDf = payRcdDf.groupby(['账户编号']).head(1)
- # payRcdDf = payRcdDf[payRcdDf['还款状态']>0]
- overdueSum = np.sum(payRcdDf['还款状态'])
- return overdueSum;
- ## 从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”提取,剔除状态为结清、
- # 转出、呆账,MAX(各账户24月内发生逾期的次数),逾期符号判断;数字1234567,字母"G"、"D"、"B"。
- # 统计存在最大逾期期数账户开立日期距报告日期月数,若存在多笔账户,选择开立日期距报告日期最近的。
- def getPayRcdMaxOverdueNumMonth(payRcdDf,df,reportTime,month):
- # if not df.empty:
- # # payRcdDf = payRcdDf.sort_values(by=["账户编号","还款日期"] , ascending=(True,False))
- # # payRcdDf = payRcdDf.groupby(['账户编号']).head(month)#先取出近两年
- # # payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款状态"], ascending=(True, False))
- # # payRcdDf = payRcdDf.groupby(['账户编号']).head(1)#取各个账号的最大值
- # # endDateDf = df[["账户编号", "截至日期月份"]];
- # # # payRcdDf = payRcdDf.set_index('账户编号')
- # # endDateDf = endDateDf.set_index('账户编号')
- # # payRcdDf = payRcdDf.join(endDateDf,on='账户编号')
- # # maxOverdueNum = None;
- # # if not payRcdDf.empty:
- # # if np.max(payRcdDf['还款状态'])!=0:#没有逾期不要与报告器计算
- # # maxOverdueNum = int(np.max(payRcdDf['还款状态'] + payRcdDf['截至日期月份']))
- # # return maxOverdueNum;
- # # else:
- # # return None;
- # # return None;
- if not df.empty:
- payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
- payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
- payRcdDf = payRcdDf[payRcdDf['还款状态'] > 0]
- # maxOverdueNum = np.max(payRcdDf['还款状态'])
- payRcdTimesDf = payRcdDf.groupby('账户编号', as_index=False)['账户编号'].agg({'次数': 'count'})
- if not payRcdTimesDf.empty:
- maxOverdueNumIndex = np.argmax(payRcdTimesDf['次数'])
- row = payRcdTimesDf.loc[maxOverdueNumIndex, :]
- accountNum = row[0]
- openDate = df[df['账户编号'] == (accountNum)].reset_index(drop=True).loc[0,'开立日期']
- return utils.difMonthReportTime(openDate, reportTime);
- return None;
- #近3月开卡最高额度
- def getLastMonthMaxCreditAmt(creditDf,reportTime,month):
- creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)]
- # creditDf = creditDf.sort_values(by=["发卡机构", "账户授信额度"], ascending=(True, False))
- # creditDf = creditDf.groupby(['发卡机构']).head(1)
- maxCreditAmt = np.max(creditDf['账户授信额度'])
- return maxCreditAmt
- #近3月开卡最低额度
- def getLastMonthMinCreditAmt(creditDf,reportTime,month):
- creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)]
- maxCreditAmt = np.min(creditDf['账户授信额度'])
- return maxCreditAmt
- #近3月开卡平均额度
- def getLastMonthAvgCreditAmt(creditDf,reportTime,month):
- creditDf = creditDf[creditDf['开立日期'] >= utils.getLastMonthDate(reportTime,month)]
- maxCreditAmt = np.mean(creditDf['账户授信额度'])
- return round(maxCreditAmt,2)
- #贷款最近一次还款日期距今时长
- #从“贷款信息”中提取,取客户的正常还款行为,不取因为某种特定的行为而产生的还款,
- # 剔除转出、结清、呆账、呆帐后,取各贷款记录的“最近一次还款日期”的最小值,然后计算距离报告时间的天数,MIN(day(报告时间-最近一次还款日期))
- def getLastPayDateMinDays(df,reportTime):
- return utils.difDateReportTime(reportTime,np.max(df['最近一次还款日期']))
- #连续计算算法
- def calcContinuos(x):
- max=0
- tmp=0
- for a in x:
- if int(a)>=1 :
- tmp=tmp+1
- else:
- if tmp>max:
- max=tmp
- tmp=0
- if max<tmp:
- max=tmp
- return max
- #获取最大连续逾期月份数
- def getContinuousOverdueMonth(payRcdDf,df,month):
- payRcdDf = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
- payRcdDf = payRcdDf.groupby(['账户编号']).head(month)
- accountNoDf = payRcdDf['账户编号'].unique()
- maxContinuousAr = []
- for no in accountNoDf:
- payRcdTmp = payRcdDf[payRcdDf['账户编号'].isin([no])]
- result = payRcdTmp['还款状态'].rolling(month).apply(calcContinuos).max()
- if str(result)!="nan":
- maxContinuousAr.append(result)
- # if str(np.max(maxContinuousAr))=="nan":
- # i=0;
- if len(maxContinuousAr)!=0:
- return int(np.max(maxContinuousAr))
- return None;
- #贷款开户数
- def getOpenAccount(df,reportTime,month):
- if not df.empty:
- tmpDf = df
- openAccountDf = tmpDf[tmpDf['开立日期'] >= utils.getLastMonthDate(reportTime, month)]
- return openAccountDf.index.size
- return 0;
- # 贷记卡账户近3月逾期期数大于或等于“1”的次数
- def getLoanOverdueTimes(payRcdDf, reportTime,times, month):
- payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
- payDate = utils.getLastMonthDate(reportTime,month)
- # payRcdDfTmp = payRcdDfTmp[payRcdDfTmp['还款日期']>=payDate]
- # 汇算帐20210817
- # 改为不包含起始区间的1号,如果报告期为6月,取4,5,6,如果报告期为5月取3,4,5,5月2号 -90天 02-01 不能包含
- payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate) & (payRcdDfTmp['还款日期'] <= reportTime)] # 1208改为区间,两种逻辑都符合
- payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
- overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times].index.size
- return overdueTimes;
- # 贷款账户近3月逾期期数大于或等于“1”的金额-汇算帐新增
- def getLoanOverdueAmt(payRcdDf, reportTime,times, month):
- payRcdDfTmp = payRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
- payDate = utils.getLastMonthDate(reportTime,month)
- payRcdDfTmp = payRcdDfTmp[(payRcdDfTmp['还款日期'] > payDate)&(payRcdDfTmp['还款日期']<=reportTime)]#1208改为区间,两种逻辑都符合
- payRcdDfTmp = utils.replacePayRcdStatusOverdue(payRcdDfTmp)
- overdueTimes = payRcdDfTmp[payRcdDfTmp['还款状态']>=times]
- return np.sum(overdueTimes["还款状态值"]);
- #贷记卡24期还款记录次数
- def getPayRcdCount(payRcdDf,normalDf,month):
- payStatus = ["G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
- # 贷款24期还款记录次数 剔除结清 转出 呆账
- payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
- payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
- payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(month)
- # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
- payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
- payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
- return np.max(payRcdTimes)
- #最近6个月有贷款还款记录的月份数
- def getPayRcdCountNew(payRcdDf,reportTime,month):
- # reportTime = str(np.datetime64(reportTime, "M")) + "-02"
- reportTime = utils.get_last_month_first_day_v2(reportTime)
- payDate = utils.getLastMonthDate(reportTime, month)
- # payRcdDfTmp = payRcdDf[payRcdDf['还款日期'] >= payDate]
- payRcdDfTmp = payRcdDf[(payRcdDf['还款日期'] > payDate) & (payRcdDf['还款日期'] <= reportTime)]
- payStatus = ["Z","G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
- payRcdTimesDf = payRcdDfTmp[payRcdDfTmp['还款状态'].isin(payStatus)]
- count = payRcdTimesDf["还款日期"].unique().size;
- return count
|