12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101 |
- import pdfplumber
- import pandas as pd
- import numpy as np;
- import sys
- import os
- #指标相关
- import loanIndexParser as lip;
- import payRcdIndexParser as prp;
- import creditCardIndexParser as cip
- import queryInfoIndexParser as qip
- import utils;
- import time;
- import consts;
- import math
- import dfParser;
- pd.set_option('mode.chained_assignment', None)
- import log
- logger = log.logger
# ---------------------------------------------------------------------------
# Table registry
#
# For every table recognised in the report this section creates:
#   * an empty DataFrame (or an empty list for tables that repeat once per
#     account),
#   * the header row(s) used to recognise the table, appended to allHeaders,
#   * a dfMap entry {"df" | "dfs": ..., "nextDf": ...}.
# NOTE(review): "nextDf" presumably receives the continuation of a table that
# was split by a page break; it is only initialised here - confirm against the
# code that fills dfMap.
# ---------------------------------------------------------------------------
dfMap = {}
allHeaders = []  # every known header row

# Query information
queryInfoDf = pd.DataFrame()
queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"]
dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None}
allHeaders.append(queryInfoDf_header)

# Identity information
identityDf = pd.DataFrame()
identity_header = ['性别', None, '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱']
addressDf = pd.DataFrame()  # mailing address
dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None}
allHeaders.append(identity_header)

# Spouse information
mateDf = pd.DataFrame()
mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话']
dfMap["mateDf"] = {"df": mateDf, "nextDf": None}
allHeaders.append(mateDf_header)

# Residence information (original note: not used yet, so not parsed for now)
liveInfoDf = pd.DataFrame()
liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期']
dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None}
allHeaders.append(liveInfoDf_header)

# Occupation information
occupationInfoDf = pd.DataFrame()
occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话']
occupationInfoDf1 = pd.DataFrame()
# Second occupation header is currently disabled:
# occupationInfo_header1 = ['编号', '职业', '行业', None, None, '职务', '职称', '进入本单位年份', None, '信息更新日期']
dfMap["occupationInfoDf"] = {"df": occupationInfoDf, "nextDf": None}
# allHeaders.append(occupationInfo_header1)
allHeaders.append(occupationInfo_header)

# Previous query record (header only, no dfMap entry)
preQueryRcd_header0 = ['上一次查询记录']
allHeaders.append(preQueryRcd_header0)

# Query record summary
queryInfoBriefDf = pd.DataFrame()
queryInfoBrief_header0 = ['最近1个月内的查询机构数', None, '最近1个月内的查询次数', None, None, '最近2年内的查询次数', None, None]
queryInfoBrief_header1 = ['贷款审批', '信用卡审批', '贷款审批', '信用卡\n审批', '本人查询', '贷后管理', '担保资格\n审查', '特约商户\n实名审查']
dfMap["queryInfoBriefDf"] = {"df": queryInfoBriefDf, "nextDf": None}
allHeaders.append(queryInfoBrief_header0)
allHeaders.append(queryInfoBrief_header1)

# Credit transaction information prompt
loanTradeInfoDf = pd.DataFrame()
loanTradeInfo_header = ['业务类型', None, '账户数', '首笔业务发放月份']
dfMap["loanTradeInfoDf"] = {"df": loanTradeInfoDf, "nextDf": None}
allHeaders.append(loanTradeInfo_header)

# --- Credit transaction default summary ---
# Recovery summary (asset disposal and advance-payment business)
recoveryInfoSumDf = pd.DataFrame()
recoveryInfoSumDf_header = ['业务类型', '账户数', '余额']
dfMap["recoveryInfoSumDf"] = {"df": recoveryInfoSumDf, "nextDf": None}
allHeaders.append(recoveryInfoSumDf_header)

# Bad-debt summary
badDebtsInfoSumDf = pd.DataFrame()
badDebtsInfoSumDf_header = ['账户数', '余额']
dfMap["badDebtsInfoSumDf"] = {"df": badDebtsInfoSumDf, "nextDf": None}
allHeaders.append(badDebtsInfoSumDf_header)

# Overdue / overdraft summary
overdueInfoSumDf = pd.DataFrame()
overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数']
dfMap["overdueInfoSumDf"] = {"df": overdueInfoSumDf, "nextDf": None}
allHeaders.append(overdueInfoSumDf_header)

# Non-revolving loan account summary
loanAccountInfoSumDf = pd.DataFrame()
loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总', None, None, None, None]
loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
dfMap["loanAccountInfoSumDf"] = {"df": loanAccountInfoSumDf, "nextDf": None}
allHeaders.append(loanAccountInfoSumDf_header0)
allHeaders.append(loanAccountInfoSumDf_header1)

# Sub-accounts under a revolving credit line summary
cycleCreditAccountInfoSumDf = pd.DataFrame()
cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总', None, None, None, None]
# A stray trailing comma used to turn this header into a 1-tuple wrapping the
# list, so the entry in allHeaders was not a header row like the others.
cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
dfMap["cycleCreditAccountInfoSumDf"] = {"df": cycleCreditAccountInfoSumDf, "nextDf": None}
allHeaders.append(cycleCreditAccountInfoSumDf_header0)
allHeaders.append(cycleCreditAccountInfoSumDf_header1)

# Revolving loan account summary
cycleLoanAccountInfoSumDf = pd.DataFrame()
cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总', None, None, None, None]
cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
dfMap["cycleLoanAccountInfoSumDf"] = {"df": cycleLoanAccountInfoSumDf, "nextDf": None}
allHeaders.append(cycleLoanAccountInfoSumDf_header0)
allHeaders.append(cycleLoanAccountInfoSumDf_header1)

# Credit card account summary
creditCardInfoSumDf = pd.DataFrame()
creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总', None, None, None, None, None, None]
creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
dfMap["creditCardInfoSumDf"] = {"df": creditCardInfoSumDf, "nextDf": None}
allHeaders.append(creditCardInfoSumDf_header0)
allHeaders.append(creditCardInfoSumDf_header1)

# Semi-credit card account summary
# NOTE(review): header0 has 5 cells while header1 has 7, and the dfMap key
# ("creditCardInfoDfZ") does not follow the variable name; both are kept
# unchanged because other code may depend on them - confirm.
creditCardInfoSumDfZ = pd.DataFrame()
creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总', None, None, None, None]
creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
dfMap["creditCardInfoDfZ"] = {"df": creditCardInfoSumDfZ, "nextDf": None}
allHeaders.append(creditCardInfoSumDfZ_header0)
allHeaders.append(creditCardInfoSumDfZ_header1)

# Loan accounts: non-revolving, sub-accounts under a revolving line, revolving
loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种']
loanDfs = []
dfMap["loanDfs"] = {"dfs": loanDfs, "nextDf": []}
allHeaders.append(loan_header)

# Credit card accounts
creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式']
creditCardDfs = []
dfMap["creditCardDfs"] = {"dfs": creditCardDfs, "nextDf": []}
allHeaders.append(creditCard_header)

# Semi-credit card accounts
creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式']
creditCardDfsZ = []
dfMap["creditCardDfsZ"] = {"dfs": creditCardDfsZ, "nextDf": []}
allHeaders.append(creditCardZ_header)

# Not parsed (per the original notes): related repayment-liability summary,
# recovery details inside the credit transaction details.

# Public information details: enforcement records
forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式']
forceExecRcdDfs = []
dfMap["forceExecRcdDfs"] = {"dfs": forceExecRcdDfs, "nextDf": []}
allHeaders.append(forceExecRcdDfs_header)

# Query records
queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因']
dfMap["queryRecordDetailDf"] = {"df": pd.DataFrame(), "nextDf": []}
allHeaders.append(queryRecordDetailDf_header)
# Paging strategy (design notes carried over from the original comments):
#   * keep each df inside an object that also stores the next df and a key;
#   * check whether the object's df is complete - if it is not, the table was
#     split by a page break and nextDf is merged into the current df;
#   * this targets the case where the columns can be merged.
keyList = []  # keys of all dfs

# Header detection idea: compare the first row of every page with each known
# header to decide whether it is a header (or look for a common pattern).
import timeit

# ----- indicator definitions: start -----
reportTime = ""        # report time
queryInfoName = ""     # name of the queried person
queryInfoCardId = ""   # ID number of the queried person
# ----- indicator definitions: end -----

# Basic information about the queried person (report time, name, ID number)
queryInfo = {"reportTime": ""}

# Identity information
identity = {}

# Spouse information
mate = {}

# Credit transaction prompt: housing / commercial housing / other loan
# counts, minimum loan and credit-card months, credit and semi-credit
# card account counts
loanTradeInfo = {
    'perHouseLoanAccount': 0,
    'perBusHouseLoanAccount': 0,
    'otherLoanAccount': 0,
    'loanMonthMin': 0,
    'creditCardMonthMin': 0,
    'creditAccount': 0,
    'creditAccountZ': 0,
}

# Overdue and default summary
overdueBrief = {}

# Overdue / overdraft summary, for loans and for credit cards:
# overdue account count, overdue month count, highest single-month overdue
# total, longest overdue month count
overdueInfo = dict.fromkeys(
    (
        "loanOverdueAccount",
        "loanOverdueMonth",
        "loanCurMonthOverdueMaxTotal",
        "loanMaxOverdueMonth",
        "creditCardOverdueAccount",
        "creditCardOverdueMonth",
        "creditCardCurMonthOverdueMaxTotal",
        "creditCardMaxOverdueMonth",
    ),
    "",
)

# Outstanding loan summary
# ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
loanAccountInfoSum = {
    "mgrOrgCount": 0,
    "account": 0,
    "creditTotalAmt": 0,
    "balance": 0,
    "last6AvgPayAmt": 0,
}

# Outstanding (not cancelled) credit card summary: issuing institutions,
# accounts, total credit, highest / lowest credit at a single institution,
# used credit, average used credit over the last 6 months
# ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
creditCardInfoSum = {
    "awardOrgCount": 0,
    "account": 0,
    "creditTotalAmt": 0,
    "perMaxCreditTotalAmt": 0,
    "perMinCreditTotalAmt": 0,
    "useAmt": 0,
    "last6AvgUseAmt": 0,
}

# Credit approval query record details
queryRecordDetail = {
    "last1MonthQueryTimes": 0,
    "last3MothLoanApproveTimes": 0,
    "last3MonthQueryTimes": 0,
    "lastTimeLoanApproveMonth": 0,
}

# Loan amount of the most recently settled loan
loanAccountInfo = {"lastSettleLoanAmt": 0}

loanAccountDfs = []        # merged horizontally
creditCardAccountDfs = []  # merged credit card accounts
# ============================ indicator frames: start ============================
def _blankIndicatorFrame(header):
    """Return a single-row (index 0) frame with `header` as columns and no values."""
    return pd.DataFrame(columns=header, index=[0])


# Basic information
basicInfoDf = _blankIndicatorFrame(consts.basicInfoHeader)
# Summary information as a single frame is disabled (consts.briefInfoHeader);
# the summary is split into the four frames below instead.
# Credit transaction information prompt
briefInfoDf_loanTradeInfo = _blankIndicatorFrame(consts.briefInfoHeader_loanTradeInfo)
# Recovery summary and bad-debt summary
briefInfoDf_recoveryInfo_badDebtsInfoSum = _blankIndicatorFrame(consts.briefInfoHeader_recoveryInfo_badDebtsInfoSum)
# Overdue (overdraft) summary
briefInfoDf_overdueInfoSum = _blankIndicatorFrame(consts.briefInfoHeader_overdueInfoSum)
# Credit transaction credit-line and liability summary
briefInfoDf_loanTradeCreditInfo = _blankIndicatorFrame(consts.briefInfoHeader_loanTradeCreditInfo)
# Loan information
loanAccountInfoDf = _blankIndicatorFrame(consts.loanAccountInfoHeader)
# Credit card information
creditCardAccountInfoDf = _blankIndicatorFrame(consts.creditCardAccountInfoHeader)
# Query record detail indicators
queryRecordDetailDf = _blankIndicatorFrame(consts.queryRecordDetailHeader)
# ============================ indicator frames: end ============================
def parseQueryInfo(dfObj):
    """Parse the indicators of the "queried person" table.

    Reads ``dfObj["df"]`` and writes into the module-level ``queryInfo`` dict
    and ``basicInfoDf`` frame:

    * report time: cell (row 0, column 3), split on ":" keeping the second
      piece, with "." replaced by "-";
    * name: cell (row 2, column 0);
    * ID number: cell (row 2, column 2) with line breaks removed.
    """
    table = dfObj["df"]

    timeCell = table.loc[0, :][3]
    queryInfo["reportTime"] = timeCell.split(":")[1].replace(".", "-")

    personRow = table.loc[2, :]
    personName = personRow[0]
    queryInfo["queryInfoName"] = personName
    basicInfoDf.loc[0, '姓名'] = personName

    # The ID number can wrap inside its cell, hence the newline removal.
    cardId = personRow[2].replace("\n", "")
    queryInfo["queryInfoCardId"] = cardId
    basicInfoDf.loc[0, '身份证'] = cardId
def parseIdentity(dfObj):
    """Copy the identity fields of the report into ``basicInfoDf``.

    Row 1 of ``dfObj["df"]`` is taken, its empty cells are dropped and the
    remaining cells are renumbered from 0; the values are then picked by
    position (gender, birth month, nationality, registered address, marital
    status, degree, mailing address, employment status).

    The phone-number indicators ('历史电话号码数', '近3个月电话号码数') based on
    ``dfObj["mobileDf"]`` are currently disabled.
    """
    cells = dfObj["df"].loc[1, :].dropna().reset_index(drop=True)

    basicInfoDf.loc[0, '性别'] = cells[0]
    # Only the "YYYY-MM" part of the formatted birth date is kept.
    birthDate = dfParser.formatDate(cells[1])
    basicInfoDf.loc[0, '出生年月'] = birthDate[0:7]
    basicInfoDf.loc[0, '国籍'] = cells[6]
    registeredAddress = cells[9]
    basicInfoDf.loc[0, '户籍地址'] = registeredAddress.replace("\n", "")
    basicInfoDf.loc[0, '婚姻状况'] = cells[2]
    basicInfoDf.loc[0, '学位'] = cells[4]
    mailingAddress = cells[8]
    basicInfoDf.loc[0, '通讯地址'] = mailingAddress.replace("\n", "")
    basicInfoDf.loc[0, '就业状况'] = cells[5]
def getLastMonthMobileCount(df, month):
    """Count the rows of `df` whose column 5 is within the last `month` months.

    A month is approximated as 30 days, counted back from today's local
    date. Column 5 is compared as text against the "YYYY-MM-DD" cutoff.
    """
    today = np.datetime64(time.strftime("%Y-%m-%d"), "D")
    cutoff = str(today - np.timedelta64(30 * month, 'D'))
    isRecent = df[5] >= cutoff
    return df[isRecent].shape[0]
def parseMate(dfObj):
    """Record the spouse's name, ID number, employer and contact phone.

    Values come from row 1 of ``dfObj["df"]`` and are written to both the
    module-level ``mate`` dict and ``basicInfoDf``. Nothing happens when the
    table is empty.
    """
    table = dfObj["df"]
    if table.empty:
        return

    spouse = table.loc[1, :]

    mate["mateName"] = spouse[0]
    mate["mateCardId"] = spouse[2]
    employer = spouse[3].replace("\n", "")
    mate["mateWorkCompany"] = employer
    # The dict keeps the phone cell as-is; only the frame gets it cleaned.
    mate["mateContactTel"] = spouse[4]

    basicInfoDf.loc[0, '配偶姓名'] = spouse[0]
    basicInfoDf.loc[0, '配偶证件号码'] = spouse[2]
    basicInfoDf.loc[0, '配偶工作单位'] = employer
    basicInfoDf.loc[0, '配偶联系电话'] = spouse[4].replace("\n", "")
def parseLiveInfo(dfObj):
    """Parse the residence table into ``basicInfoDf``.

    Row 1 supplies the current address, home phone, residence status and
    update date. The table is also used to count all rows and the rows whose
    column 4 is not older than three years (both counts are reduced by one),
    and to flag whether any row has the status '自置'.
    Nothing happens when the table is empty.
    """
    table = dfObj["df"]
    if table.empty:
        return

    latest = table.loc[1, :]
    basicInfoDf.loc[0, '居住地址'] = latest[1]
    basicInfoDf.loc[0, '住宅电话'] = latest[2]
    # NOTE(review): the "- 1" presumably discounts the header row - confirm.
    basicInfoDf.loc[0, '历史居住地个数'] = table.index.size - 1

    # Same month and day as today, three years earlier, as "YYYY-MM-DD" text.
    today = str(np.datetime64(time.strftime("%Y-%m-%d")))
    threeYearsAgo = str(int(today[0:4]) - 3) + today[4:10]
    recentRows = table[table[4] >= threeYearsAgo]
    basicInfoDf.loc[0, '最近3年内居住地个数'] = recentRows.index.size - 1

    ownsHome = table[table[3] == '自置'].index.size > 0
    basicInfoDf.loc[0, '当前住房状态-是否具有自有住房'] = '是' if ownsHome else '否'
    basicInfoDf.loc[0, '居住状况'] = latest[3]
    basicInfoDf.loc[0, '居住信息更新日期'] = latest[4]
# Indicators produced from the credit transaction prompt table:
#   perHouseLoanAccount     personal housing loan count
#   perBusHouseLoanAccount  personal commercial (incl. mixed-use) housing loan count
#   otherLoanAccount        other loan count
#   creditAccount           credit card account count
#   loanMonthMin            loan account age in months
#   creditCardMonthMin      credit card account age in months
# Original note on the age: current date minus the earliest first-issue
# month, expressed in months.
def parseLoanTradeInfo(dfObj):
    """Parse the credit transaction prompt table.

    Rows 1-3 of ``dfObj["df"]`` are the loan rows and rows 4-5 the credit /
    semi-credit card rows; column 2 holds the account count and column 3 the
    first-issue month. Results go into the module-level ``loanTradeInfo``
    dict and ``briefInfoDf_loanTradeInfo`` frame.

    An account age is only stored when it is not NaN, for loans as well as
    for cards, so the defaults in ``loanTradeInfo`` survive a table whose
    first-issue months are all "--".
    NOTE(review): the indentation of the original was lost; the loan branch
    may previously have stored the value unconditionally - confirm.
    """
    df = dfObj["df"]

    loanRows = df[1:4].reset_index(drop=True)
    for rowPos, infoKey, briefColumn in (
        (0, "perHouseLoanAccount", '住房贷款笔数'),
        (1, "perBusHouseLoanAccount", '个人商用房(包括商住两用)贷款笔数'),
        (2, "otherLoanAccount", '其他贷款笔数'),
    ):
        accountCount = loanRows.loc[rowPos, :][2]
        loanTradeInfo[infoKey] = accountCount
        briefInfoDf_loanTradeInfo.loc[0, briefColumn] = accountCount

    loanAge = _firstIssueAge(loanRows[3])
    if str(loanAge) != "nan":
        loanTradeInfo["loanMonthMin"] = loanAge  # loan account age (months)

    cardRows = df[4:6].reset_index(drop=True)
    cardAge = _firstIssueAge(cardRows[3])
    if str(cardAge) != "nan":
        loanTradeInfo["creditCardMonthMin"] = cardAge  # card account age (months)

    creditCount = cardRows.loc[0, :][2]
    loanTradeInfo["creditAccount"] = creditCount
    briefInfoDf_loanTradeInfo.loc[0, '贷记卡账户数'] = creditCount
    semiCreditCount = cardRows.loc[1, :][2]
    loanTradeInfo["creditAccountZ"] = semiCreditCount
    briefInfoDf_loanTradeInfo.loc[0, '信用卡账户数'] = semiCreditCount

    # When both counts are present, creditAccount becomes their sum.
    if semiCreditCount != "--" and creditCount != "--":
        loanTradeInfo["creditAccount"] = int(creditCount) + int(semiCreditCount)


def _firstIssueAge(firstIssueMonths):
    """Return the age of the earliest first-issue month, or NaN if there is none.

    "--" placeholders are ignored. The minimum of what is left is handed to
    ``utils.difMonth`` when it is non-empty and numeric-looking; otherwise it
    is returned unchanged (an all-"--" column yields NaN).
    """
    dated = firstIssueMonths[firstIssueMonths != '--']
    earliest = dated.min()
    if earliest != "" and not math.isnan(float(earliest)):
        earliest = utils.difMonth(earliest)
    return earliest
def parseBadDebtsInfoSumDf(dfObj):
    """Parse the bad-debt summary (account count and balance).

    With a non-empty table, row 1 column 0 (count) and column 1 (balance) are
    stored in ``overdueBrief`` and ``briefInfoDf_recoveryInfo_badDebtsInfoSum``.
    With an empty table only ``overdueBrief`` is filled, with empty strings.
    """
    table = dfObj["df"]
    if table.empty:
        overdueBrief["badDebtsInfoSumAccount"] = ""  # bad-debt account count
        overdueBrief["badDebtsInfoSumAmt"] = ""      # bad-debt balance
        return

    summary = table.loc[1, :]

    accountCount = summary[0]
    overdueBrief["badDebtsInfoSumAccount"] = accountCount
    briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账业务账户数'] = accountCount

    balance = summary[1]
    overdueBrief["badDebtsInfoSumAmt"] = balance
    briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账信息余额'] = balance
def parseRecoveryInfo(dfObj):
    """Parse the recovery summary (asset disposal, advance payment, total).

    With a non-empty table, rows 1-3 of ``dfObj["df"]`` supply the account
    count (column 1) and balance (column 2) of the asset-disposal row, the
    advance-payment row and the total row. Raw cells go into ``overdueBrief``;
    ``briefInfoDf_recoveryInfo_badDebtsInfoSum`` receives the counts and the
    balances with thousands separators removed.
    With an empty table only ``overdueBrief`` is filled, with empty strings.

    The balances are single cells, so the commas are stripped here directly
    instead of through ``replaceAmt``, whose ``.str`` accessor only exists on
    a pandas Series and raises AttributeError on a plain string.
    """
    table = dfObj["df"]
    if table.empty:
        overdueBrief["disposalInfoSumAccount"] = ""  # asset-disposal account count
        overdueBrief["disposalInfoSumAmt"] = ""      # asset-disposal balance
        overdueBrief["advanceInfoSumAccount"] = ""   # advance-payment account count
        overdueBrief["advanceInfoSumAmt"] = ""       # advance-payment balance
        return

    disposalRow = table.loc[1, :]
    advanceRow = table.loc[2, :]
    totalRow = table.loc[3, :]
    target = briefInfoDf_recoveryInfo_badDebtsInfoSum

    overdueBrief["disposalInfoSumAccount"] = disposalRow[1]
    target.loc[0, '资产处置业务账户数'] = disposalRow[1]
    overdueBrief["disposalInfoSumAmt"] = disposalRow[2]
    target.loc[0, '资产处置业务账户余额'] = _stripThousands(disposalRow[2])

    overdueBrief["advanceInfoSumAccount"] = advanceRow[1]
    target.loc[0, '垫款业务账户数'] = advanceRow[1]
    overdueBrief["advanceInfoSumAmt"] = advanceRow[2]
    target.loc[0, '垫款业务账户余额'] = _stripThousands(advanceRow[2])

    target.loc[0, '被追偿信息总数'] = totalRow[1]
    target.loc[0, '被追偿信息总额'] = _stripThousands(totalRow[2])


def _stripThousands(cell):
    """Remove "," from a text cell; any non-text cell is returned unchanged."""
    if isinstance(cell, str):
        return cell.replace(',', '')
    return cell
def parseOverdueInfo(dfObj):
    """Parse the overdue / overdraft summary into ``briefInfoDf_overdueInfoSum``.

    Rows 2-6 of ``dfObj["df"]`` are read. Row 2 supplies the account count
    (column 1), month count (column 2) and highest single-month total
    (column 3); rows 3-6 only supply their account count (column 1).
    Nothing happens when the table is empty.
    """
    table = dfObj["df"]
    if table.empty:
        return

    # All five rows are fetched before anything is written, so a table that
    # is too short fails without leaving a half-filled result.
    rows = {pos: table.loc[pos, :] for pos in range(2, 7)}

    for column, rowPos, cellPos in (
        ('非循环贷帐户数', 2, 1),
        ('非循环贷帐户数月数', 2, 2),
        ('非循环贷帐户单月最高逾期/透支总额', 2, 3),
        ('循环额度下分账户数', 3, 1),
        ('循环贷帐户数', 4, 1),
        ('贷记卡账户数', 5, 1),
        ('准贷记卡账户数', 6, 1),
    ):
        briefInfoDf_overdueInfoSum.loc[0, column] = rows[rowPos][cellPos]
# Indicators taken from the "outstanding loan summary" (original notes):
#   outstanding loan legal-entity institution count  LoanLegalOrgNum
#   outstanding loan institution count               LoanOrgNum
#   outstanding loan count                           CountNum
#   outstanding loan contract total                  ContractProfits
#   outstanding loan contract balance                Balance
#   outstanding loan avg. repayment, last 6 months   Last6MothsAvgRepayAmount
#   outstanding personal loan count: loans whose type is neither a student
#   loan nor a farmer loan, whose amount is above 100 and whose status is not
#   "settled"
def doFilterCalc(dfx):
    """Return a copy of `dfx` in which every '--' placeholder is replaced by 0."""
    return dfx.replace('--', 0)
def replaceAmt(dfx):
    """Remove thousands separators (",") from amounts.

    Args:
        dfx: a pandas Series of amount cells, or a single cell.

    Returns:
        For a Series, a new Series in which every text element has its
        commas removed and every other element (for example the 0 that
        ``doFilterCalc`` substitutes for "--") is kept unchanged. For a
        single text cell, the text without commas. Anything else is returned
        as-is.

    The previous implementation went through the ``.str`` accessor, which
    does not exist on a plain string and rejects or blanks non-text elements.
    """
    def strip(cell):
        return cell.replace(',', '') if isinstance(cell, str) else cell

    if isinstance(dfx, pd.Series):
        return dfx.map(strip)
    return strip(dfx)
# TODO: revolving loans and sub-accounts under a revolving credit line, when
# present, still need to be added to this summary (carried over from the
# original note).
def parseLoanAccountInfoSum(dfObj):
    """Parse the non-revolving loan account summary.

    Row 2 of ``dfObj["df"]`` supplies, by column: institution count, account
    count, total credit, balance and average repayment over the last six
    months. Each value is stored as an integer in ``loanAccountInfoSum`` and
    in ``briefInfoDf_loanTradeCreditInfo``. Nothing happens when the table is
    empty.
    """
    df = dfObj["df"]
    if df.empty:
        return

    summaryRow = doFilterCalc(df[2:3])  # "--" becomes 0

    def total(column):
        # doFilterCalc leaves an integer 0 where the cell was "--". Cells are
        # turned into text first so the comma stripping in replaceAmt works
        # for them as well.
        cells = summaryRow[column].astype(str)
        return np.sum(replaceAmt(cells).astype('int'))

    orgCount = total(0)
    accountCount = total(1)
    creditTotal = total(2)
    balance = total(3)
    avgRepayment = total(4)

    loanAccountInfoSum["mgrOrgCount"] = orgCount
    loanAccountInfoSum["account"] = accountCount
    loanAccountInfoSum["creditTotalAmt"] = creditTotal
    loanAccountInfoSum["balance"] = balance
    loanAccountInfoSum["last6AvgPayAmt"] = avgRepayment

    # Both institution indicators are fed from the same column.
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款法人机构数'] = orgCount
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款机构数'] = orgCount
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款笔数'] = accountCount
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同总额'] = creditTotal
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同余额'] = balance
    briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款近6月平均应还款'] = avgRepayment
def parseCreditCardInfoSum(dfObj):
    """Parse the credit card account summary.

    Row 2 of ``dfObj["df"]`` supplies, by column: issuing institution count,
    account count, total credit, highest and lowest credit at a single
    institution, used credit and average used credit over the last six
    months. Each value is stored as an integer in ``creditCardInfoSum`` and
    in ``briefInfoDf_loanTradeCreditInfo``. Nothing happens when the table is
    empty.
    NOTE(review): the original comment says semi-credit cards are included,
    but only the one table passed in is read here - confirm with the caller.
    """
    df = dfObj["df"]
    if df.empty:
        return

    summaryRow = doFilterCalc(df[2:3])  # "--" becomes 0

    def total(column):
        # doFilterCalc leaves an integer 0 where the cell was "--". Cells are
        # turned into text first so the comma stripping in replaceAmt works
        # for them as well.
        cells = summaryRow[column].astype(str)
        return np.sum(replaceAmt(cells).astype('int'))

    # (column, key in creditCardInfoSum, label in briefInfoDf_loanTradeCreditInfo)
    layout = (
        (0, "awardOrgCount", '贷记卡发卡机构数'),
        (1, "account", '贷记卡账户数'),
        (2, "creditTotalAmt", '贷记卡授信总金额'),
        (3, "perMaxCreditTotalAmt", '单家授信最高金额'),
        (4, "perMinCreditTotalAmt", '单家授信最低金额'),
        (5, "useAmt", '贷记卡已用额度'),
        (6, "last6AvgUseAmt", '贷记卡最近6个月平均使用额度'),
    )
    # Convert everything first so a bad cell fails before anything is stored.
    totals = [total(column) for column, _, _ in layout]

    for (_, sumKey, _), value in zip(layout, totals):
        creditCardInfoSum[sumKey] = value
    for (_, _, label), value in zip(layout, totals):
        briefInfoDf_loanTradeCreditInfo.loc[0, label] = value
def parseQueryInfoDetail(dfObj):
    """Compute the query-record indicators from the query record detail table.

    The table in ``dfObj["df"]`` has its date column normalised by
    ``utils.replaceDateCol`` and its header row dropped; counts are then
    obtained from the ``qip`` helpers relative to ``queryInfo["reportTime"]``
    and written to ``queryRecordDetail`` and ``queryRecordDetailDf``.
    Nothing happens when the table is empty.

    Three indicators used a query reason that contradicted their label and
    are corrected here:
      * '最近24个月贷后管理查询次数' now counts ``consts.loanAfterMgr``,
      * '最近24个月贷款审批审批次数' now counts ``consts.loanApprove``
        (the two were swapped),
      * '最近12个月信用卡审批查询次数' now counts ``consts.creditCard``
        (it counted ``consts.loanApprove``).
    """
    df = dfObj["df"]
    reportTime = queryInfo["reportTime"]
    if df.empty:
        return

    df = utils.replaceDateCol(df)
    df = df[1:df.index.size]  # drop the header row

    def queryTimes(months, reason):
        return qip.getLastMonthQueryTimes(df, months, reason, reportTime)

    def queryOrgs(months, reason):
        return qip.getLastMonthQueryOrgTimes(df, months, reason, reportTime)

    queryRecordDetail["last1MonthQueryTimes"] = queryTimes(1, "")
    queryRecordDetail["last3MonthQueryTimes"] = queryTimes(3, "")
    queryRecordDetail["last3MothLoanApproveTimes"] = queryTimes(3, consts.loanApprove)

    # Query counts: (label, look-back in months, query reason; "" = any)
    timesIndicators = (
        ('近1月查询次数', 1, ""),
        ('近3月查询次数', 3, ""),
        ('近6月查询次数', 6, ""),
        ('近12月查询次数', 12, ""),
        ('近3月查询次数贷款审批', 3, consts.loanApprove),
        ('近3月查询次数信用卡审批', 3, consts.creditCard),
        ('近6月查询次数贷款审批', 6, consts.loanApprove),
        ('近6月查询次数信用卡审批', 6, consts.creditCard),
        ('近12月查询次数贷款审批', 12, consts.loanApprove),
        ('近12月查询次数信用卡审批', 12, consts.creditCard),
    )
    for label, months, reason in timesIndicators:
        queryRecordDetailDf.loc[0, label] = queryTimes(months, reason)

    # Distinct querying institutions
    orgIndicators = (
        ('近3月查询机构数贷款审批', 3, consts.loanApprove),
        ('近3月查询机构数信用卡审批', 3, consts.creditCard),
        ('近6月查询机构数贷款审批', 6, consts.loanApprove),
        ('近6月查询机构数信用卡审批', 6, consts.creditCard),
        ('近12月查询机构数贷款审批', 12, consts.loanApprove),
        ('近12月查询机构数信用卡审批', 12, consts.creditCard),
    )
    for label, months, reason in orgIndicators:
        queryRecordDetailDf.loc[0, label] = queryOrgs(months, reason)

    queryRecordDetailDf.loc[0, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove, reportTime)

    # Long look-back counts; each reason now matches its label.
    longIndicators = (
        ('最近24个月贷后管理查询次数', 24, consts.loanAfterMgr),
        ('最近24个月贷款审批审批次数', 24, consts.loanApprove),
        ('最近24个月信用卡审批查询次数', 24, consts.creditCard),
        ('最近24个月担保资格审查查询次数', 24, consts.insuranceAprove),
        ('最近12个月贷款审批审批次数', 12, consts.loanApprove),
        ('最近12个月信用卡审批查询次数', 12, consts.creditCard),
    )
    for label, months, reason in longIndicators:
        queryRecordDetailDf.loc[0, label] = queryTimes(months, reason)
def parseLoanMergeAndPayRecordDf(df, payRcdDf):
    """Compute the loan indicators that need the repayment records.

    `df` is the merged loan account frame and `payRcdDf` the repayment
    record frame; they are linked through the '账户编号' column. Accounts
    whose '账户状态' is '结清', '转出' or '呆账' are excluded. Results are written
    to ``loanAccountInfoDf``. Nothing happens when either frame is empty.
    """
    if df.empty or payRcdDf.empty:
        return

    # Accounts that are neither settled, transferred out nor bad debt.
    activeLoans = df[~df['账户状态'].isin(['结清', '转出', '呆账'])]
    activeIds = activeLoans['账户编号'].values

    activeRecords = payRcdDf[payRcdDf['账户编号'].isin(activeIds)]
    activeRecords = utils.replacePayRcdStatus(activeRecords)
    # Kept unfiltered (status 0 included) for the max-overdue indicators.
    allActiveRecords = activeRecords
    overdueRecords = activeRecords[activeRecords['还款状态'] > 0]

    loanAccountInfoDf.loc[0, '当前贷款逾期账户数'] = overdueRecords['账户编号'].unique().size
    accountRatio = loanAccountInfoDf.loc[0, '当前贷款逾期账户数'] / df.index.size
    loanAccountInfoDf.loc[0, '当前贷款逾期账户数占比'] = round(accountRatio, 2)

    # Active loan accounts that have at least one overdue record.
    overdueLoans = activeLoans[activeLoans['账户编号'].isin(overdueRecords['账户编号'].values)]
    loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] = overdueLoans['管理机构'].unique().size
    orgRatio = loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] / df['管理机构'].unique().size
    loanAccountInfoDf.loc[0, '当前贷款逾期机构数占比'] = round(orgRatio, 2)

    # Maximum overdue period count over the most recent N months of records.
    for label, months in (
        ('近1月贷款的最大逾期期数', 1),
        ('近3月贷款的最大逾期期数', 3),
        ('近6月贷款的最大逾期期数', 6),
        ('近9月贷款的最大逾期期数', 9),
        ('近24月贷款的最大逾期期数', 24),
    ):
        loanAccountInfoDf.loc[0, label] = prp.getPayRcdMaxOverdueNum(allActiveRecords, months)
    loanAccountInfoDf.loc[0, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(allActiveRecords, activeLoans, 24)

    # Number of repayment records per active account whose status is one of
    # the codes below; the indicator is the maximum over the accounts.
    countedStatuses = ["G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
    countedRecords = payRcdDf[payRcdDf['账户编号'].isin(activeLoans['账户编号'].values)]
    countedRecords = countedRecords[countedRecords['还款状态'].isin(countedStatuses)]
    recordsPerAccount = countedRecords.groupby(['账户编号'])['还款状态'].count()
    loanAccountInfoDf.loc[0, '贷款24期还款记录次数'] = np.max(recordsPerAccount)
- #解析贷款账户信息指标
- def parseLoanMergeDf(df):
- if not df.empty:
- sortDf = df.sort_values(by=["开立日期","借款金额(本金)"] , ascending=(False,False))
- sortDf = sortDf[sortDf['账户状态'] == '结清'];
- sortDf = sortDf.reset_index(drop=True)
- if not sortDf.empty:
- row0 = sortDf.loc[0, :]
- loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)']
- loanAccountInfoDf.loc[0, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)']
- openDate = dfParser.formatDate(row0['开立日期'])
- loanAccountInfoDf.loc[0, '最近一笔结清贷款的发放距今月数'] = utils.difMonth(openDate)
- settleDate = dfParser.formatDate(row0['账户关闭日期'])
- loanAccountInfoDf.loc[0, '最近一笔结清贷款的结清距今月数'] = utils.difMonth(settleDate)
- loanAccountInfoDf.loc[0, '历史贷款总法人机构数'] = df['管理机构'].unique().size
- loanAccountInfoDf.loc[0, '当前同时在用的贷款机构数'] = df[df['借款金额(本金)']>0]['管理机构'].unique().size
- statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
- bankDf = statusDf[statusDf['管理机构'].str.contains('银行')]
- #没有记录
- if statusDf.index.size==0:
- isNotBankCust = -1
- else:
- if bankDf.index.size >0:#有一条以上不为结清,请包含银行
- isNotBankCust = 1;
- else:
- isNotBankCust = 0;
- loanAccountInfoDf.loc[0, '是否有非银贷款客户'] = isNotBankCust
- #最严重的五级分类
- fiveType = ""
- for fiveTypeTmp in consts.fiveType:
- fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp];
- if not fiveTypeDf.empty:
- fiveType = fiveTypeTmp;
- break;
- loanAccountInfoDf.loc[0, '贷款五级分类'] = fiveType
- #当前贷款LTV
- # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0
- # 的记录后,SUM(本金余额) / SUM(贷款本金)
- # 如本金余额为空和贷款本金为0或为空,则当条记录不计算
- loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')]
- badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))]
- balanceSum = np.sum(badSetDf['余额(本金)'].astype('int'))
- loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int'))
- if(loanAmtSum !=0):
- loanAccountInfoDf.loc[0, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2)
- loanAccountInfoDf.loc[0, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2)
- loanAccountInfoDf.loc[0, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
- loanAccountInfoDf.loc[0, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
- houseLtvList = consts.houseLtvList;
- # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)]
- # if not houseLtvDf.empty:
- # loanAccountInfoDf.loc[0, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2)
- #['个人住房贷款','个人商用房(包括商住两用)贷款']
- loanAccountInfoDf.loc[0, '当前房贷LTV'] = lip.getCurLtv(badSetDf,houseLtvList)
- cardLtvList = ['个人汽车消费贷款']
- loanAccountInfoDf.loc[0, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList)
- operateLtvList = ['个人经营性贷款']
- loanAccountInfoDf.loc[0, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList)
- consumeLtvList = ['其他个人消费贷款']
- loanAccountInfoDf.loc[0, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList)
- bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行']
- loanAccountInfoDf.loc[0, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
- bankLtvList = ['消费金融有限公司','汽车金融公司','信托投资']
- loanAccountInfoDf.loc[0, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
- smallLoanLtvList = ['机构','小额信贷公司','财务公司']
- loanAccountInfoDf.loc[0, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList)
- #当前贷款最大逾期期数
- # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数)
- loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
- if not loanOverdueLtvDf.empty:
- loanAccountInfoDf.loc[0, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数'])
- loanAccountInfoDf.loc[0, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额'])
- loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True)
- maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数'])
- loanAccountInfoDf.loc[0, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额']
- loanAccountInfoDf.loc[0, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里
- loanAccountInfoDf.loc[0, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3)
- loanAccountInfoDf.loc[0, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3)
- loanAccountInfoDf.loc[0, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6)
- loanAccountInfoDf.loc[0, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6)
- loanAccountInfoDf.loc[0, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6)
- loanAccountInfoDf.loc[0, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12)
- loanAccountInfoDf.loc[0, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12)
- loanAccountInfoDf.loc[0, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12)
- lastLoanDf = loanOverdueLtvDf;
- if not lastLoanDf.empty:
- loanAccountInfoDf.loc[0, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"])
- normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
- #未结清贷款总账户数:账户状态不等于结清和转出的记录数
- notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
- if not notSettleDf.empty:
- loanAccountInfoDf.loc[0, '当前正常贷款账户数'] = normalDf.index.size
- loanAccountInfoDf.loc[0, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2)
- loanAccountInfoDf.loc[0, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)'])
- # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总)
- loanAccountInfoDf.loc[0, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']),2)
- settleDf = df[(df['账户状态'] == '结清')]
- loanAccountInfoDf.loc[0, '当前正常结清贷款账户数'] = settleDf.index.size
- loanAccountInfoDf.loc[0, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2)
- #贷款24期还款记录次数 TODO
- # 最近3个月个人消费贷款发放额度
- loanAccountInfoDf.loc[0, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款'])
- loanAccountInfoDf.loc[0, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3)
- loanAccountInfoDf.loc[0, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6)
- loanAccountInfoDf.loc[0, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12)
# Parse credit-card account indicators.
def parseCreditCardMergeDf(df):
    """Compute credit-card account indicators from the merged account table.

    Every indicator is written to row 0 of the shared
    ``creditCardAccountInfoDf`` frame; nothing is returned.  An empty ``df``
    is ignored.

    Args:
        df: merged credit-card accounts, one row per account, carrying the
            columns referenced below (发卡机构, 币种, 账户状态, 已用额度, ...).
    """
    if df.empty:
        return

    reportTime = queryInfo["reportTime"]
    isRmb = df['币种'] == '人民币元'
    notClosed = df['账户状态'] != '销户'
    notBadDebt = df['账户状态'] != '呆账'

    # Distinct issuers overall, and distinct issuers with a used balance.
    creditCardAccountInfoDf.loc[0, '历史信用卡总法人机构数'] = df['发卡机构'].unique().size
    creditCardUseDf = df[df['已用额度'] > 0]
    creditCardAccountInfoDf.loc[0, '当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size

    # Common exclusion: RMB accounts that are not inactive, closed or bad debt.
    creditDf = df[isRmb & (df['账户状态'] != '未激活') & notClosed & notBadDebt]
    creditCardAccountInfoDf.loc[0, '贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf)
    creditCardAccountInfoDf.loc[0, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf)
    creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf, df, 0.9)
    creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, df, 1)

    # Utilisation: used amounts exclude closed and bad-debt accounts, while the
    # credit-limit denominator excludes closed accounts only.
    useCreditDf = df[isRmb & notClosed & notBadDebt]
    totalCreditDf = df[isRmb & notClosed]
    totalLimit = np.sum(totalCreditDf['账户授信额度'])
    creditCardAccountInfoDf.loc[0, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度']) / totalLimit, 2)
    creditCardAccountInfoDf.loc[0, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / totalLimit, 2)
    creditCardAccountInfoDf.loc[0, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / totalLimit, 2)

    creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数'] = np.max(creditDf['当前逾期期数'])
    creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期金额'] = np.max(creditDf['当前逾期总额'])
    if not creditDf.empty:
        # Re-index so the position returned by argmax is also a valid label.
        creditDf = creditDf.reset_index(drop=True)
        maxOverdueIndex = np.argmax(creditDf['当前逾期期数'])
        creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex, :]['当前逾期总额']

    # Highest / lowest / average limit of cards opened in the last N months.
    for months in (3, 6, 12):
        creditCardAccountInfoDf.loc[0, '近%d月开卡最高额度' % months] = cip.getLastMonthMaxCreditAmt(df, reportTime, months)
        creditCardAccountInfoDf.loc[0, '近%d月开卡最低额度' % months] = cip.getLastMonthMinCreditAmt(df, reportTime, months)
        creditCardAccountInfoDf.loc[0, '近%d月开卡平均额度' % months] = cip.getLastMonthAvgCreditAmt(df, reportTime, months)

    if not creditDf.empty:
        creditCardAccountInfoDf.loc[0, '信用卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf, reportTime)

    actualPaid = np.sum(creditDf['本月实还款'])
    creditCardAccountInfoDf.loc[0, '贷记卡还款比例'] = round(np.sum(creditDf['本月应还款']) / actualPaid, 2)
    creditCardAccountInfoDf.loc[0, '贷记卡最高还款比例'] = round(np.max(creditDf['本月应还款']) / actualPaid, 2)
    creditCardAccountInfoDf.loc[0, '贷记卡最低还款比例'] = round(np.min(creditDf['本月应还款']) / actualPaid, 2)

    normalDf = df[isRmb & (df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
    notCloseDf = df[notClosed]
    if not notCloseDf.empty and not normalDf.empty:
        # NOTE(review): this column is named like a count but stores the share
        # of normal accounts among non-closed accounts -- confirm the intent.
        creditCardAccountInfoDf.loc[0, '当前正常信用卡账户数'] = round(normalDf.index.size / notCloseDf.index.size, 2)
        creditCardAccountInfoDf.loc[0, '当前正常信用卡已用额度'] = np.sum(normalDf['已用额度'])
        creditCardAccountInfoDf.loc[0, '当前正常信用卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2)

    creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数'] = normalDf[normalDf['已用额度'] > 0].index.size
    if notCloseDf.empty:
        creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数占比'] = -99
    else:
        # Write the share to its own column.  It used to be stored under
        # '当前正常信用卡账户余额占总余额比', overwriting the balance ratio above.
        creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数占比'] = round(creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数'] / notCloseDf.index.size, 2)

    creditCardAccountInfoDf.loc[0, '贷记卡本月实还款金额'] = np.sum(creditDf['本月实还款'])

    # Months since the RMB card with the highest credit limit was opened.
    maxAmtDf = df[isRmb]
    if not maxAmtDf.empty:
        maxAmtDf = maxAmtDf.reset_index(drop=True)
        maxAmtIndex = np.argmax(maxAmtDf['账户授信额度'])
        maxOpenDate = maxAmtDf.loc[maxAmtIndex, :]['开立日期']
        creditCardAccountInfoDf.loc[0, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate, reportTime) + 1
# Parse credit-card repayment-record indicators.
def parseCreditCardMergeAndPayRecordDf(df, payRcdDf):
    """Compute overdue indicators from credit-card accounts and their repayment records.

    Results are written to row 0 of the shared ``creditCardAccountInfoDf``
    frame.  Nothing happens when either input is empty, or when no account
    remains after dropping inactive, closed and bad-debt accounts.

    Args:
        df: merged credit-card accounts (one row per account).
        payRcdDf: merged repayment records, linked to accounts by 账户编号.
    """
    if df.empty or payRcdDf.empty:
        return

    # Keep accounts that are neither inactive, closed nor bad debt.
    normalDf = df[~df['账户状态'].isin(['未激活', '销户', '呆账'])]
    if normalDf.empty:
        return

    ownRecordsMask = payRcdDf['账户编号'].isin(normalDf['账户编号'].values)
    # All records of the kept accounts, including status 0, for the max-overdue lookups.
    payRcdMaxOverdueDf = utils.replacePayRcdStatus(payRcdDf[ownRecordsMask])
    overduePayRcdDf = payRcdMaxOverdueDf[payRcdMaxOverdueDf['还款状态'] > 0]

    # Overdue accounts and their share among the kept accounts.
    creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
    overdueAccountCount = creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数']
    creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数占比'] = round(overdueAccountCount / normalDf.index.size, 2)

    # Distinct issuers having at least one overdue account, and their share.
    overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
    creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size
    overdueIssuerCount = creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数']
    creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数占比'] = round(overdueIssuerCount / normalDf['发卡机构'].unique().size, 2)

    # Maximum overdue periods within each look-back window.
    for months in (3, 6, 9, 12, 24):
        creditCardAccountInfoDf.loc[0, '近%d月信用卡最大逾期期数' % months] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, months)
    creditCardAccountInfoDf.loc[0, '近24月信用卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf, normalDf, 24)
    creditCardAccountInfoDf.loc[0, '最近3个月信用卡最大连续逾期月份数'] = 0

    # Largest number of repayment records held by a single kept account.
    payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
    payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
    creditCardAccountInfoDf.loc[0, '贷记卡24期还款记录次数'] = np.max(payRcdTimes)
def _appendContinuedTable(df, pageNo):
    """Append a header-less table to the section recognised most recently.

    The first table of a page that carries no known header is treated as the
    continuation of the previous section, which was split by a page break.
    """
    key = keyList[-1]
    dfObj = dfMap[key]
    # Sections holding a "dfs" list keep one entry per account; the
    # continuation belongs to the last entry.  Other sections hold one frame.
    target = dfObj["dfs"][-1] if "dfs" in dfObj else dfObj
    target["isByPage"] = str(pageNo)
    target["df"] = pd.concat([target["df"], df], axis=0, ignore_index=True)


def _matchSectionKey(headerList0, headerList1):
    """Return the dfMap key whose known header matches the table, or None.

    ``headerList0`` / ``headerList1`` are the first and second table rows; the
    checks run in a fixed order and the first match wins.
    """
    if headerList1 == queryInfoDf_header:  # queried-subject info, data in row 2
        return "queryInfoDf"
    if headerList0 == identity_header:  # identity info
        return "identityDf"
    if headerList0 == mateDf_header:  # spouse info
        return "mateDf"
    if headerList0 == liveInfoDf_header:  # residence info
        return "liveInfoDf"
    if headerList0 == occupationInfo_header:  # occupation info
        return "occupationInfoDf"
    if headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1:
        return "queryInfoBriefDf"
    if headerList0 == loanTradeInfo_header:  # credit transaction summary
        return "loanTradeInfoDf"
    if headerList0 == recoveryInfoSumDf_header:  # recovery summary
        return "recoveryInfoSumDf"
    if headerList0 == badDebtsInfoSumDf_header:  # bad-debt summary
        return "badDebtsInfoSumDf"
    if headerList1 == overdueInfoSumDf_header:  # overdue / overdraft summary
        return "overdueInfoSumDf"
    if headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1:
        return "loanAccountInfoSumDf"
    if headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1:
        return "creditCardInfoSumDf"
    if headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1:
        return "creditCardInfoSumDfZ"

    # Detail tables are matched on the first row with empty cells removed.
    compactHeader = list(filter(None, headerList0))
    if compactHeader == loan_header:  # loan accounts
        return "loanDfs"
    if compactHeader == creditCard_header:  # credit-card accounts
        return "creditCardDfs"
    if compactHeader == creditCardZ_header:  # quasi-credit-card accounts
        return "creditCardDfsZ"
    if compactHeader == queryRecordDetailDf_header:  # query record details
        return "queryRecordDetailDf"
    if compactHeader == forceExecRcdDfs_header:  # enforcement records
        return "forceExecRcdDfs"
    return None


def _storeSection(key, df):
    """Store a recognised table in dfMap under ``key`` and record it in keyList."""
    if key == "identityDf":
        # Rows 0-1 hold the identity fields, rows 2-3 (columns 0 and 5) are
        # joined to them side by side, rows from 5 on go to the phone table.
        identityDf = df[:2]
        addressDf = df.iloc[2:4, [0, 5]].reset_index(drop=True)
        mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5)
        dfMap[key]["df"] = pd.concat([identityDf, addressDf], axis=1, ignore_index=True)
        # NOTE(review): the key really ends with a space; kept byte-for-byte
        # because readers of dfMap are outside this block -- confirm.
        dfMap[key]["mobileDf "] = mobileDf
    elif key in ("loanDfs", "creditCardDfs", "creditCardDfsZ", "forceExecRcdDfs"):
        dfMap[key]["dfs"].append({"df": df})
    else:
        dfMap[key]["df"] = df
    keyList.append(key)


def _mergeAccountSection(key, dfObjs):
    """Merge the per-account tables of a list-type section, or log them."""
    if key == "loanDfs":  # loan accounts
        for idx, tempDfObj in enumerate(dfObjs):
            if "isByPage" in tempDfObj:
                loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx, queryInfo['reportTime']))
                logger.info(tempDfObj["df"].values)
            else:
                logger.info(tempDfObj["df"].values)
                loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx, queryInfo['reportTime']))
    elif key == "creditCardDfs":  # credit-card accounts
        for idx, tempDfObj in enumerate(dfObjs):
            creditCardAccountDfs.append(dfParser.mergeCreditCardDf(tempDfObj, idx, queryInfo['reportTime']))
    else:  # other list sections are only logged
        for tempDfObj in dfObjs:
            if "isByPage" in tempDfObj:
                logger.info(key + "============其他被分页页数============" + str(tempDfObj["isByPage"]))
            logger.info(tempDfObj["df"].values)


def _parseSingleSection(key, tempDfObj):
    """Log a single-table section and hand it to its parser, if it has one."""
    if "isByPage" in tempDfObj:
        logger.info(key + "============被分页页数================" + str(tempDfObj["isByPage"]))
    logger.info(tempDfObj["df"].values)

    if key == "queryInfoDf":  # queried-subject info
        parseQueryInfo(tempDfObj)
    elif key == "identityDf":  # identity info
        parseIdentity(tempDfObj)
    elif key == "mateDf":  # spouse info
        parseMate(tempDfObj)
    elif key == "liveInfoDf":  # residence info
        parseLiveInfo(tempDfObj)
    elif key == "loanTradeInfoDf":  # credit transaction summary
        parseLoanTradeInfo(tempDfObj)
    elif key == "badDebtsInfoSumDf":  # bad-debt summary
        parseBadDebtsInfoSumDf(tempDfObj)
    elif key == "recoveryInfoDf":  # recovery summary
        # NOTE(review): extraction stores this table under "recoveryInfoSumDf",
        # so this branch only fires if dfMap also defines "recoveryInfoDf".
        parseRecoveryInfo(tempDfObj)
    elif key == "overdueInfoSumDf":  # overdue / overdraft summary
        parseOverdueInfo(tempDfObj)
    elif key == "loanAccountInfoSumDf":
        # Revolving loans and revolving sub-accounts are folded into the loan
        # summary; row 2 of each of those tables is appended when present.
        tempDfObj_cycleLoanAccount = dfMap["cycleLoanAccountInfoSumDf"]
        tempDfObj_cycleCredit = dfMap["cycleCreditAccountInfoSumDf"]
        if not tempDfObj_cycleLoanAccount["df"].empty:
            tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleLoanAccount["df"][2:3]],
                                        axis=0, ignore_index=True)
        if not tempDfObj_cycleCredit["df"].empty:
            tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleCredit["df"][2:3]],
                                        axis=0, ignore_index=True)
        parseLoanAccountInfoSum(tempDfObj)
    elif key == "creditCardInfoSumDf":
        # NOTE(review): extraction stores the quasi-credit-card summary under
        # "creditCardInfoSumDfZ"; the key read here differs -- confirm dfMap.
        tempDfObjZ = dfMap["creditCardInfoDfZ"]
        if not tempDfObjZ["df"].empty:
            tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObjZ["df"][2:3]], axis=0, ignore_index=True)
        parseCreditCardInfoSum(tempDfObj)
    elif key == "queryRecordDetailDf":  # query record details
        parseQueryInfoDetail(tempDfObj)


def main(pdf_path):
    """Parse one credit-report PDF and return the derived indicators as text.

    The function works in three steps:

    1. Every table on every page is extracted with pdfplumber, matched against
       the known section headers and stored in ``dfMap``; a header-less first
       table of a page is appended to the previously recognised section.
    2. Each stored section is logged and passed to its parser or merger.
    3. Loan and credit-card account tables are merged, their indicators are
       computed, and all indicator frames are rendered with ``utils.toJson``.

    Args:
        pdf_path: path of the PDF report to parse.

    Returns:
        A string of ANSI-coloured section titles, each followed by the JSON
        text of the matching indicator frame, one item per line.
    """
    def title(text):
        return "\033[1;34m +" + text + "+ \033[0m"

    # Step 1: extract and classify the tables.
    with pdfplumber.open(pdf_path) as pdf:
        for pageNo, page in enumerate(pdf.pages, start=1):
            # Extract once per page instead of once per table index.
            for i, table in enumerate(page.extract_tables()):
                df = pd.DataFrame(table)
                # A first table without a known header continues the previous section.
                if len(keyList) > 1 and i == 0 and not utils.checkHeader(df, allHeaders):
                    _appendContinuedTable(df, pageNo)
                    continue
                if df.empty:
                    continue
                headerList0 = df.loc[0, :].tolist()
                headerList1 = df.loc[1, :].tolist() if df.index.size > 1 else []
                dfKey = _matchSectionKey(headerList0, headerList1)
                if dfKey is None:
                    # Unknown table: skip it rather than touch another section's page.
                    continue
                _storeSection(dfKey, df)
                dfMap[dfKey]["page"] = pageNo

    # Step 2: log each section and build its indicators.
    for key in dfMap:
        section = dfMap[key]
        if "page" in section:
            logger.info(key + "-page-" + str(section["page"]))
        if "dfs" in section:
            _mergeAccountSection(key, section["dfs"])
        else:
            _parseSingleSection(key, section)

    # Step 3: assemble the report; every part is followed by a newline.
    parts = [
        title("基本信息"),
        utils.toJson(basicInfoDf),
        title("概要信息"),
        title("信贷交易信息提示"),
        utils.toJson(briefInfoDf_loanTradeInfo),
        title("被追偿信息汇总及呆账信息汇总"),
        utils.toJson(briefInfoDf_recoveryInfo_badDebtsInfoSum),
        title("逾期(透支)信息汇总"),
        utils.toJson(briefInfoDf_overdueInfoSum),
        title("信贷交易授信及负债信息概要"),
        utils.toJson(briefInfoDf_loanTradeCreditInfo),
    ]

    # Merge the per-account loan frames and their repayment records.
    logger.info(title("贷款信息Dataframe"))
    logger.info(dfParser.dfHeaderLoan)
    loanMergeDf = pd.DataFrame()
    loanPayRecordMergeDf = pd.DataFrame()
    for loanDfObj in loanAccountDfs:
        loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True)
        loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0,
                                         ignore_index=True)
    logger.info(loanMergeDf.values)
    logger.info(title("贷款信息还款记录Dataframe"))
    logger.info(dfParser.dfHeaderLoanPayRecord)
    logger.info(loanPayRecordMergeDf.values)
    parseLoanMergeDf(loanMergeDf)
    parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf)
    logger.info(consts.loanAccountInfoHeader)
    logger.info(loanAccountInfoDf.values)
    parts.append(title("贷款账户信息"))
    parts.append(utils.toJson(loanAccountInfoDf))

    # Merge the per-account credit-card frames and their repayment records.
    creditCardMergeDf = pd.DataFrame()
    creditCardPayRecordMergeDf = pd.DataFrame()
    logger.info(title("贷记卡信息Dataframe"))
    logger.info(dfParser.dfHeaderCreditCard)
    for creditCardDfObj in creditCardAccountDfs:
        creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0,
                                      ignore_index=True)
        creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]],
                                               axis=0, ignore_index=True)
    logger.info(creditCardMergeDf.values)
    parseCreditCardMergeDf(creditCardMergeDf)
    parseCreditCardMergeAndPayRecordDf(creditCardMergeDf, creditCardPayRecordMergeDf)
    parts.append(title("贷记卡账户信息"))
    parts.append(utils.toJson(creditCardAccountInfoDf))
    parts.append(title("查询记录明细"))
    parts.append(utils.toJson(queryRecordDetailDf))

    return "".join(part + "\n" for part in parts)
- # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig')
if __name__ == '__main__':
    def _writeReport(pdfPath, outPath):
        """Parse one PDF and write its report, without ANSI colour codes, to outPath."""
        text = main(pdfPath).replace("\033[1;34m", "").replace("\033[0m", "")
        # Write through a managed handle instead of rebinding sys.stdout, so
        # the file is closed and later console output is not redirected.
        with open(outPath, mode='w', encoding='utf-8') as outFile:
            outFile.write(text + "\n")

    def _reportPathFor(pdfPath):
        """Return the .txt path next to pdfPath (only the extension changes)."""
        return os.path.splitext(pdfPath)[0] + ".txt"

    start = timeit.default_timer()
    basePath = "D:/mydocument/myproject/git/busscredit/20200414_report/"
    pdf_path = basePath + "艾思语51112319960218732X.pdf"
    isBat = False  # True: parse every PDF under basePath that has no report yet
    if isBat:
        # NOTE(review): main() appends to module-level collections (keyList,
        # loanAccountDfs, ...); confirm they are reset between files.
        for file in os.listdir(basePath):
            if not file.endswith(".pdf"):
                continue
            pdf_path = basePath + file
            outPath = _reportPathFor(pdf_path)
            if os.path.exists(outPath):
                continue
            logger.info(file + "解析开始...")
            _writeReport(pdf_path, outPath)
            logger.info(file + "解析完成")
    else:
        _writeReport(pdf_path, _reportPathFor(pdf_path))
    s = timeit.default_timer() - start
    logger.info(str(s) + " 秒")
|