import pdfplumber import pandas as pd import numpy as np; import sys import os #指标相关 import loanIndexParser as lip; import payRcdIndexParser as prp; import creditCardIndexParser as cip import queryInfoIndexParser as qip import utils; import time; import consts; import math import dfParser; pd.set_option('mode.chained_assignment', None) import log logger = log.logger # 查询信息 dfMap = {}; allHeaders = [] # 所有表头 queryInfoDf = pd.DataFrame(); queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"]; dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None}; allHeaders.append(queryInfoDf_header); # 身份信息 identityDf = pd.DataFrame(); identity_header = ['性别', None, '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱'] addressDf = pd.DataFrame(); # 通讯地址 dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None}; allHeaders.append(identity_header); # 配偶信息 mateDf = pd.DataFrame(); mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话'] dfMap["mateDf"] = {"df": mateDf, "nextDf": None}; allHeaders.append(mateDf_header); # 居住信息====暂时该信息没有用到先不解析 liveInfoDf = pd.DataFrame(); liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期'] dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None}; allHeaders.append(liveInfoDf_header); # 职业信息 occupationInfoDf = pd.DataFrame(); occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话'] occupationInfoDf1 = pd.DataFrame(); # occupationInfo_header1 = ['编号', '职业', '行业', None, None, '职务', '职称', '进入本单位年份', None, '信息更新日期'] dfMap["occupationInfoDf"] = ({"df": occupationInfoDf, "nextDf": None}); # allHeaders.append(occupationInfo_header1); allHeaders.append(occupationInfo_header); # 上次查询记录 preQueryRcd_header0 = ['上一次查询记录'] allHeaders.append(preQueryRcd_header0); # 查询记录概要 queryInfoBriefDf = pd.DataFrame(); queryInfoBrief_header0 = ['最近1个月内的查询机构数', None, '最近1个月内的查询次数', None, None, '最近2年内的查询次数', None, None] queryInfoBrief_header1 = ['贷款审批', '信用卡审批', '贷款审批', '信用卡\n审批', '本人查询', '贷后管理', '担保资格\n审查', '特约商户\n实名审查'] dfMap["queryInfoBriefDf"] = ({"df": queryInfoBriefDf, "nextDf": None}); allHeaders.append(queryInfoBrief_header0); allHeaders.append(queryInfoBrief_header1); # 信贷交易信息提示 loanTradeInfoDf = pd.DataFrame(); loanTradeInfo_header = ['业务类型', None, '账户数', '首笔业务发放月份']; dfMap["loanTradeInfoDf"] = ({"df": loanTradeInfoDf, "nextDf": None}); allHeaders.append(loanTradeInfo_header) # 信贷交易违约信息概要 # 被追偿信息汇总 资产处置和垫款业务 recoveryInfoSumDf = pd.DataFrame(); recoveryInfoSumDf_header = ['业务类型', '账户数', '余额']; dfMap["recoveryInfoSumDf"] = ({"df": recoveryInfoSumDf, "nextDf": None}); allHeaders.append(recoveryInfoSumDf_header) # 呆账信息汇总 badDebtsInfoSumDf = pd.DataFrame(); badDebtsInfoSumDf_header = ['账户数', '余额']; # 被追偿信息汇总 dfMap["badDebtsInfoSumDf"] = ({"df": badDebtsInfoSumDf, "nextDf": None}); allHeaders.append(badDebtsInfoSumDf_header) # 逾期透资信息汇总 overdueInfoSumDf = pd.DataFrame(); overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数'] dfMap["overdueInfoSumDf"] = ({"df": overdueInfoSumDf, "nextDf": None}); allHeaders.append(overdueInfoSumDf_header) # 非循环贷账户信息汇总 loanAccountInfoSumDf = pd.DataFrame(); loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总', None, None, None, None] loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'] dfMap["loanAccountInfoSumDf"] = ({"df": loanAccountInfoSumDf, "nextDf": None}); allHeaders.append(loanAccountInfoSumDf_header0) allHeaders.append(loanAccountInfoSumDf_header1) # 循环额度下分账户信息汇总 cycleCreditAccountInfoSumDf = pd.DataFrame(); cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总', None, None, None, None] cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'], dfMap["cycleCreditAccountInfoSumDf"] = ({"df": cycleCreditAccountInfoSumDf, "nextDf": None}); allHeaders.append(cycleCreditAccountInfoSumDf_header0) allHeaders.append(cycleCreditAccountInfoSumDf_header1) # 循环贷账户信息汇总 cycleLoanAccountInfoSumDf = pd.DataFrame(); cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总', None, None, None, None] cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'] dfMap["cycleLoanAccountInfoSumDf"] = ({"df": cycleLoanAccountInfoSumDf, "nextDf": None}); allHeaders.append(cycleLoanAccountInfoSumDf_header0) allHeaders.append(cycleLoanAccountInfoSumDf_header1) # 贷记卡账户信息汇总 creditCardInfoSumDf = pd.DataFrame(); creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总', None, None, None, None, None, None] creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度'] dfMap["creditCardInfoSumDf"] = ({"df": creditCardInfoSumDf, "nextDf": None}); allHeaders.append(creditCardInfoSumDf_header0) allHeaders.append(creditCardInfoSumDf_header1) # 准贷记卡账户信息汇总 creditCardInfoSumDfZ = pd.DataFrame(); creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总', None, None, None, None] creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度'] dfMap["creditCardInfoDfZ"] = ({"df": creditCardInfoSumDfZ, "nextDf": None}); allHeaders.append(creditCardInfoSumDfZ_header0) allHeaders.append(creditCardInfoSumDfZ_header1) # 非循环贷账户,循环额度下分账户 # 循环贷账户 loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种'] loanDfs = []; dfMap["loanDfs"] = ({"dfs": loanDfs, "nextDf": []}); allHeaders.append(loan_header) # 贷记卡账户 creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式'] creditCardDfs = []; dfMap["creditCardDfs"] = ({"dfs": creditCardDfs, "nextDf": []}); allHeaders.append(creditCard_header) # 准备贷记卡账户 creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式'] creditCardDfsZ = []; dfMap["creditCardDfsZ"] = ({"dfs": creditCardDfsZ, "nextDf": []}); allHeaders.append(creditCardZ_header) # # 相关还款责任信息汇总 未使用到 # 信贷交易信息明细 # 被追偿信息 未使用到 # 公共信息明细 # 强制执行记录 forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式'] forceExecRcdDfs = []; dfMap["forceExecRcdDfs"] = ({"dfs": forceExecRcdDfs, "nextDf": []}); allHeaders.append(forceExecRcdDfs_header) # 查询记录 queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因'] dfMap["queryRecordDetailDf"] = ({"df": pd.DataFrame(), "nextDf": []}); allHeaders.append(queryRecordDetailDf_header) # 处理分页思路 # df估计得放到对象里面,然后存储下一个df,一个对象里包含key # 然后判断对象的df的完整性,如果不完整代表被分页了,把nextdf合并到当前的df # 针对可合并的列的场景 # ======= keyList = [] # 存储所有的df的key列表 # pd.Series() # 检查数据是否带表头 # 应该是每一页开头的一行和每个表头对比一次,确认是不是表头,或者表头有什么共同的规律也可以看下 import timeit # 定义指标部分======================start reportTime = ""; # 报告时间 # 被查询者姓名 queryInfoName = ""; queryInfoCardId = "" # 被查询者证件号码 # 定义指标部分======================end # 被查询信息-基础信息 # 报告时间 # 被查询者姓名 # 被查询者证件号码 # 基础信息 queryInfo = {"reportTime":""} # 身份信息 identity = {} # 配偶信息 mate = {} # 信贷交易信息提示-信用提示 loanTradeInfo = {'perHouseLoanAccount': 0, 'perBusHouseLoanAccount': 0, 'otherLoanAccount': 0, 'loanMonthMin': 0, 'creditCardMonthMin': 0, 'creditAccount': 0, 'creditAccountZ': 0} # 逾期及违约信息概要 overdueBrief = {} # 逾期及透资信息汇总 # 贷款逾期账户数 loanOverdueAccount # 贷款逾期月份数 loanOverdueMonth # 贷款单月最高逾期总额 loanCurMonthOverdueMaxTotal # 贷款最长逾期月数 loanMaxOverdueMonth overdueInfo = {"loanOverdueAccount": "", "loanOverdueMonth": "", "loanCurMonthOverdueMaxTotal": "", "loanMaxOverdueMonth": "", "creditCardOverdueAccount": "", "creditCardOverdueMonth": "", "creditCardCurMonthOverdueMaxTotal": "", "creditCardMaxOverdueMonth": ""} # 未结清贷款信息汇总 # ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'] loanAccountInfoSum = {"mgrOrgCount": 0, "account": 0, "creditTotalAmt": 0, "balance": 0, "last6AvgPayAmt": 0} # 未销户贷记卡发卡法人机构数 # 未销户贷记卡发卡机构数 # 未销户贷记卡账户数 # 未销户贷记卡授信总额 # 未销户贷记卡单家行最高授信额 # 未销户贷记卡单家行最低授信额 # 未销户贷记卡已用额度 # 未销户贷记卡近6月平均使用额度 # 未结清贷记卡信息汇总 # ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度'] creditCardInfoSum = {"awardOrgCount": 0, "account": 0, "creditTotalAmt": 0, "perMaxCreditTotalAmt": 0, "perMinCreditTotalAmt": 0, "useAmt": 0, "last6AvgUseAmt": 0} # 信 贷 审 批 查 询 记 录 明 细 queryRecordDetail = {"last1MonthQueryTimes": 0, "last3MothLoanApproveTimes": 0, "last3MonthQueryTimes": 0, "lastTimeLoanApproveMonth": 0} #最近一笔结清贷款的贷款金额  loanAccountInfo = {"lastSettleLoanAmt": 0} loanAccountDfs=[];#横向合并 creditCardAccountDfs=[];#贷记卡账户合并 #============================指标定义区 start============================= #基本信息 basicInfoDf = pd.DataFrame(columns=consts.basicInfoHeader, index=[0]) #概要信息 # briefInfoDf = pd.DataFrame(columns=consts.briefInfoHeader, index=[0]) #信贷交易信息提示 briefInfoDf_loanTradeInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeInfo, index=[0]) #被追偿信息汇总及呆账信息汇总 briefInfoDf_recoveryInfo_badDebtsInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_recoveryInfo_badDebtsInfoSum, index=[0]) #逾期(透支)信息汇总 briefInfoDf_overdueInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_overdueInfoSum, index=[0]) #信贷交易授信及负债信息概要 briefInfoDf_loanTradeCreditInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeCreditInfo, index=[0]) #贷款信息 loanAccountInfoDf = pd.DataFrame(columns=consts.loanAccountInfoHeader, index=[0]) #贷记卡信息 creditCardAccountInfoDf = pd.DataFrame(columns=consts.creditCardAccountInfoHeader, index=[0]) #查询记录明细指标 queryRecordDetailDf = pd.DataFrame(columns=consts.queryRecordDetailHeader, index=[0]) #============================指标定义区 end============================= # 解析被查询信息指标 def parseQueryInfo(dfObj): df = dfObj["df"]; reportTime = df.loc[0, :][3] reportTime = reportTime.split(":")[1] reportTime = reportTime.replace(".", "-"); # 报告时间 queryInfo["reportTime"] = reportTime row = df.loc[2, :] queryInfo["queryInfoName"] = row[0]; # 被查询者姓名 basicInfoDf.loc[0, '姓名'] = row[0] queryInfo["queryInfoCardId"] = row[2].replace("\n", ""); # 被查询者证件号码 basicInfoDf.loc[0, '身份证'] = row[2].replace("\n", "") # 婚姻状况 # 学历 # 单位电话 # 住宅电话 # 通讯地址 def parseIdentity(dfObj): df = dfObj["df"]; row1 = df.loc[1, :].dropna().reset_index(drop=True) # identity["marital"] = row1[3] # 婚姻状况 # identity["education"] = row1[4] # 学历 # identity["commAddress"] = row1[9].replace("\n", ""); # 通讯地址 basicInfoDf.loc[0, '性别'] = row1[0] basicInfoDf.loc[0, '出生年月'] = dfParser.formatDate(row1[1])[0:7] basicInfoDf.loc[0, '国籍'] = row1[6] basicInfoDf.loc[0, '户籍地址'] = row1[9].replace("\n", "") basicInfoDf.loc[0, '婚姻状况'] = row1[2] basicInfoDf.loc[0, '学位'] = row1[4] basicInfoDf.loc[0, '通讯地址'] = row1[8].replace("\n", "") basicInfoDf.loc[0, '就业状况'] = row1[5] # mobileDf = dfObj["mobileDf"]; # basicInfoDf.loc[0, '历史电话号码数'] = mobileDf.index.size # basicInfoDf.loc[0, '近3个月电话号码数'] = getLastMonthMobileCount(mobileDf,3) #最近几个月电话号码数 def getLastMonthMobileCount(df, month): # 当前日期 last1MonthDateStr = time.strftime("%Y-%m-%d"); # 最近一个月 lastMonthDate = np.datetime64(last1MonthDateStr, "D") - np.timedelta64(30 * month, 'D') lastMonthMobileDf = df[df[5] >= str(lastMonthDate)] return lastMonthMobileDf.shape[0]; # 配偶姓名 # 配偶证件号码 # 配偶工作单位 # 配偶联系电话 def parseMate(dfObj): df = dfObj["df"]; if not df.empty: row1 = df.loc[1, :] mate["mateName"] = row1[0] # 配偶姓名 mate["mateCardId"] = row1[2] # 配偶证件号码 mate["mateWorkCompany"] = row1[3].replace("\n", ""); # 配偶工作单位 mate["mateContactTel"] = row1[4]; # 配偶联系电话 basicInfoDf.loc[0, '配偶姓名'] = row1[0] basicInfoDf.loc[0, '配偶证件号码'] = row1[2] basicInfoDf.loc[0, '配偶工作单位'] = row1[3].replace("\n", ""); basicInfoDf.loc[0, '配偶联系电话'] = row1[4].replace("\n", ""); #解析居住信息 def parseLiveInfo(dfObj): df = dfObj["df"]; if not df.empty: row1 = df.loc[1, :] basicInfoDf.loc[0, '居住地址'] = row1[1] basicInfoDf.loc[0, '住宅电话'] = row1[2] basicInfoDf.loc[0, '历史居住地个数'] = df.index.size-1; curDate = np.datetime64(time.strftime("%Y-%m-%d")); last3year = str(curDate)[0:4] last3yearDate = str(int(last3year)-3)+str(curDate)[4:10] lastLiveDf = df[df[4]>=last3yearDate]; basicInfoDf.loc[0, '最近3年内居住地个数'] = lastLiveDf.index.size-1; houseIndex = df[df[3]=='自置'].index.size>0 if (houseIndex): houseStr = '是' else: houseStr= '否' basicInfoDf.loc[0, '当前住房状态-是否具有自有住房'] = houseStr; basicInfoDf.loc[0, '居住状况'] = row1[3] basicInfoDf.loc[0, '居住信息更新日期'] = row1[4] # 个人住房贷款笔数 perHouseLoanAccount # 个人商用房(包括商住两用)贷款笔数 perBusHouseLoanAccount # 其他贷款笔数 otherLoanAccount # 贷记卡账户数 creditAccount # 贷款账龄(月数) loanMonthMin # 信用卡账龄(月数)creditCardMonthMin # 日期相减离当前时间月份 # 贷款账龄(月数)=当前日期(2020-04-01)-最小月份的1日(2019.2->2019-12-01)=4 # def difMonth(dateStr): # return int(int(str(np.datetime64(time.strftime("%Y-%m-%d")) - # np.datetime64(dateStr.replace('.', '-'), "D")).split(" ")[0]) / 30); # 信贷交易明细汇总 def parseLoanTradeInfo(dfObj): df = dfObj["df"]; # row1 = df.loc[1, :] loanMonthDf = df[1: 4] loanMonthDf = loanMonthDf.reset_index(drop=True) loanTradeInfo["perHouseLoanAccount"] = loanMonthDf.loc[0, :][2] # 第0行第二列 个人住房贷款笔数 briefInfoDf_loanTradeInfo.loc[0, '住房贷款笔数'] = loanMonthDf.loc[0, :][2] loanTradeInfo["perBusHouseLoanAccount"] = loanMonthDf.loc[1, :][2] # 第1行第二列 个人商用房(包括商住两用)贷款笔数 briefInfoDf_loanTradeInfo.loc[0,'个人商用房(包括商住两用)贷款笔数']=loanMonthDf.loc[1, :][2] loanTradeInfo["otherLoanAccount"] = loanMonthDf.loc[2, :][2] # 第2行第二列 其他贷款笔数 briefInfoDf_loanTradeInfo.loc[0, '其他贷款笔数'] = loanMonthDf.loc[2, :][2] loanMonthDf = loanMonthDf[loanMonthDf[3] != '--'] loanMonthMin = loanMonthDf[3].min() # 首笔贷款发放月份最小值 # if loanMonth != "": if loanMonthMin != "" and not math.isnan(float(loanMonthMin)): loanMonthMin = utils.difMonth(loanMonthMin) loanTradeInfo["loanMonthMin"] = loanMonthMin; # 贷款账龄(月数) creditCardDf = df[4: 6]; creditCardDf = creditCardDf.reset_index(drop=True) creditCardMonthDf = creditCardDf[creditCardDf[3] != '--'] creditCardMonthMin = creditCardMonthDf[3].min() # 首笔贷记卡发放月份最小值 creditCardMonthDf = creditCardMonthDf.reset_index(drop=True) if creditCardMonthMin != "" and not math.isnan(float(creditCardMonthMin)): creditCardMonthMin = utils.difMonth(creditCardMonthMin) if str(creditCardMonthMin) != "nan": loanTradeInfo["creditCardMonthMin"] = creditCardMonthMin; # 信用卡账龄(月数) loanTradeInfo["creditAccount"] = creditCardDf.loc[0, :][2] # 第0行第3列 贷记卡账户数 briefInfoDf_loanTradeInfo.loc[0, '贷记卡账户数'] = creditCardDf.loc[0, :][2] loanTradeInfo["creditAccountZ"] = creditCardDf.loc[1, :][2] # 第1行第3列 briefInfoDf_loanTradeInfo.loc[0, '信用卡账户数'] = creditCardDf.loc[1, :][2] if loanTradeInfo["creditAccountZ"] != "--" and loanTradeInfo["creditAccount"] != "--": loanTradeInfo["creditAccount"] = int(loanTradeInfo["creditAccount"]) + int(loanTradeInfo["creditAccountZ"]) # 解析呆账信息 def parseBadDebtsInfoSumDf(dfObj): df = dfObj["df"]; if not df.empty: row1 = df.loc[1, :] overdueBrief["badDebtsInfoSumAccount"] = row1[0]; # 呆账信息汇总笔数 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账业务账户数'] = row1[0]; overdueBrief["badDebtsInfoSumAmt"] = row1[1]; # 呆账信息汇总余额 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账信息余额'] = row1[1]; else: overdueBrief["badDebtsInfoSumAccount"] = ""; # 呆账信息汇总笔数 overdueBrief["badDebtsInfoSumAmt"] = ""; # 呆账信息汇总余额 # 解析被追偿信息 def parseRecoveryInfo(dfObj): df = dfObj["df"]; if not df.empty: row1 = df.loc[1, :] row2 = df.loc[2, :] row3 = df.loc[3, :] overdueBrief["disposalInfoSumAccount"] = row1[1]; # 资产处置信息汇总笔数 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '资产处置业务账户数'] = row1[1]; overdueBrief["disposalInfoSumAmt"] = row1[2]; # 资产处置信息汇总余额 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '资产处置业务账户余额'] = replaceAmt(row1[2]); overdueBrief["advanceInfoSumAccount"] = row2[1]; # 垫款业务笔数 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '垫款业务账户数'] = row2[1]; overdueBrief["advanceInfoSumAmt"] = row2[2]; # 垫款业务余额 briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '垫款业务账户余额'] = replaceAmt(row2[2]); briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '被追偿信息总数'] = row3[1]; briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '被追偿信息总额'] = replaceAmt(row3[2]); else: overdueBrief["disposalInfoSumAccount"] = ""; # 资产处置信息汇总笔数 overdueBrief["disposalInfoSumAmt"] = ""; # 资产处置信息汇总余额 overdueBrief["advanceInfoSumAccount"] = ""; # 垫款业务笔数 overdueBrief["advanceInfoSumAmt"] = ""; # 垫款业务余额 # 贷款逾期账户数 # 贷款逾期月份数 # 贷款单月最高逾期总额 # 贷款最长逾期月数 def parseOverdueInfo(dfObj): df = dfObj["df"]; if not df.empty: row2= df.loc[2, :] row3 = df.loc[3, :] row4 = df.loc[4, :] row5 = df.loc[5, :] row6 = df.loc[6, :] briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户数'] = row2[1]; briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户数月数'] = row2[2]; briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户单月最高逾期/透支总额'] = row2[3]; briefInfoDf_overdueInfoSum.loc[0, '循环额度下分账户数'] = row3[1]; briefInfoDf_overdueInfoSum.loc[0, '循环贷帐户数'] = row4[1]; briefInfoDf_overdueInfoSum.loc[0, '贷记卡账户数'] = row5[1]; briefInfoDf_overdueInfoSum.loc[0, '准贷记卡账户数'] = row6[1]; # 未结清贷款法人机构数 从“未结清贷款信息汇总”中直接提取LoanLegalOrgNum # 未结清贷款机构数 从“未结清贷款信息汇总”中直接提取LoanOrgNum # 未结清贷款笔数 从“未结清贷款信息汇总”中直接提取CountNum # 未结清贷款合同总额 从“未结清贷款信息汇总”中直接提取ContractProfits # 未结清贷款合同余额 从“未结清贷款信息汇总”中直接提取Balance # 未结清贷款近6月平均应还款 从“未结清贷款信息汇总”中直接提取Last6MothsAvgRepayAmount # 个人贷款未结清笔数 "从“未结清贷款信息汇总”计算客户符合以下条件的贷款笔数 # 1.贷款类型不为('%个人助学贷款%' ,'%农户贷款%') # 2.贷款额度>100元 # 3.贷款状态不为“结清”" # 非循环贷账户信息汇总 def doFilterCalc(dfx): dfx = dfx.replace('--', 0) return dfx; # 科学计数法转换 def replaceAmt(dfx): return dfx.str.replace(',', '') # 非循环贷账户信息汇总 如有循环贷款和额度循环的需要进行汇总 TODO def parseLoanAccountInfoSum(dfObj): df = dfObj["df"]; if not df.empty: loanAccountInfoSumDf = df[2:3]; loanAccountInfoSumDf = doFilterCalc(loanAccountInfoSumDf); # 替换--为0 loanAccountInfoSum["mgrOrgCount"] = np.sum(loanAccountInfoSumDf[0].astype('int')) loanAccountInfoSum["account"] = np.sum(loanAccountInfoSumDf[1].astype('int')) loanAccountInfoSum["creditTotalAmt"] = np.sum(replaceAmt(loanAccountInfoSumDf[2]).astype('int')) loanAccountInfoSum["balance"] = np.sum(replaceAmt(loanAccountInfoSumDf[3]).astype('int')) loanAccountInfoSum["last6AvgPayAmt"] = np.sum(replaceAmt(loanAccountInfoSumDf[4]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款法人机构数'] = np.sum(loanAccountInfoSumDf[0].astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款机构数'] = np.sum(loanAccountInfoSumDf[0].astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款笔数'] = np.sum(loanAccountInfoSumDf[1].astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同总额'] = np.sum(replaceAmt(loanAccountInfoSumDf[2]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同余额'] = np.sum(replaceAmt(loanAccountInfoSumDf[3]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款近6月平均应还款'] = np.sum(replaceAmt(loanAccountInfoSumDf[4]).astype('int')) # {"awardOrgCount":0,"account":0,"creditTotalAmt":0,"perMaxCreditTotalAmt":0,"perMinCreditTotalAmt":0,"useAmt":0,"last6AvgUseAmt":0} # 解析贷记卡信息汇总,包含准贷记卡 def parseCreditCardInfoSum(dfObj): df = dfObj["df"]; if not df.empty: creditCardInfoSumDf = df[2:3]; creditCardInfoSumDf = doFilterCalc(creditCardInfoSumDf); # 替换--为0 creditCardInfoSum["awardOrgCount"] = np.sum(creditCardInfoSumDf[0].astype('int')) creditCardInfoSum["account"] = np.sum(creditCardInfoSumDf[1].astype('int')) creditCardInfoSum["creditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[2]).astype('int')) creditCardInfoSum["perMaxCreditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[3]).astype('int')) creditCardInfoSum["perMinCreditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[4]).astype('int')) creditCardInfoSum["useAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[5]).astype('int')) creditCardInfoSum["last6AvgUseAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[6]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡发卡机构数'] = np.sum(creditCardInfoSumDf[0].astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡账户数'] = np.sum(creditCardInfoSumDf[1].astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡授信总金额'] = np.sum(replaceAmt(creditCardInfoSumDf[2]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '单家授信最高金额'] = np.sum(replaceAmt(creditCardInfoSumDf[3]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '单家授信最低金额'] = np.sum(replaceAmt(creditCardInfoSumDf[4]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡已用额度'] = np.sum(replaceAmt(creditCardInfoSumDf[5]).astype('int')) briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡最近6个月平均使用额度'] = np.sum(replaceAmt(creditCardInfoSumDf[6]).astype('int')) # 解析查询记录明细 def parseQueryInfoDetail(dfObj): df = dfObj["df"]; reportTime = queryInfo["reportTime"]; if not df.empty: df = utils.replaceDateCol(df) df = df[1:df.index.size] # 去掉表头 queryRecordDetail["last1MonthQueryTimes"] = qip.getLastMonthQueryTimes(df, 1, "",reportTime) # 去掉表头 queryRecordDetail["last3MonthQueryTimes"] = qip.getLastMonthQueryTimes(df, 3, "",reportTime) queryRecordDetail["last3MothLoanApproveTimes"] = qip.getLastMonthQueryTimes(df, 3, consts.loanApprove,reportTime) queryRecordDetailDf.loc[0,'近1月查询次数'] = qip.getLastMonthQueryTimes(df, 1, "",reportTime) queryRecordDetailDf.loc[0, '近3月查询次数'] = qip.getLastMonthQueryTimes(df, 3, "",reportTime) queryRecordDetailDf.loc[0, '近6月查询次数'] = qip.getLastMonthQueryTimes(df, 6, "", reportTime) queryRecordDetailDf.loc[0, '近12月查询次数'] = qip.getLastMonthQueryTimes(df, 12, "", reportTime) queryRecordDetailDf.loc[0, '近3月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近3月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime) queryRecordDetailDf.loc[0, '近6月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime) queryRecordDetailDf.loc[0, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近12月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime) queryRecordDetailDf.loc[0, '近3月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近3月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime) queryRecordDetailDf.loc[0, '近6月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime) queryRecordDetailDf.loc[0, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime) queryRecordDetailDf.loc[0, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime) queryRecordDetailDf.loc[0, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime) queryRecordDetailDf.loc[0, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime) queryRecordDetailDf.loc[0, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime) queryRecordDetailDf.loc[0, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime) queryRecordDetailDf.loc[0, '最近12个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime) queryRecordDetailDf.loc[0, '最近12个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime) #解析贷款还款记录指标 def parseLoanMergeAndPayRecordDf(df,payRcdDf): if not df.empty and not payRcdDf.empty: #正常 normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')] overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)] overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf) #临时保存,不用过滤还款状态为0的 payRcdMaxOverdueDf = overduePayRcdDf; overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态']>0] loanAccountInfoDf.loc[0, '当前贷款逾期账户数'] = overduePayRcdDf['账户编号'].unique().size loanAccountInfoDf.loc[0, '当前贷款逾期账户数占比'] = round(loanAccountInfoDf.loc[0, '当前贷款逾期账户数']/df.index.size,2) #存在逾期的贷款账户 非结清的过滤出逾期的账户号 overdueLoanDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)] loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] = overdueLoanDf['管理机构'].unique().size loanAccountInfoDf.loc[0, '当前贷款逾期机构数占比'] = round(loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] / df['管理机构'].unique().size,2) #还款记录按日期排序最近3笔的最大逾期期数 loanAccountInfoDf.loc[0, '近1月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf,1); loanAccountInfoDf.loc[0, '近3月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3); loanAccountInfoDf.loc[0, '近6月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6); loanAccountInfoDf.loc[0, '近9月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9); loanAccountInfoDf.loc[0, '近24月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24); loanAccountInfoDf.loc[0, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24); payStatus= ["G","D","C","N","M","1","2","3","4","5","6","7"] # 贷款24期还款记录次数 剔除结清 转出 呆账 payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)] #从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数) payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)] payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count() #payRcdDf[(payRcdDf['还款状态']!='') & (payRcdDf['账户编号']==1)].index.size loanAccountInfoDf.loc[0, '贷款24期还款记录次数'] = np.max(payRcdTimes) #解析贷款账户信息指标 def parseLoanMergeDf(df): if not df.empty: sortDf = df.sort_values(by=["开立日期","借款金额(本金)"] , ascending=(False,False)) sortDf = sortDf[sortDf['账户状态'] == '结清']; sortDf = sortDf.reset_index(drop=True) if not sortDf.empty: row0 = sortDf.loc[0, :] loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)'] loanAccountInfoDf.loc[0, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)'] openDate = dfParser.formatDate(row0['开立日期']) loanAccountInfoDf.loc[0, '最近一笔结清贷款的发放距今月数'] = utils.difMonth(openDate) settleDate = dfParser.formatDate(row0['账户关闭日期']) loanAccountInfoDf.loc[0, '最近一笔结清贷款的结清距今月数'] = utils.difMonth(settleDate) loanAccountInfoDf.loc[0, '历史贷款总法人机构数'] = df['管理机构'].unique().size loanAccountInfoDf.loc[0, '当前同时在用的贷款机构数'] = df[df['借款金额(本金)']>0]['管理机构'].unique().size statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')] bankDf = statusDf[statusDf['管理机构'].str.contains('银行')] #没有记录 if statusDf.index.size==0: isNotBankCust = -1 else: if bankDf.index.size >0:#有一条以上不为结清,请包含银行 isNotBankCust = 1; else: isNotBankCust = 0; loanAccountInfoDf.loc[0, '是否有非银贷款客户'] = isNotBankCust #最严重的五级分类 fiveType = "" for fiveTypeTmp in consts.fiveType: fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp]; if not fiveTypeDf.empty: fiveType = fiveTypeTmp; break; loanAccountInfoDf.loc[0, '贷款五级分类'] = fiveType #当前贷款LTV # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0 # 的记录后,SUM(本金余额) / SUM(贷款本金) # 如本金余额为空和贷款本金为0或为空,则当条记录不计算 loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')] badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))] balanceSum = np.sum(badSetDf['余额(本金)'].astype('int')) loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int')) if(loanAmtSum !=0): loanAccountInfoDf.loc[0, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2) loanAccountInfoDf.loc[0, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2) loanAccountInfoDf.loc[0, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2) loanAccountInfoDf.loc[0, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2) houseLtvList = consts.houseLtvList; # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)] # if not houseLtvDf.empty: # loanAccountInfoDf.loc[0, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2) #['个人住房贷款','个人商用房(包括商住两用)贷款'] loanAccountInfoDf.loc[0, '当前房贷LTV'] = lip.getCurLtv(badSetDf,houseLtvList) cardLtvList = ['个人汽车消费贷款'] loanAccountInfoDf.loc[0, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList) operateLtvList = ['个人经营性贷款'] loanAccountInfoDf.loc[0, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList) consumeLtvList = ['其他个人消费贷款'] loanAccountInfoDf.loc[0, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList) bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行'] loanAccountInfoDf.loc[0, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList) bankLtvList = ['消费金融有限公司','汽车金融公司','信托投资'] loanAccountInfoDf.loc[0, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList) smallLoanLtvList = ['机构','小额信贷公司','财务公司'] loanAccountInfoDf.loc[0, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList) #当前贷款最大逾期期数 # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数) loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')] if not loanOverdueLtvDf.empty: loanAccountInfoDf.loc[0, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数']) loanAccountInfoDf.loc[0, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额']) loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True) maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数']) loanAccountInfoDf.loc[0, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额'] loanAccountInfoDf.loc[0, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里 loanAccountInfoDf.loc[0, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3) loanAccountInfoDf.loc[0, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3) loanAccountInfoDf.loc[0, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6) loanAccountInfoDf.loc[0, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6) loanAccountInfoDf.loc[0, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6) loanAccountInfoDf.loc[0, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12) loanAccountInfoDf.loc[0, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12) loanAccountInfoDf.loc[0, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12) lastLoanDf = loanOverdueLtvDf; if not lastLoanDf.empty: loanAccountInfoDf.loc[0, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"]) normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)] #未结清贷款总账户数:账户状态不等于结清和转出的记录数 notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')] if not notSettleDf.empty: loanAccountInfoDf.loc[0, '当前正常贷款账户数'] = normalDf.index.size loanAccountInfoDf.loc[0, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2) loanAccountInfoDf.loc[0, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)']) # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总) loanAccountInfoDf.loc[0, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']),2) settleDf = df[(df['账户状态'] == '结清')] loanAccountInfoDf.loc[0, '当前正常结清贷款账户数'] = settleDf.index.size loanAccountInfoDf.loc[0, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2) #贷款24期还款记录次数 TODO # 最近3个月个人消费贷款发放额度 loanAccountInfoDf.loc[0, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款']) loanAccountInfoDf.loc[0, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3) loanAccountInfoDf.loc[0, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6) loanAccountInfoDf.loc[0, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12) #解析贷记卡账户信息指标 def parseCreditCardMergeDf(df): if not df.empty: # 历史信用卡总法人机构数 creditCardAccountInfoDf.loc[0,'历史信用卡总法人机构数'] = df['发卡机构'].unique().size creditCardUseDf = df[df['已用额度']>0]; creditCardAccountInfoDf.loc[0,'当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size #统一排除 creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')] creditCardAccountInfoDf.loc[0,'贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf) creditCardAccountInfoDf.loc[0, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf) creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf,df,0.9) creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, df, 1) # 从“贷记卡信息”中提取,计算授信额度时剔除销户,计算已用额度时剔除呆账、呆帐、销户后,SUM(各账户已用额度) / SUM(各账户授信额度) useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')] totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')] creditCardAccountInfoDf.loc[0, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度'])/np.sum(totalCreditDf['账户授信额度']),2) creditCardAccountInfoDf.loc[0, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / np.sum(totalCreditDf['账户授信额度']), 2) creditCardAccountInfoDf.loc[0, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / np.sum(totalCreditDf['账户授信额度']), 2) creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数'] = np.max(creditDf['当前逾期期数']) creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期金额'] = np.max(creditDf['当前逾期总额']) if not creditDf.empty: creditDf = creditDf.reset_index(drop=True) maxOverdueIndex = np.argmax(creditDf['当前逾期期数']) creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex,:]['当前逾期总额'] creditCardAccountInfoDf.loc[0, '近3月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df,queryInfo["reportTime"],3) creditCardAccountInfoDf.loc[0, '近3月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 3) creditCardAccountInfoDf.loc[0, '近3月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 3) creditCardAccountInfoDf.loc[0, '近6月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 6) creditCardAccountInfoDf.loc[0, '近6月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 6) creditCardAccountInfoDf.loc[0, '近6月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 6) creditCardAccountInfoDf.loc[0, '近12月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 12) creditCardAccountInfoDf.loc[0, '近12月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 12) creditCardAccountInfoDf.loc[0, '近12月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 12) if not creditDf.empty: creditCardAccountInfoDf.loc[0, '信用卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf,queryInfo["reportTime"]) creditCardAccountInfoDf.loc[0, '贷记卡还款比例'] = round(np.sum(creditDf['本月应还款'])/np.sum(creditDf['本月实还款']),2) creditCardAccountInfoDf.loc[0, '贷记卡最高还款比例'] = round(np.max(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2) creditCardAccountInfoDf.loc[0, '贷记卡最低还款比例'] = round(np.min(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2) normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)]; notCloseDf = df[(df['账户状态'] != '销户')] if not notCloseDf.empty and not normalDf.empty: creditCardAccountInfoDf.loc[0, '当前正常信用卡账户数'] = round(normalDf.index.size/notCloseDf.index.size,2) creditCardAccountInfoDf.loc[0, '当前正常信用卡已用额度'] = np.sum(normalDf['已用额度']) creditCardAccountInfoDf.loc[0, '当前正常信用卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2) creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数'] = normalDf[normalDf['已用额度']>0].index.size if notCloseDf.empty: creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数占比'] = -99 else: creditCardAccountInfoDf.loc[0, '当前正常信用卡账户余额占总余额比'] = round(creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数']/notCloseDf.index.size,2) creditCardAccountInfoDf.loc[0, '贷记卡本月实还款金额'] = np.sum(creditDf['本月实还款']) maxAmtDf = df[(df['币种'] == '人民币元')] if not maxAmtDf.empty: maxAmtDf = maxAmtDf.reset_index(drop=True) maxAmtIndex = np.argmax(maxAmtDf['账户授信额度']) maxOpenDate = maxAmtDf.loc[maxAmtIndex,:]['开立日期']; creditCardAccountInfoDf.loc[0, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate,queryInfo["reportTime"])+1; #解析贷款还款记录指标 def parseCreditCardMergeAndPayRecordDf(df,payRcdDf): if not df.empty and not payRcdDf.empty: # 正常 normalDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')] if not normalDf.empty: overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)] overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf) # 临时保存,不用过滤还款状态为0的 payRcdMaxOverdueDf = overduePayRcdDf; overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态'] > 0] creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数'] = overduePayRcdDf['账户编号'].unique().size #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,“当前信用卡逾期账户数”/未销户贷记卡账户数(剔除“账户状态”为未激活、销户、呆账、呆帐后记录条数) creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数占比'] = round(creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数'] / normalDf.index.size, 2) #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,对(当前信用卡逾期账户数)按“开户机构代码”去重统计账户状态为逾期,按按“开户机构代码”去重后的记录条数 overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)] creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数占比'] = round(creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数'] / normalDf['发卡机构'].unique().size, 2) creditCardAccountInfoDf.loc[0, '近3月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3); creditCardAccountInfoDf.loc[0, '近6月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6); creditCardAccountInfoDf.loc[0, '近9月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9); creditCardAccountInfoDf.loc[0, '近12月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12); creditCardAccountInfoDf.loc[0, '近24月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24); creditCardAccountInfoDf.loc[0, '近24月信用卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24); creditCardAccountInfoDf.loc[0, '最近3个月信用卡最大连续逾期月份数'] = 0; payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)] payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count() creditCardAccountInfoDf.loc[0, '贷记卡24期还款记录次数'] = np.max(payRcdTimes) def main(pdf_path): # 解析pdf开始 with pdfplumber.open(pdf_path) as pdf: for p in range(0, len(pdf.pages)): page = pdf.pages[p] # first_page = pdf.pages[1] # if p == 3: # print(3) for i in range(0, len(page.extract_tables())): table = page.extract_tables()[i] df = pd.DataFrame(table); if len(keyList) > 1 and i == 0: # 判断是否被分页了 if not utils.checkHeader(df, allHeaders): key = keyList[-1]; dfObj = dfMap[key] # dfObj["nextDf"]=df; # 贷款信息 贷记卡信息 强制执行记录 if key == "loanDfs" or key == "creditCardDfs" or key == "forceExecRcdDfs": # 属于列表 lastDfObj = dfObj["dfs"][-1]; lastDfObj["isByPage"] = str(p + 1); if len(dfObj["dfs"][-1]["df"].columns) == len(df.columns): # 列数相同 lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True); # 去最后一个进行合并 # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页") else: # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页") lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True); else: # 查询记录明细 为单个列表 dfObj["isByPage"] = str(p + 1); if len(dfObj["df"].columns) == len(df.columns): # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页") dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True) else: # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页") dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True) # dfObj["nextDf"] = df; # 如果列数相等合并df continue; headerList0 = df.loc[0, :].tolist() # 第0行为表头 headerList1 = [] if df.index.size>1: headerList1 = df.loc[1, :].tolist() # 第1行为表头 if headerList1 == queryInfoDf_header: # 被查询信息 第二行为数据 queryInfoDf = df; dfKey = "queryInfoDf" dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == identity_header: # 身份信息 identityDf = df[:2] # 截取前2行 addressDf = df.iloc[2:4, [0, 5]] # 截取3到4行的第一和6 addressDf = addressDf.reset_index(drop=True) mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5) identityDf = pd.concat([identityDf, addressDf], axis=1, ignore_index=True) # 横向合并 dfKey = "identityDf" dfMap[dfKey]["df"] = identityDf; keyList.append(dfKey); # 组装电话号码df dfMap[dfKey]["mobileDf "] = mobileDf elif headerList0 == mateDf_header: # 配偶信息 mateDf = df; dfKey = "mateDf" dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == liveInfoDf_header: # 居住信息 mateDf = df; dfKey = "liveInfoDf" dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == occupationInfo_header: # 职业信息 可能存在分页 occupationInfoDf = df; dfKey = "occupationInfoDf" dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1: # 查询信息概要 第二行为数据 queryInfoBriefDf = df; dfKey = "queryInfoBriefDf" dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == loanTradeInfo_header: # 信贷交易信息 loanTradeInfoDf = df; dfKey = "loanTradeInfoDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == recoveryInfoSumDf_header: # 被追偿信息汇总 recoveryInfoSumDf = df; dfKey = "recoveryInfoSumDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == badDebtsInfoSumDf_header: # 呆账信息 badDebtsInfoSumDf = df; dfKey = "badDebtsInfoSumDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList1 == overdueInfoSumDf_header: # 逾期透资信息汇总 overdueInfoSumDf = df; dfKey = "overdueInfoSumDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1: # 非循环贷账户信息汇总 loanAccountInfoSumDf = df; dfKey = "loanAccountInfoSumDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1: # 贷记卡信息汇总 creditCardInfoSumDf = df; dfKey = "creditCardInfoSumDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1: # 准贷记卡信息汇总 目前没有数据 creditCardInfoSumDfZ = df; dfKey = "creditCardInfoSumDfZ"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif list(filter(None, headerList0)) == loan_header: # 贷款账户 包括循环贷,非循环贷 循环额度下分账户 dfKey = "loanDfs"; dfMap[dfKey]["dfs"].append({"df": df}); keyList.append(dfKey); elif list(filter(None, headerList0)) == creditCard_header: # 贷记卡账户 dfKey = "creditCardDfs"; dfMap[dfKey]["dfs"].append({"df": df}); keyList.append(dfKey); elif list(filter(None, headerList0)) == creditCardZ_header: # 准贷记卡账户 还不能和贷记卡合并 dfKey = "creditCardDfsZ"; dfMap[dfKey]["dfs"].append({"df": df}); keyList.append(dfKey); elif list(filter(None, headerList0)) == queryRecordDetailDf_header: # 查询记录明细 dfKey = "queryRecordDetailDf"; dfMap[dfKey]["df"] = df; keyList.append(dfKey); elif list(filter(None, headerList0)) == forceExecRcdDfs_header: # 强制执行记录 dfKey = "forceExecRcdDfs"; dfMap[dfKey]["dfs"].append({"df": df}); keyList.append(dfKey); # 设置分页 dfMap[dfKey]["page"] = p + 1; # 打印结果解析并构建指标 for key in dfMap: if dfMap[key].__contains__("page"): logger.info(key + "-page-" + str(dfMap[key]["page"])) if dfMap[key].__contains__("dfs"): if key == "loanDfs": # 贷款账户 for idx in range(0, len(dfMap[key]["dfs"])): tempDfObj = dfMap[key]["dfs"][idx]; if tempDfObj.__contains__("isByPage"): # print("贷款账户被分页#################") # print(key + "============被分页页数============" + str(tempDfObj["isByPage"])) loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime'])) logger.info(tempDfObj["df"].values) else: # 未被分页 logger.info(tempDfObj["df"].values) loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime'])) elif key == "creditCardDfs": # 贷记卡账户合并 for idx in range(0, len(dfMap[key]["dfs"])): tempDfObj = dfMap[key]["dfs"][idx]; creditCardAccountDfs.append(dfParser.mergeCreditCardDf(tempDfObj, idx,queryInfo['reportTime'])) else: # 其他 for tempDfObj in (dfMap[key]["dfs"]): if tempDfObj.__contains__("isByPage"): logger.info(key + "============其他被分页页数============" + str(tempDfObj["isByPage"])) logger.info(tempDfObj["df"].values) else: # 单笔 tempDfObj = dfMap[key]; if tempDfObj.__contains__("isByPage"): logger.info(key + "============被分页页数================" + str(tempDfObj["isByPage"])) logger.info(tempDfObj["df"].values) if key == "queryInfoDf": # 解析被查询信息 parseQueryInfo(tempDfObj); # print("\033[1;31m +查询信息+ \033[0m") # print(queryInfo) elif key == "identityDf": # 身份信息 parseIdentity(tempDfObj) # print("\033[1;31m +身份信息+ \033[0m") # print(identity) elif key == "mateDf": # 配偶信息 parseMate(tempDfObj) # print("\033[1;31m +配偶信息+ \033[0m") # print(mate) elif key == "liveInfoDf": # 居住信息 parseLiveInfo(tempDfObj) # print("\033[1;31m +居住信息+ \033[0m") elif key == "loanTradeInfoDf": # 信贷交易信息提示 parseLoanTradeInfo(tempDfObj); # print("\033[1;31m +信贷交易信息提示+ \033[0m") # print(loanTradeInfo) elif key == "badDebtsInfoSumDf": # 呆账信息汇总 parseBadDebtsInfoSumDf(tempDfObj) # print("\033[1;31m +呆账信息汇总+ \033[0m") # print(overdueBrief) elif key == "recoveryInfoDf": # 被追偿信息汇总-资产处置和垫款 parseRecoveryInfo(tempDfObj) # print("\033[1;31m +资产处置和垫款+ \033[0m") # print(overdueBrief) elif key == "overdueInfoSumDf": # 逾期(透支)信息汇总 parseOverdueInfo(tempDfObj) # print("\033[1;31m +逾期(透支)信息汇总+ \033[0m") # print(overdueInfo) elif key == "loanAccountInfoSumDf": # 需要纳入循环贷及额度下循环分账户 TODO tempDfObj_cycleLoanAccount = dfMap["cycleLoanAccountInfoSumDf"]; tempDfObj_cycleCredit = dfMap["cycleCreditAccountInfoSumDf"]; if not tempDfObj_cycleLoanAccount["df"].empty: # 循环贷 tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleLoanAccount["df"][2:3]], axis=0, ignore_index=True) if not tempDfObj_cycleCredit["df"].empty: # 额度下循环分账户 tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleCredit["df"][2:3]], axis=0, ignore_index=True) parseLoanAccountInfoSum(tempDfObj) # print("\033[1;31m +贷款信息汇总+ \033[0m") # print(loanAccountInfoSum) elif key == "creditCardInfoSumDf": tempDfObjZ = dfMap["creditCardInfoDfZ"]; # 准贷记卡纳入计算 2:3为准贷记卡数据 if not tempDfObjZ["df"].empty: tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObjZ["df"][2:3]], axis=0, ignore_index=True) parseCreditCardInfoSum(tempDfObj) # print("\033[1;31m +贷记卡信息汇总+ \033[0m") # print(creditCardInfoSum) elif key == "queryRecordDetailDf": # 查询记录明细 parseQueryInfoDetail(tempDfObj) # print("\033[1;31m +查询记录明细+ \033[0m") # print(queryInfoDetail) result = "" # 基本信息 result+=("\033[1;34m +基本信息+ \033[0m")+"\n" result+=utils.toJson(basicInfoDf)+"\n" result+=("\033[1;34m +概要信息+ \033[0m")+"\n" result+=("\033[1;34m +信贷交易信息提示+ \033[0m")+"\n" result+=utils.toJson(briefInfoDf_loanTradeInfo)+"\n" result+=("\033[1;34m +被追偿信息汇总及呆账信息汇总+ \033[0m")+"\n" result+=utils.toJson(briefInfoDf_recoveryInfo_badDebtsInfoSum)+"\n" result+=("\033[1;34m +逾期(透支)信息汇总+ \033[0m")+"\n" result+=utils.toJson(briefInfoDf_overdueInfoSum)+"\n" result+=("\033[1;34m +信贷交易授信及负债信息概要+ \033[0m")+"\n" result+=utils.toJson(briefInfoDf_loanTradeCreditInfo)+"\n" # 单独输出贷款df logger.info("\033[1;34m +贷款信息Dataframe+ \033[0m") logger.info(dfParser.dfHeaderLoan) loanMergeDf = pd.DataFrame() loanPayRecordMergeDf = pd.DataFrame() # 输出数据 for loanDfObj in loanAccountDfs: loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True); loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0, ignore_index=True); logger.info(loanMergeDf.values) logger.info("\033[1;34m +贷款信息还款记录Dataframe+ \033[0m") logger.info(dfParser.dfHeaderLoanPayRecord) logger.info(loanPayRecordMergeDf.values) # 解析贷款账户指标 parseLoanMergeDf(loanMergeDf); # 解析还款记录相关指标 parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf); # logger.info(loanAccountInfo) logger.info(consts.loanAccountInfoHeader) logger.info(loanAccountInfoDf.values) result+=("\033[1;34m +贷款账户信息+ \033[0m")+"\n" result+=utils.toJson(loanAccountInfoDf)+"\n" #贷记卡合并df creditCardMergeDf = pd.DataFrame() creditCardPayRecordMergeDf = pd.DataFrame() logger.info("\033[1;34m +贷记卡信息Dataframe+ \033[0m") logger.info(dfParser.dfHeaderCreditCard) # 输出数据 for creditCardDfObj in creditCardAccountDfs: creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0, ignore_index=True); creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]], axis=0,ignore_index=True); logger.info(creditCardMergeDf.values) # 解析贷记卡账户指标 parseCreditCardMergeDf(creditCardMergeDf); parseCreditCardMergeAndPayRecordDf(creditCardMergeDf,creditCardPayRecordMergeDf) result+=("\033[1;34m +贷记卡账户信息+ \033[0m")+"\n" result+=utils.toJson(creditCardAccountInfoDf)+"\n" result+=("\033[1;34m +查询记录明细+ \033[0m")+"\n" result+=utils.toJson(queryRecordDetailDf)+"\n" return result; # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig') if __name__ == '__main__': start = timeit.default_timer(); basePath = "D:/mydocument/myproject/git/busscredit/Crerdai/"; pdf_path = basePath + "闻海雁532329198801060347.pdf" pdf_path = basePath+"雷雨晴130630199006130027.pdf" pdf_path=basePath+"杨安140402197102111236.pdf" pdf_path=basePath+"刘盼兰130133198912261210.pdf" pdf_path=basePath+"马维强130521198604045272.pdf" pdf_path = basePath + "郑晨晨130681199008205811.pdf" # pdf_path=basePath+"人行征信模拟数据报告.pdf" basePath = "D:/mydocument/myproject/git/busscredit/20200414_report/"; pdf_path = basePath + "艾思语51112319960218732X.pdf" isBat = False; if isBat: for file in os.listdir(basePath): if file.endswith("pdf"): pdf_path = basePath+file; outPath = pdf_path.replace("pdf",'txt') if os.path.exists(outPath): continue; logger.info(file + "解析开始...") result = main(pdf_path) # print(result) #输出到文件 sys.stdout = open(outPath, mode='w', encoding='utf-8') print(result.replace("\033[1;34m","").replace("\033[0m","")) logger.info(file+"解析完成") else: outPath = pdf_path.replace("pdf", 'txt') result = main(pdf_path) sys.stdout = open(outPath, mode='w', encoding='utf-8') print(result.replace("\033[1;34m", "").replace("\033[0m", "")) s = timeit.default_timer() - start; logger.info(str(s) + " 秒")