parseCreditPdfV1.py 69 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101
  1. import pdfplumber
  2. import pandas as pd
  3. import numpy as np;
  4. import sys
  5. import os
  6. #指标相关
  7. import loanIndexParser as lip;
  8. import payRcdIndexParser as prp;
  9. import creditCardIndexParser as cip
  10. import queryInfoIndexParser as qip
  11. import utils;
  12. import time;
  13. import consts;
  14. import math
  15. import dfParser;
  16. pd.set_option('mode.chained_assignment', None)
  17. import log
  18. logger = log.logger
  19. # 查询信息
  20. dfMap = {};
  21. allHeaders = [] # 所有表头
  22. queryInfoDf = pd.DataFrame();
  23. queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"];
  24. dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None};
  25. allHeaders.append(queryInfoDf_header);
  26. # 身份信息
  27. identityDf = pd.DataFrame();
  28. identity_header = ['性别', None, '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱']
  29. addressDf = pd.DataFrame(); # 通讯地址
  30. dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None};
  31. allHeaders.append(identity_header);
  32. # 配偶信息
  33. mateDf = pd.DataFrame();
  34. mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话']
  35. dfMap["mateDf"] = {"df": mateDf, "nextDf": None};
  36. allHeaders.append(mateDf_header);
  37. # 居住信息====暂时该信息没有用到先不解析
  38. liveInfoDf = pd.DataFrame();
  39. liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期']
  40. dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None};
  41. allHeaders.append(liveInfoDf_header);
  42. # 职业信息
  43. occupationInfoDf = pd.DataFrame();
  44. occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话']
  45. occupationInfoDf1 = pd.DataFrame();
  46. # occupationInfo_header1 = ['编号', '职业', '行业', None, None, '职务', '职称', '进入本单位年份', None, '信息更新日期']
  47. dfMap["occupationInfoDf"] = ({"df": occupationInfoDf, "nextDf": None});
  48. # allHeaders.append(occupationInfo_header1);
  49. allHeaders.append(occupationInfo_header);
  50. # 上次查询记录
  51. preQueryRcd_header0 = ['上一次查询记录']
  52. allHeaders.append(preQueryRcd_header0);
  53. # 查询记录概要
  54. queryInfoBriefDf = pd.DataFrame();
  55. queryInfoBrief_header0 = ['最近1个月内的查询机构数', None, '最近1个月内的查询次数', None, None, '最近2年内的查询次数', None, None]
  56. queryInfoBrief_header1 = ['贷款审批', '信用卡审批', '贷款审批', '信用卡\n审批', '本人查询', '贷后管理', '担保资格\n审查', '特约商户\n实名审查']
  57. dfMap["queryInfoBriefDf"] = ({"df": queryInfoBriefDf, "nextDf": None});
  58. allHeaders.append(queryInfoBrief_header0);
  59. allHeaders.append(queryInfoBrief_header1);
  60. # 信贷交易信息提示
  61. loanTradeInfoDf = pd.DataFrame();
  62. loanTradeInfo_header = ['业务类型', None, '账户数', '首笔业务发放月份'];
  63. dfMap["loanTradeInfoDf"] = ({"df": loanTradeInfoDf, "nextDf": None});
  64. allHeaders.append(loanTradeInfo_header)
  65. # 信贷交易违约信息概要
  66. # 被追偿信息汇总 资产处置和垫款业务
  67. recoveryInfoSumDf = pd.DataFrame();
  68. recoveryInfoSumDf_header = ['业务类型', '账户数', '余额'];
  69. dfMap["recoveryInfoSumDf"] = ({"df": recoveryInfoSumDf, "nextDf": None});
  70. allHeaders.append(recoveryInfoSumDf_header)
  71. # 呆账信息汇总
  72. badDebtsInfoSumDf = pd.DataFrame();
  73. badDebtsInfoSumDf_header = ['账户数', '余额']; # 被追偿信息汇总
  74. dfMap["badDebtsInfoSumDf"] = ({"df": badDebtsInfoSumDf, "nextDf": None});
  75. allHeaders.append(badDebtsInfoSumDf_header)
  76. # 逾期透资信息汇总
  77. overdueInfoSumDf = pd.DataFrame();
  78. overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数']
  79. dfMap["overdueInfoSumDf"] = ({"df": overdueInfoSumDf, "nextDf": None});
  80. allHeaders.append(overdueInfoSumDf_header)
  81. # 非循环贷账户信息汇总
  82. loanAccountInfoSumDf = pd.DataFrame();
  83. loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总', None, None, None, None]
  84. loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  85. dfMap["loanAccountInfoSumDf"] = ({"df": loanAccountInfoSumDf, "nextDf": None});
  86. allHeaders.append(loanAccountInfoSumDf_header0)
  87. allHeaders.append(loanAccountInfoSumDf_header1)
  88. # 循环额度下分账户信息汇总
  89. cycleCreditAccountInfoSumDf = pd.DataFrame();
  90. cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总', None, None, None, None]
  91. cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'],
  92. dfMap["cycleCreditAccountInfoSumDf"] = ({"df": cycleCreditAccountInfoSumDf, "nextDf": None});
  93. allHeaders.append(cycleCreditAccountInfoSumDf_header0)
  94. allHeaders.append(cycleCreditAccountInfoSumDf_header1)
  95. # 循环贷账户信息汇总
  96. cycleLoanAccountInfoSumDf = pd.DataFrame();
  97. cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总', None, None, None, None]
  98. cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  99. dfMap["cycleLoanAccountInfoSumDf"] = ({"df": cycleLoanAccountInfoSumDf, "nextDf": None});
  100. allHeaders.append(cycleLoanAccountInfoSumDf_header0)
  101. allHeaders.append(cycleLoanAccountInfoSumDf_header1)
  102. # 贷记卡账户信息汇总
  103. creditCardInfoSumDf = pd.DataFrame();
  104. creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总', None, None, None, None, None, None]
  105. creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  106. dfMap["creditCardInfoSumDf"] = ({"df": creditCardInfoSumDf, "nextDf": None});
  107. allHeaders.append(creditCardInfoSumDf_header0)
  108. allHeaders.append(creditCardInfoSumDf_header1)
  109. # 准贷记卡账户信息汇总
  110. creditCardInfoSumDfZ = pd.DataFrame();
  111. creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总', None, None, None, None]
  112. creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  113. dfMap["creditCardInfoDfZ"] = ({"df": creditCardInfoSumDfZ, "nextDf": None});
  114. allHeaders.append(creditCardInfoSumDfZ_header0)
  115. allHeaders.append(creditCardInfoSumDfZ_header1)
  116. # 非循环贷账户,循环额度下分账户
  117. # 循环贷账户
  118. loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种']
  119. loanDfs = [];
  120. dfMap["loanDfs"] = ({"dfs": loanDfs, "nextDf": []});
  121. allHeaders.append(loan_header)
  122. # 贷记卡账户
  123. creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式']
  124. creditCardDfs = [];
  125. dfMap["creditCardDfs"] = ({"dfs": creditCardDfs, "nextDf": []});
  126. allHeaders.append(creditCard_header)
  127. # 准备贷记卡账户
  128. creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式']
  129. creditCardDfsZ = [];
  130. dfMap["creditCardDfsZ"] = ({"dfs": creditCardDfsZ, "nextDf": []});
  131. allHeaders.append(creditCardZ_header)
  132. #
  133. # 相关还款责任信息汇总 未使用到
  134. # 信贷交易信息明细
  135. # 被追偿信息 未使用到
  136. # 公共信息明细
  137. # 强制执行记录
  138. forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式']
  139. forceExecRcdDfs = [];
  140. dfMap["forceExecRcdDfs"] = ({"dfs": forceExecRcdDfs, "nextDf": []});
  141. allHeaders.append(forceExecRcdDfs_header)
  142. # 查询记录
  143. queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因']
  144. dfMap["queryRecordDetailDf"] = ({"df": pd.DataFrame(), "nextDf": []});
  145. allHeaders.append(queryRecordDetailDf_header)
  146. # 处理分页思路
  147. # df估计得放到对象里面,然后存储下一个df,一个对象里包含key
  148. # 然后判断对象的df的完整性,如果不完整代表被分页了,把nextdf合并到当前的df
  149. # 针对可合并的列的场景
  150. # =======
  151. keyList = [] # 存储所有的df的key列表
  152. # pd.Series()
  153. # 检查数据是否带表头
  154. # 应该是每一页开头的一行和每个表头对比一次,确认是不是表头,或者表头有什么共同的规律也可以看下
  155. import timeit
  156. # 定义指标部分======================start
  157. reportTime = ""; # 报告时间
  158. # 被查询者姓名
  159. queryInfoName = "";
  160. queryInfoCardId = "" # 被查询者证件号码
  161. # 定义指标部分======================end
  162. # 被查询信息-基础信息
  163. # 报告时间
  164. # 被查询者姓名
  165. # 被查询者证件号码
  166. # 基础信息
  167. queryInfo = {"reportTime":""}
  168. # 身份信息
  169. identity = {}
  170. # 配偶信息
  171. mate = {}
  172. # 信贷交易信息提示-信用提示
  173. loanTradeInfo = {'perHouseLoanAccount': 0, 'perBusHouseLoanAccount': 0, 'otherLoanAccount': 0, 'loanMonthMin': 0,
  174. 'creditCardMonthMin': 0, 'creditAccount': 0, 'creditAccountZ': 0}
  175. # 逾期及违约信息概要
  176. overdueBrief = {}
  177. # 逾期及透资信息汇总
  178. # 贷款逾期账户数 loanOverdueAccount
  179. # 贷款逾期月份数 loanOverdueMonth
  180. # 贷款单月最高逾期总额 loanCurMonthOverdueMaxTotal
  181. # 贷款最长逾期月数 loanMaxOverdueMonth
  182. overdueInfo = {"loanOverdueAccount": "", "loanOverdueMonth": "", "loanCurMonthOverdueMaxTotal": "",
  183. "loanMaxOverdueMonth": "",
  184. "creditCardOverdueAccount": "", "creditCardOverdueMonth": "", "creditCardCurMonthOverdueMaxTotal": "",
  185. "creditCardMaxOverdueMonth": ""}
  186. # 未结清贷款信息汇总
  187. # ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  188. loanAccountInfoSum = {"mgrOrgCount": 0, "account": 0, "creditTotalAmt": 0, "balance": 0, "last6AvgPayAmt": 0}
  189. # 未销户贷记卡发卡法人机构数
  190. # 未销户贷记卡发卡机构数
  191. # 未销户贷记卡账户数
  192. # 未销户贷记卡授信总额
  193. # 未销户贷记卡单家行最高授信额
  194. # 未销户贷记卡单家行最低授信额
  195. # 未销户贷记卡已用额度
  196. # 未销户贷记卡近6月平均使用额度
  197. # 未结清贷记卡信息汇总
  198. # ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  199. creditCardInfoSum = {"awardOrgCount": 0, "account": 0, "creditTotalAmt": 0, "perMaxCreditTotalAmt": 0,
  200. "perMinCreditTotalAmt": 0, "useAmt": 0, "last6AvgUseAmt": 0}
  201. # 信 贷 审 批 查 询 记 录 明 细
  202. queryRecordDetail = {"last1MonthQueryTimes": 0, "last3MothLoanApproveTimes": 0, "last3MonthQueryTimes": 0,
  203. "lastTimeLoanApproveMonth": 0}
  204. #最近一笔结清贷款的贷款金额 
  205. loanAccountInfo = {"lastSettleLoanAmt": 0}
  206. loanAccountDfs=[];#横向合并
  207. creditCardAccountDfs=[];#贷记卡账户合并
  208. #============================指标定义区 start=============================
  209. #基本信息
  210. basicInfoDf = pd.DataFrame(columns=consts.basicInfoHeader, index=[0])
  211. #概要信息
  212. # briefInfoDf = pd.DataFrame(columns=consts.briefInfoHeader, index=[0])
  213. #信贷交易信息提示
  214. briefInfoDf_loanTradeInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeInfo, index=[0])
  215. #被追偿信息汇总及呆账信息汇总
  216. briefInfoDf_recoveryInfo_badDebtsInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_recoveryInfo_badDebtsInfoSum, index=[0])
  217. #逾期(透支)信息汇总
  218. briefInfoDf_overdueInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_overdueInfoSum, index=[0])
  219. #信贷交易授信及负债信息概要
  220. briefInfoDf_loanTradeCreditInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeCreditInfo, index=[0])
  221. #贷款信息
  222. loanAccountInfoDf = pd.DataFrame(columns=consts.loanAccountInfoHeader, index=[0])
  223. #贷记卡信息
  224. creditCardAccountInfoDf = pd.DataFrame(columns=consts.creditCardAccountInfoHeader, index=[0])
  225. #查询记录明细指标
  226. queryRecordDetailDf = pd.DataFrame(columns=consts.queryRecordDetailHeader, index=[0])
  227. #============================指标定义区 end=============================
  228. # 解析被查询信息指标
  229. def parseQueryInfo(dfObj):
  230. df = dfObj["df"];
  231. reportTime = df.loc[0, :][3]
  232. reportTime = reportTime.split(":")[1]
  233. reportTime = reportTime.replace(".", "-"); # 报告时间
  234. queryInfo["reportTime"] = reportTime
  235. row = df.loc[2, :]
  236. queryInfo["queryInfoName"] = row[0]; # 被查询者姓名
  237. basicInfoDf.loc[0, '姓名'] = row[0]
  238. queryInfo["queryInfoCardId"] = row[2].replace("\n", ""); # 被查询者证件号码
  239. basicInfoDf.loc[0, '身份证'] = row[2].replace("\n", "")
  240. # 婚姻状况
  241. # 学历
  242. # 单位电话
  243. # 住宅电话
  244. # 通讯地址
  245. def parseIdentity(dfObj):
  246. df = dfObj["df"];
  247. row1 = df.loc[1, :].dropna().reset_index(drop=True)
  248. # identity["marital"] = row1[3] # 婚姻状况
  249. # identity["education"] = row1[4] # 学历
  250. # identity["commAddress"] = row1[9].replace("\n", ""); # 通讯地址
  251. basicInfoDf.loc[0, '性别'] = row1[0]
  252. basicInfoDf.loc[0, '出生年月'] = dfParser.formatDate(row1[1])[0:7]
  253. basicInfoDf.loc[0, '国籍'] = row1[6]
  254. basicInfoDf.loc[0, '户籍地址'] = row1[9].replace("\n", "")
  255. basicInfoDf.loc[0, '婚姻状况'] = row1[2]
  256. basicInfoDf.loc[0, '学位'] = row1[4]
  257. basicInfoDf.loc[0, '通讯地址'] = row1[8].replace("\n", "")
  258. basicInfoDf.loc[0, '就业状况'] = row1[5]
  259. # mobileDf = dfObj["mobileDf"];
  260. # basicInfoDf.loc[0, '历史电话号码数'] = mobileDf.index.size
  261. # basicInfoDf.loc[0, '近3个月电话号码数'] = getLastMonthMobileCount(mobileDf,3)
  262. #最近几个月电话号码数
  263. def getLastMonthMobileCount(df, month):
  264. # 当前日期
  265. last1MonthDateStr = time.strftime("%Y-%m-%d");
  266. # 最近一个月
  267. lastMonthDate = np.datetime64(last1MonthDateStr, "D") - np.timedelta64(30 * month, 'D')
  268. lastMonthMobileDf = df[df[5] >= str(lastMonthDate)]
  269. return lastMonthMobileDf.shape[0];
  270. # 配偶姓名
  271. # 配偶证件号码
  272. # 配偶工作单位
  273. # 配偶联系电话
  274. def parseMate(dfObj):
  275. df = dfObj["df"];
  276. if not df.empty:
  277. row1 = df.loc[1, :]
  278. mate["mateName"] = row1[0] # 配偶姓名
  279. mate["mateCardId"] = row1[2] # 配偶证件号码
  280. mate["mateWorkCompany"] = row1[3].replace("\n", ""); # 配偶工作单位
  281. mate["mateContactTel"] = row1[4]; # 配偶联系电话
  282. basicInfoDf.loc[0, '配偶姓名'] = row1[0]
  283. basicInfoDf.loc[0, '配偶证件号码'] = row1[2]
  284. basicInfoDf.loc[0, '配偶工作单位'] = row1[3].replace("\n", "");
  285. basicInfoDf.loc[0, '配偶联系电话'] = row1[4].replace("\n", "");
  286. #解析居住信息
  287. def parseLiveInfo(dfObj):
  288. df = dfObj["df"];
  289. if not df.empty:
  290. row1 = df.loc[1, :]
  291. basicInfoDf.loc[0, '居住地址'] = row1[1]
  292. basicInfoDf.loc[0, '住宅电话'] = row1[2]
  293. basicInfoDf.loc[0, '历史居住地个数'] = df.index.size-1;
  294. curDate = np.datetime64(time.strftime("%Y-%m-%d"));
  295. last3year = str(curDate)[0:4]
  296. last3yearDate = str(int(last3year)-3)+str(curDate)[4:10]
  297. lastLiveDf = df[df[4]>=last3yearDate];
  298. basicInfoDf.loc[0, '最近3年内居住地个数'] = lastLiveDf.index.size-1;
  299. houseIndex = df[df[3]=='自置'].index.size>0
  300. if (houseIndex):
  301. houseStr = '是'
  302. else:
  303. houseStr= '否'
  304. basicInfoDf.loc[0, '当前住房状态-是否具有自有住房'] = houseStr;
  305. basicInfoDf.loc[0, '居住状况'] = row1[3]
  306. basicInfoDf.loc[0, '居住信息更新日期'] = row1[4]
  307. # 个人住房贷款笔数 perHouseLoanAccount
  308. # 个人商用房(包括商住两用)贷款笔数 perBusHouseLoanAccount
  309. # 其他贷款笔数 otherLoanAccount
  310. # 贷记卡账户数 creditAccount
  311. # 贷款账龄(月数) loanMonthMin
  312. # 信用卡账龄(月数)creditCardMonthMin
  313. # 日期相减离当前时间月份
  314. # 贷款账龄(月数)=当前日期(2020-04-01)-最小月份的1日(2019.2->2019-12-01)=4
  315. # def difMonth(dateStr):
  316. # return int(int(str(np.datetime64(time.strftime("%Y-%m-%d")) -
  317. # np.datetime64(dateStr.replace('.', '-'), "D")).split(" ")[0]) / 30);
  318. # 信贷交易明细汇总
  319. def parseLoanTradeInfo(dfObj):
  320. df = dfObj["df"];
  321. # row1 = df.loc[1, :]
  322. loanMonthDf = df[1: 4]
  323. loanMonthDf = loanMonthDf.reset_index(drop=True)
  324. loanTradeInfo["perHouseLoanAccount"] = loanMonthDf.loc[0, :][2] # 第0行第二列 个人住房贷款笔数
  325. briefInfoDf_loanTradeInfo.loc[0, '住房贷款笔数'] = loanMonthDf.loc[0, :][2]
  326. loanTradeInfo["perBusHouseLoanAccount"] = loanMonthDf.loc[1, :][2] # 第1行第二列 个人商用房(包括商住两用)贷款笔数
  327. briefInfoDf_loanTradeInfo.loc[0,'个人商用房(包括商住两用)贷款笔数']=loanMonthDf.loc[1, :][2]
  328. loanTradeInfo["otherLoanAccount"] = loanMonthDf.loc[2, :][2] # 第2行第二列 其他贷款笔数
  329. briefInfoDf_loanTradeInfo.loc[0, '其他贷款笔数'] = loanMonthDf.loc[2, :][2]
  330. loanMonthDf = loanMonthDf[loanMonthDf[3] != '--']
  331. loanMonthMin = loanMonthDf[3].min() # 首笔贷款发放月份最小值
  332. # if loanMonth != "":
  333. if loanMonthMin != "" and not math.isnan(float(loanMonthMin)):
  334. loanMonthMin = utils.difMonth(loanMonthMin)
  335. loanTradeInfo["loanMonthMin"] = loanMonthMin; # 贷款账龄(月数)
  336. creditCardDf = df[4: 6];
  337. creditCardDf = creditCardDf.reset_index(drop=True)
  338. creditCardMonthDf = creditCardDf[creditCardDf[3] != '--']
  339. creditCardMonthMin = creditCardMonthDf[3].min() # 首笔贷记卡发放月份最小值
  340. creditCardMonthDf = creditCardMonthDf.reset_index(drop=True)
  341. if creditCardMonthMin != "" and not math.isnan(float(creditCardMonthMin)):
  342. creditCardMonthMin = utils.difMonth(creditCardMonthMin)
  343. if str(creditCardMonthMin) != "nan":
  344. loanTradeInfo["creditCardMonthMin"] = creditCardMonthMin; # 信用卡账龄(月数)
  345. loanTradeInfo["creditAccount"] = creditCardDf.loc[0, :][2] # 第0行第3列 贷记卡账户数
  346. briefInfoDf_loanTradeInfo.loc[0, '贷记卡账户数'] = creditCardDf.loc[0, :][2]
  347. loanTradeInfo["creditAccountZ"] = creditCardDf.loc[1, :][2] # 第1行第3列
  348. briefInfoDf_loanTradeInfo.loc[0, '信用卡账户数'] = creditCardDf.loc[1, :][2]
  349. if loanTradeInfo["creditAccountZ"] != "--" and loanTradeInfo["creditAccount"] != "--":
  350. loanTradeInfo["creditAccount"] = int(loanTradeInfo["creditAccount"]) + int(loanTradeInfo["creditAccountZ"])
  351. # 解析呆账信息
  352. def parseBadDebtsInfoSumDf(dfObj):
  353. df = dfObj["df"];
  354. if not df.empty:
  355. row1 = df.loc[1, :]
  356. overdueBrief["badDebtsInfoSumAccount"] = row1[0]; # 呆账信息汇总笔数
  357. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账业务账户数'] = row1[0];
  358. overdueBrief["badDebtsInfoSumAmt"] = row1[1]; # 呆账信息汇总余额
  359. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '呆账信息余额'] = row1[1];
  360. else:
  361. overdueBrief["badDebtsInfoSumAccount"] = ""; # 呆账信息汇总笔数
  362. overdueBrief["badDebtsInfoSumAmt"] = ""; # 呆账信息汇总余额
  363. # 解析被追偿信息
  364. def parseRecoveryInfo(dfObj):
  365. df = dfObj["df"];
  366. if not df.empty:
  367. row1 = df.loc[1, :]
  368. row2 = df.loc[2, :]
  369. row3 = df.loc[3, :]
  370. overdueBrief["disposalInfoSumAccount"] = row1[1]; # 资产处置信息汇总笔数
  371. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '资产处置业务账户数'] = row1[1];
  372. overdueBrief["disposalInfoSumAmt"] = row1[2]; # 资产处置信息汇总余额
  373. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '资产处置业务账户余额'] = replaceAmt(row1[2]);
  374. overdueBrief["advanceInfoSumAccount"] = row2[1]; # 垫款业务笔数
  375. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '垫款业务账户数'] = row2[1];
  376. overdueBrief["advanceInfoSumAmt"] = row2[2]; # 垫款业务余额
  377. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '垫款业务账户余额'] = replaceAmt(row2[2]);
  378. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '被追偿信息总数'] = row3[1];
  379. briefInfoDf_recoveryInfo_badDebtsInfoSum.loc[0, '被追偿信息总额'] = replaceAmt(row3[2]);
  380. else:
  381. overdueBrief["disposalInfoSumAccount"] = ""; # 资产处置信息汇总笔数
  382. overdueBrief["disposalInfoSumAmt"] = ""; # 资产处置信息汇总余额
  383. overdueBrief["advanceInfoSumAccount"] = ""; # 垫款业务笔数
  384. overdueBrief["advanceInfoSumAmt"] = ""; # 垫款业务余额
  385. # 贷款逾期账户数
  386. # 贷款逾期月份数
  387. # 贷款单月最高逾期总额
  388. # 贷款最长逾期月数
  389. def parseOverdueInfo(dfObj):
  390. df = dfObj["df"];
  391. if not df.empty:
  392. row2= df.loc[2, :]
  393. row3 = df.loc[3, :]
  394. row4 = df.loc[4, :]
  395. row5 = df.loc[5, :]
  396. row6 = df.loc[6, :]
  397. briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户数'] = row2[1];
  398. briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户数月数'] = row2[2];
  399. briefInfoDf_overdueInfoSum.loc[0, '非循环贷帐户单月最高逾期/透支总额'] = row2[3];
  400. briefInfoDf_overdueInfoSum.loc[0, '循环额度下分账户数'] = row3[1];
  401. briefInfoDf_overdueInfoSum.loc[0, '循环贷帐户数'] = row4[1];
  402. briefInfoDf_overdueInfoSum.loc[0, '贷记卡账户数'] = row5[1];
  403. briefInfoDf_overdueInfoSum.loc[0, '准贷记卡账户数'] = row6[1];
  404. # 未结清贷款法人机构数 从“未结清贷款信息汇总”中直接提取LoanLegalOrgNum
  405. # 未结清贷款机构数 从“未结清贷款信息汇总”中直接提取LoanOrgNum
  406. # 未结清贷款笔数 从“未结清贷款信息汇总”中直接提取CountNum
  407. # 未结清贷款合同总额 从“未结清贷款信息汇总”中直接提取ContractProfits
  408. # 未结清贷款合同余额 从“未结清贷款信息汇总”中直接提取Balance
  409. # 未结清贷款近6月平均应还款 从“未结清贷款信息汇总”中直接提取Last6MothsAvgRepayAmount
  410. # 个人贷款未结清笔数 "从“未结清贷款信息汇总”计算客户符合以下条件的贷款笔数
  411. # 1.贷款类型不为('%个人助学贷款%' ,'%农户贷款%')
  412. # 2.贷款额度>100元
  413. # 3.贷款状态不为“结清”"
  414. # 非循环贷账户信息汇总
  415. def doFilterCalc(dfx):
  416. dfx = dfx.replace('--', 0)
  417. return dfx;
  418. # 科学计数法转换
  419. def replaceAmt(dfx):
  420. return dfx.str.replace(',', '')
  421. # 非循环贷账户信息汇总 如有循环贷款和额度循环的需要进行汇总 TODO
  422. def parseLoanAccountInfoSum(dfObj):
  423. df = dfObj["df"];
  424. if not df.empty:
  425. loanAccountInfoSumDf = df[2:3];
  426. loanAccountInfoSumDf = doFilterCalc(loanAccountInfoSumDf); # 替换--为0
  427. loanAccountInfoSum["mgrOrgCount"] = np.sum(loanAccountInfoSumDf[0].astype('int'))
  428. loanAccountInfoSum["account"] = np.sum(loanAccountInfoSumDf[1].astype('int'))
  429. loanAccountInfoSum["creditTotalAmt"] = np.sum(replaceAmt(loanAccountInfoSumDf[2]).astype('int'))
  430. loanAccountInfoSum["balance"] = np.sum(replaceAmt(loanAccountInfoSumDf[3]).astype('int'))
  431. loanAccountInfoSum["last6AvgPayAmt"] = np.sum(replaceAmt(loanAccountInfoSumDf[4]).astype('int'))
  432. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款法人机构数'] = np.sum(loanAccountInfoSumDf[0].astype('int'))
  433. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款机构数'] = np.sum(loanAccountInfoSumDf[0].astype('int'))
  434. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款笔数'] = np.sum(loanAccountInfoSumDf[1].astype('int'))
  435. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同总额'] = np.sum(replaceAmt(loanAccountInfoSumDf[2]).astype('int'))
  436. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款合同余额'] = np.sum(replaceAmt(loanAccountInfoSumDf[3]).astype('int'))
  437. briefInfoDf_loanTradeCreditInfo.loc[0, '未结清贷款近6月平均应还款'] = np.sum(replaceAmt(loanAccountInfoSumDf[4]).astype('int'))
  438. # {"awardOrgCount":0,"account":0,"creditTotalAmt":0,"perMaxCreditTotalAmt":0,"perMinCreditTotalAmt":0,"useAmt":0,"last6AvgUseAmt":0}
  439. # 解析贷记卡信息汇总,包含准贷记卡
  440. def parseCreditCardInfoSum(dfObj):
  441. df = dfObj["df"];
  442. if not df.empty:
  443. creditCardInfoSumDf = df[2:3];
  444. creditCardInfoSumDf = doFilterCalc(creditCardInfoSumDf); # 替换--为0
  445. creditCardInfoSum["awardOrgCount"] = np.sum(creditCardInfoSumDf[0].astype('int'))
  446. creditCardInfoSum["account"] = np.sum(creditCardInfoSumDf[1].astype('int'))
  447. creditCardInfoSum["creditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[2]).astype('int'))
  448. creditCardInfoSum["perMaxCreditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[3]).astype('int'))
  449. creditCardInfoSum["perMinCreditTotalAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[4]).astype('int'))
  450. creditCardInfoSum["useAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[5]).astype('int'))
  451. creditCardInfoSum["last6AvgUseAmt"] = np.sum(replaceAmt(creditCardInfoSumDf[6]).astype('int'))
  452. briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡发卡机构数'] = np.sum(creditCardInfoSumDf[0].astype('int'))
  453. briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡账户数'] = np.sum(creditCardInfoSumDf[1].astype('int'))
  454. briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡授信总金额'] = np.sum(replaceAmt(creditCardInfoSumDf[2]).astype('int'))
  455. briefInfoDf_loanTradeCreditInfo.loc[0, '单家授信最高金额'] = np.sum(replaceAmt(creditCardInfoSumDf[3]).astype('int'))
  456. briefInfoDf_loanTradeCreditInfo.loc[0, '单家授信最低金额'] = np.sum(replaceAmt(creditCardInfoSumDf[4]).astype('int'))
  457. briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡已用额度'] = np.sum(replaceAmt(creditCardInfoSumDf[5]).astype('int'))
  458. briefInfoDf_loanTradeCreditInfo.loc[0, '贷记卡最近6个月平均使用额度'] = np.sum(replaceAmt(creditCardInfoSumDf[6]).astype('int'))
  459. # 解析查询记录明细
  460. def parseQueryInfoDetail(dfObj):
  461. df = dfObj["df"];
  462. reportTime = queryInfo["reportTime"];
  463. if not df.empty:
  464. df = utils.replaceDateCol(df)
  465. df = df[1:df.index.size] # 去掉表头
  466. queryRecordDetail["last1MonthQueryTimes"] = qip.getLastMonthQueryTimes(df, 1, "",reportTime) # 去掉表头
  467. queryRecordDetail["last3MonthQueryTimes"] = qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  468. queryRecordDetail["last3MothLoanApproveTimes"] = qip.getLastMonthQueryTimes(df, 3, consts.loanApprove,reportTime)
  469. queryRecordDetailDf.loc[0,'近1月查询次数'] = qip.getLastMonthQueryTimes(df, 1, "",reportTime)
  470. queryRecordDetailDf.loc[0, '近3月查询次数'] = qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  471. queryRecordDetailDf.loc[0, '近6月查询次数'] = qip.getLastMonthQueryTimes(df, 6, "", reportTime)
  472. queryRecordDetailDf.loc[0, '近12月查询次数'] = qip.getLastMonthQueryTimes(df, 12, "", reportTime)
  473. queryRecordDetailDf.loc[0, '近3月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime)
  474. queryRecordDetailDf.loc[0, '近3月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime)
  475. queryRecordDetailDf.loc[0, '近6月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime)
  476. queryRecordDetailDf.loc[0, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime)
  477. queryRecordDetailDf.loc[0, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime)
  478. queryRecordDetailDf.loc[0, '近12月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime)
  479. queryRecordDetailDf.loc[0, '近3月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime)
  480. queryRecordDetailDf.loc[0, '近3月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime)
  481. queryRecordDetailDf.loc[0, '近6月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime)
  482. queryRecordDetailDf.loc[0, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime)
  483. queryRecordDetailDf.loc[0, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime)
  484. queryRecordDetailDf.loc[0, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime)
  485. queryRecordDetailDf.loc[0, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime)
  486. queryRecordDetailDf.loc[0, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime)
  487. queryRecordDetailDf.loc[0, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime)
  488. queryRecordDetailDf.loc[0, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime)
  489. queryRecordDetailDf.loc[0, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime)
  490. queryRecordDetailDf.loc[0, '最近12个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime)
  491. queryRecordDetailDf.loc[0, '最近12个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime)
  492. #解析贷款还款记录指标
  493. def parseLoanMergeAndPayRecordDf(df,payRcdDf):
  494. if not df.empty and not payRcdDf.empty:
  495. #正常
  496. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  497. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  498. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  499. #临时保存,不用过滤还款状态为0的
  500. payRcdMaxOverdueDf = overduePayRcdDf;
  501. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态']>0]
  502. loanAccountInfoDf.loc[0, '当前贷款逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  503. loanAccountInfoDf.loc[0, '当前贷款逾期账户数占比'] = round(loanAccountInfoDf.loc[0, '当前贷款逾期账户数']/df.index.size,2)
  504. #存在逾期的贷款账户 非结清的过滤出逾期的账户号
  505. overdueLoanDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  506. loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] = overdueLoanDf['管理机构'].unique().size
  507. loanAccountInfoDf.loc[0, '当前贷款逾期机构数占比'] = round(loanAccountInfoDf.loc[0, '当前贷款逾期机构数'] / df['管理机构'].unique().size,2)
  508. #还款记录按日期排序最近3笔的最大逾期期数
  509. loanAccountInfoDf.loc[0, '近1月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf,1);
  510. loanAccountInfoDf.loc[0, '近3月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  511. loanAccountInfoDf.loc[0, '近6月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  512. loanAccountInfoDf.loc[0, '近9月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  513. loanAccountInfoDf.loc[0, '近24月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  514. loanAccountInfoDf.loc[0, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24);
  515. payStatus= ["G","D","C","N","M","1","2","3","4","5","6","7"]
  516. # 贷款24期还款记录次数 剔除结清 转出 呆账
  517. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  518. #从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
  519. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  520. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  521. #payRcdDf[(payRcdDf['还款状态']!='') & (payRcdDf['账户编号']==1)].index.size
  522. loanAccountInfoDf.loc[0, '贷款24期还款记录次数'] = np.max(payRcdTimes)
  523. #解析贷款账户信息指标
  524. def parseLoanMergeDf(df):
  525. if not df.empty:
  526. sortDf = df.sort_values(by=["开立日期","借款金额(本金)"] , ascending=(False,False))
  527. sortDf = sortDf[sortDf['账户状态'] == '结清'];
  528. sortDf = sortDf.reset_index(drop=True)
  529. if not sortDf.empty:
  530. row0 = sortDf.loc[0, :]
  531. loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)']
  532. loanAccountInfoDf.loc[0, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)']
  533. openDate = dfParser.formatDate(row0['开立日期'])
  534. loanAccountInfoDf.loc[0, '最近一笔结清贷款的发放距今月数'] = utils.difMonth(openDate)
  535. settleDate = dfParser.formatDate(row0['账户关闭日期'])
  536. loanAccountInfoDf.loc[0, '最近一笔结清贷款的结清距今月数'] = utils.difMonth(settleDate)
  537. loanAccountInfoDf.loc[0, '历史贷款总法人机构数'] = df['管理机构'].unique().size
  538. loanAccountInfoDf.loc[0, '当前同时在用的贷款机构数'] = df[df['借款金额(本金)']>0]['管理机构'].unique().size
  539. statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  540. bankDf = statusDf[statusDf['管理机构'].str.contains('银行')]
  541. #没有记录
  542. if statusDf.index.size==0:
  543. isNotBankCust = -1
  544. else:
  545. if bankDf.index.size >0:#有一条以上不为结清,请包含银行
  546. isNotBankCust = 1;
  547. else:
  548. isNotBankCust = 0;
  549. loanAccountInfoDf.loc[0, '是否有非银贷款客户'] = isNotBankCust
  550. #最严重的五级分类
  551. fiveType = ""
  552. for fiveTypeTmp in consts.fiveType:
  553. fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp];
  554. if not fiveTypeDf.empty:
  555. fiveType = fiveTypeTmp;
  556. break;
  557. loanAccountInfoDf.loc[0, '贷款五级分类'] = fiveType
  558. #当前贷款LTV
  559. # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0
  560. # 的记录后,SUM(本金余额) / SUM(贷款本金)
  561. # 如本金余额为空和贷款本金为0或为空,则当条记录不计算
  562. loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')]
  563. badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))]
  564. balanceSum = np.sum(badSetDf['余额(本金)'].astype('int'))
  565. loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int'))
  566. if(loanAmtSum !=0):
  567. loanAccountInfoDf.loc[0, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2)
  568. loanAccountInfoDf.loc[0, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2)
  569. loanAccountInfoDf.loc[0, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  570. loanAccountInfoDf.loc[0, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  571. houseLtvList = consts.houseLtvList;
  572. # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)]
  573. # if not houseLtvDf.empty:
  574. # loanAccountInfoDf.loc[0, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2)
  575. #['个人住房贷款','个人商用房(包括商住两用)贷款']
  576. loanAccountInfoDf.loc[0, '当前房贷LTV'] = lip.getCurLtv(badSetDf,houseLtvList)
  577. cardLtvList = ['个人汽车消费贷款']
  578. loanAccountInfoDf.loc[0, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList)
  579. operateLtvList = ['个人经营性贷款']
  580. loanAccountInfoDf.loc[0, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList)
  581. consumeLtvList = ['其他个人消费贷款']
  582. loanAccountInfoDf.loc[0, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList)
  583. bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行']
  584. loanAccountInfoDf.loc[0, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  585. bankLtvList = ['消费金融有限公司','汽车金融公司','信托投资']
  586. loanAccountInfoDf.loc[0, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  587. smallLoanLtvList = ['机构','小额信贷公司','财务公司']
  588. loanAccountInfoDf.loc[0, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList)
  589. #当前贷款最大逾期期数
  590. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数)
  591. loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  592. if not loanOverdueLtvDf.empty:
  593. loanAccountInfoDf.loc[0, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数'])
  594. loanAccountInfoDf.loc[0, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额'])
  595. loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True)
  596. maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数'])
  597. loanAccountInfoDf.loc[0, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额']
  598. loanAccountInfoDf.loc[0, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里
  599. loanAccountInfoDf.loc[0, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3)
  600. loanAccountInfoDf.loc[0, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3)
  601. loanAccountInfoDf.loc[0, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6)
  602. loanAccountInfoDf.loc[0, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6)
  603. loanAccountInfoDf.loc[0, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6)
  604. loanAccountInfoDf.loc[0, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12)
  605. loanAccountInfoDf.loc[0, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12)
  606. loanAccountInfoDf.loc[0, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12)
  607. lastLoanDf = loanOverdueLtvDf;
  608. if not lastLoanDf.empty:
  609. loanAccountInfoDf.loc[0, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"])
  610. normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
  611. #未结清贷款总账户数:账户状态不等于结清和转出的记录数
  612. notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  613. if not notSettleDf.empty:
  614. loanAccountInfoDf.loc[0, '当前正常贷款账户数'] = normalDf.index.size
  615. loanAccountInfoDf.loc[0, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2)
  616. loanAccountInfoDf.loc[0, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)'])
  617. # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总)
  618. loanAccountInfoDf.loc[0, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']),2)
  619. settleDf = df[(df['账户状态'] == '结清')]
  620. loanAccountInfoDf.loc[0, '当前正常结清贷款账户数'] = settleDf.index.size
  621. loanAccountInfoDf.loc[0, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2)
  622. #贷款24期还款记录次数 TODO
  623. # 最近3个月个人消费贷款发放额度
  624. loanAccountInfoDf.loc[0, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款'])
  625. loanAccountInfoDf.loc[0, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3)
  626. loanAccountInfoDf.loc[0, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6)
  627. loanAccountInfoDf.loc[0, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12)
  628. #解析贷记卡账户信息指标
  629. def parseCreditCardMergeDf(df):
  630. if not df.empty:
  631. # 历史信用卡总法人机构数
  632. creditCardAccountInfoDf.loc[0,'历史信用卡总法人机构数'] = df['发卡机构'].unique().size
  633. creditCardUseDf = df[df['已用额度']>0];
  634. creditCardAccountInfoDf.loc[0,'当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size
  635. #统一排除
  636. creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  637. creditCardAccountInfoDf.loc[0,'贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf)
  638. creditCardAccountInfoDf.loc[0, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf)
  639. creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf,df,0.9)
  640. creditCardAccountInfoDf.loc[0, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, df, 1)
  641. # 从“贷记卡信息”中提取,计算授信额度时剔除销户,计算已用额度时剔除呆账、呆帐、销户后,SUM(各账户已用额度) / SUM(各账户授信额度)
  642. useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  643. totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')]
  644. creditCardAccountInfoDf.loc[0, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度'])/np.sum(totalCreditDf['账户授信额度']),2)
  645. creditCardAccountInfoDf.loc[0, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / np.sum(totalCreditDf['账户授信额度']), 2)
  646. creditCardAccountInfoDf.loc[0, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / np.sum(totalCreditDf['账户授信额度']), 2)
  647. creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数'] = np.max(creditDf['当前逾期期数'])
  648. creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期金额'] = np.max(creditDf['当前逾期总额'])
  649. if not creditDf.empty:
  650. creditDf = creditDf.reset_index(drop=True)
  651. maxOverdueIndex = np.argmax(creditDf['当前逾期期数'])
  652. creditCardAccountInfoDf.loc[0, '当前信用卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex,:]['当前逾期总额']
  653. creditCardAccountInfoDf.loc[0, '近3月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df,queryInfo["reportTime"],3)
  654. creditCardAccountInfoDf.loc[0, '近3月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 3)
  655. creditCardAccountInfoDf.loc[0, '近3月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 3)
  656. creditCardAccountInfoDf.loc[0, '近6月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 6)
  657. creditCardAccountInfoDf.loc[0, '近6月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 6)
  658. creditCardAccountInfoDf.loc[0, '近6月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 6)
  659. creditCardAccountInfoDf.loc[0, '近12月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 12)
  660. creditCardAccountInfoDf.loc[0, '近12月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 12)
  661. creditCardAccountInfoDf.loc[0, '近12月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 12)
  662. if not creditDf.empty:
  663. creditCardAccountInfoDf.loc[0, '信用卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf,queryInfo["reportTime"])
  664. creditCardAccountInfoDf.loc[0, '贷记卡还款比例'] = round(np.sum(creditDf['本月应还款'])/np.sum(creditDf['本月实还款']),2)
  665. creditCardAccountInfoDf.loc[0, '贷记卡最高还款比例'] = round(np.max(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2)
  666. creditCardAccountInfoDf.loc[0, '贷记卡最低还款比例'] = round(np.min(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2)
  667. normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)];
  668. notCloseDf = df[(df['账户状态'] != '销户')]
  669. if not notCloseDf.empty and not normalDf.empty:
  670. creditCardAccountInfoDf.loc[0, '当前正常信用卡账户数'] = round(normalDf.index.size/notCloseDf.index.size,2)
  671. creditCardAccountInfoDf.loc[0, '当前正常信用卡已用额度'] = np.sum(normalDf['已用额度'])
  672. creditCardAccountInfoDf.loc[0, '当前正常信用卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2)
  673. creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数'] = normalDf[normalDf['已用额度']>0].index.size
  674. if notCloseDf.empty:
  675. creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数占比'] = -99
  676. else:
  677. creditCardAccountInfoDf.loc[0, '当前正常信用卡账户余额占总余额比'] = round(creditCardAccountInfoDf.loc[0, '当前正常且有余额的信用卡账户数']/notCloseDf.index.size,2)
  678. creditCardAccountInfoDf.loc[0, '贷记卡本月实还款金额'] = np.sum(creditDf['本月实还款'])
  679. maxAmtDf = df[(df['币种'] == '人民币元')]
  680. if not maxAmtDf.empty:
  681. maxAmtDf = maxAmtDf.reset_index(drop=True)
  682. maxAmtIndex = np.argmax(maxAmtDf['账户授信额度'])
  683. maxOpenDate = maxAmtDf.loc[maxAmtIndex,:]['开立日期'];
  684. creditCardAccountInfoDf.loc[0, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate,queryInfo["reportTime"])+1;
  685. #解析贷款还款记录指标
  686. def parseCreditCardMergeAndPayRecordDf(df,payRcdDf):
  687. if not df.empty and not payRcdDf.empty:
  688. # 正常
  689. normalDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  690. if not normalDf.empty:
  691. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  692. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  693. # 临时保存,不用过滤还款状态为0的
  694. payRcdMaxOverdueDf = overduePayRcdDf;
  695. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态'] > 0]
  696. creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  697. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,“当前信用卡逾期账户数”/未销户贷记卡账户数(剔除“账户状态”为未激活、销户、呆账、呆帐后记录条数)
  698. creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数占比'] = round(creditCardAccountInfoDf.loc[0, '当前信用卡逾期账户数'] / normalDf.index.size, 2)
  699. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,对(当前信用卡逾期账户数)按“开户机构代码”去重统计账户状态为逾期,按按“开户机构代码”去重后的记录条数
  700. overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  701. creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size
  702. creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数占比'] = round(creditCardAccountInfoDf.loc[0, '当前信用卡逾期机构数'] / normalDf['发卡机构'].unique().size, 2)
  703. creditCardAccountInfoDf.loc[0, '近3月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  704. creditCardAccountInfoDf.loc[0, '近6月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  705. creditCardAccountInfoDf.loc[0, '近9月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  706. creditCardAccountInfoDf.loc[0, '近12月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12);
  707. creditCardAccountInfoDf.loc[0, '近24月信用卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  708. creditCardAccountInfoDf.loc[0, '近24月信用卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24);
  709. creditCardAccountInfoDf.loc[0, '最近3个月信用卡最大连续逾期月份数'] = 0;
  710. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  711. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  712. creditCardAccountInfoDf.loc[0, '贷记卡24期还款记录次数'] = np.max(payRcdTimes)
  713. def main(pdf_path):
  714. # 解析pdf开始
  715. with pdfplumber.open(pdf_path) as pdf:
  716. for p in range(0, len(pdf.pages)):
  717. page = pdf.pages[p]
  718. # first_page = pdf.pages[1]
  719. # if p == 3:
  720. # print(3)
  721. for i in range(0, len(page.extract_tables())):
  722. table = page.extract_tables()[i]
  723. df = pd.DataFrame(table);
  724. if len(keyList) > 1 and i == 0: # 判断是否被分页了
  725. if not utils.checkHeader(df, allHeaders):
  726. key = keyList[-1];
  727. dfObj = dfMap[key]
  728. # dfObj["nextDf"]=df;
  729. # 贷款信息 贷记卡信息 强制执行记录
  730. if key == "loanDfs" or key == "creditCardDfs" or key == "forceExecRcdDfs": # 属于列表
  731. lastDfObj = dfObj["dfs"][-1];
  732. lastDfObj["isByPage"] = str(p + 1);
  733. if len(dfObj["dfs"][-1]["df"].columns) == len(df.columns): # 列数相同
  734. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0,
  735. ignore_index=True); # 去最后一个进行合并
  736. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  737. else:
  738. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  739. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True);
  740. else: # 查询记录明细 为单个列表
  741. dfObj["isByPage"] = str(p + 1);
  742. if len(dfObj["df"].columns) == len(df.columns):
  743. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  744. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  745. else:
  746. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  747. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  748. # dfObj["nextDf"] = df;
  749. # 如果列数相等合并df
  750. continue;
  751. headerList0 = df.loc[0, :].tolist() # 第0行为表头
  752. headerList1 = []
  753. if df.index.size>1:
  754. headerList1 = df.loc[1, :].tolist() # 第1行为表头
  755. if headerList1 == queryInfoDf_header: # 被查询信息 第二行为数据
  756. queryInfoDf = df;
  757. dfKey = "queryInfoDf"
  758. dfMap[dfKey]["df"] = df;
  759. keyList.append(dfKey);
  760. elif headerList0 == identity_header: # 身份信息
  761. identityDf = df[:2] # 截取前2行
  762. addressDf = df.iloc[2:4, [0, 5]] # 截取3到4行的第一和6
  763. addressDf = addressDf.reset_index(drop=True)
  764. mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5)
  765. identityDf = pd.concat([identityDf, addressDf], axis=1, ignore_index=True) # 横向合并
  766. dfKey = "identityDf"
  767. dfMap[dfKey]["df"] = identityDf;
  768. keyList.append(dfKey);
  769. # 组装电话号码df
  770. dfMap[dfKey]["mobileDf "] = mobileDf
  771. elif headerList0 == mateDf_header: # 配偶信息
  772. mateDf = df;
  773. dfKey = "mateDf"
  774. dfMap[dfKey]["df"] = df;
  775. keyList.append(dfKey);
  776. elif headerList0 == liveInfoDf_header: # 居住信息
  777. mateDf = df;
  778. dfKey = "liveInfoDf"
  779. dfMap[dfKey]["df"] = df;
  780. keyList.append(dfKey);
  781. elif headerList0 == occupationInfo_header: # 职业信息 可能存在分页
  782. occupationInfoDf = df;
  783. dfKey = "occupationInfoDf"
  784. dfMap[dfKey]["df"] = df;
  785. keyList.append(dfKey);
  786. elif headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1: # 查询信息概要 第二行为数据
  787. queryInfoBriefDf = df;
  788. dfKey = "queryInfoBriefDf"
  789. dfMap[dfKey]["df"] = df;
  790. keyList.append(dfKey);
  791. elif headerList0 == loanTradeInfo_header: # 信贷交易信息
  792. loanTradeInfoDf = df;
  793. dfKey = "loanTradeInfoDf";
  794. dfMap[dfKey]["df"] = df;
  795. keyList.append(dfKey);
  796. elif headerList0 == recoveryInfoSumDf_header: # 被追偿信息汇总
  797. recoveryInfoSumDf = df;
  798. dfKey = "recoveryInfoSumDf";
  799. dfMap[dfKey]["df"] = df;
  800. keyList.append(dfKey);
  801. elif headerList0 == badDebtsInfoSumDf_header: # 呆账信息
  802. badDebtsInfoSumDf = df;
  803. dfKey = "badDebtsInfoSumDf";
  804. dfMap[dfKey]["df"] = df;
  805. keyList.append(dfKey);
  806. elif headerList1 == overdueInfoSumDf_header: # 逾期透资信息汇总
  807. overdueInfoSumDf = df;
  808. dfKey = "overdueInfoSumDf";
  809. dfMap[dfKey]["df"] = df;
  810. keyList.append(dfKey);
  811. elif headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1: # 非循环贷账户信息汇总
  812. loanAccountInfoSumDf = df;
  813. dfKey = "loanAccountInfoSumDf";
  814. dfMap[dfKey]["df"] = df;
  815. keyList.append(dfKey);
  816. elif headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1: # 贷记卡信息汇总
  817. creditCardInfoSumDf = df;
  818. dfKey = "creditCardInfoSumDf";
  819. dfMap[dfKey]["df"] = df;
  820. keyList.append(dfKey);
  821. elif headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1: # 准贷记卡信息汇总 目前没有数据
  822. creditCardInfoSumDfZ = df;
  823. dfKey = "creditCardInfoSumDfZ";
  824. dfMap[dfKey]["df"] = df;
  825. keyList.append(dfKey);
  826. elif list(filter(None, headerList0)) == loan_header: # 贷款账户 包括循环贷,非循环贷 循环额度下分账户
  827. dfKey = "loanDfs";
  828. dfMap[dfKey]["dfs"].append({"df": df});
  829. keyList.append(dfKey);
  830. elif list(filter(None, headerList0)) == creditCard_header: # 贷记卡账户
  831. dfKey = "creditCardDfs";
  832. dfMap[dfKey]["dfs"].append({"df": df});
  833. keyList.append(dfKey);
  834. elif list(filter(None, headerList0)) == creditCardZ_header: # 准贷记卡账户 还不能和贷记卡合并
  835. dfKey = "creditCardDfsZ";
  836. dfMap[dfKey]["dfs"].append({"df": df});
  837. keyList.append(dfKey);
  838. elif list(filter(None, headerList0)) == queryRecordDetailDf_header: # 查询记录明细
  839. dfKey = "queryRecordDetailDf";
  840. dfMap[dfKey]["df"] = df;
  841. keyList.append(dfKey);
  842. elif list(filter(None, headerList0)) == forceExecRcdDfs_header: # 强制执行记录
  843. dfKey = "forceExecRcdDfs";
  844. dfMap[dfKey]["dfs"].append({"df": df});
  845. keyList.append(dfKey);
  846. # 设置分页
  847. dfMap[dfKey]["page"] = p + 1;
  848. # 打印结果解析并构建指标
  849. for key in dfMap:
  850. if dfMap[key].__contains__("page"):
  851. logger.info(key + "-page-" + str(dfMap[key]["page"]))
  852. if dfMap[key].__contains__("dfs"):
  853. if key == "loanDfs": # 贷款账户
  854. for idx in range(0, len(dfMap[key]["dfs"])):
  855. tempDfObj = dfMap[key]["dfs"][idx];
  856. if tempDfObj.__contains__("isByPage"):
  857. # print("贷款账户被分页#################")
  858. # print(key + "============被分页页数============" + str(tempDfObj["isByPage"]))
  859. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  860. logger.info(tempDfObj["df"].values)
  861. else: # 未被分页
  862. logger.info(tempDfObj["df"].values)
  863. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  864. elif key == "creditCardDfs": # 贷记卡账户合并
  865. for idx in range(0, len(dfMap[key]["dfs"])):
  866. tempDfObj = dfMap[key]["dfs"][idx];
  867. creditCardAccountDfs.append(dfParser.mergeCreditCardDf(tempDfObj, idx,queryInfo['reportTime']))
  868. else: # 其他
  869. for tempDfObj in (dfMap[key]["dfs"]):
  870. if tempDfObj.__contains__("isByPage"):
  871. logger.info(key + "============其他被分页页数============" + str(tempDfObj["isByPage"]))
  872. logger.info(tempDfObj["df"].values)
  873. else: # 单笔
  874. tempDfObj = dfMap[key];
  875. if tempDfObj.__contains__("isByPage"):
  876. logger.info(key + "============被分页页数================" + str(tempDfObj["isByPage"]))
  877. logger.info(tempDfObj["df"].values)
  878. if key == "queryInfoDf": # 解析被查询信息
  879. parseQueryInfo(tempDfObj);
  880. # print("\033[1;31m +查询信息+ \033[0m")
  881. # print(queryInfo)
  882. elif key == "identityDf": # 身份信息
  883. parseIdentity(tempDfObj)
  884. # print("\033[1;31m +身份信息+ \033[0m")
  885. # print(identity)
  886. elif key == "mateDf": # 配偶信息
  887. parseMate(tempDfObj)
  888. # print("\033[1;31m +配偶信息+ \033[0m")
  889. # print(mate)
  890. elif key == "liveInfoDf": # 居住信息
  891. parseLiveInfo(tempDfObj)
  892. # print("\033[1;31m +居住信息+ \033[0m")
  893. elif key == "loanTradeInfoDf": # 信贷交易信息提示
  894. parseLoanTradeInfo(tempDfObj);
  895. # print("\033[1;31m +信贷交易信息提示+ \033[0m")
  896. # print(loanTradeInfo)
  897. elif key == "badDebtsInfoSumDf": # 呆账信息汇总
  898. parseBadDebtsInfoSumDf(tempDfObj)
  899. # print("\033[1;31m +呆账信息汇总+ \033[0m")
  900. # print(overdueBrief)
  901. elif key == "recoveryInfoDf": # 被追偿信息汇总-资产处置和垫款
  902. parseRecoveryInfo(tempDfObj)
  903. # print("\033[1;31m +资产处置和垫款+ \033[0m")
  904. # print(overdueBrief)
  905. elif key == "overdueInfoSumDf": # 逾期(透支)信息汇总
  906. parseOverdueInfo(tempDfObj)
  907. # print("\033[1;31m +逾期(透支)信息汇总+ \033[0m")
  908. # print(overdueInfo)
  909. elif key == "loanAccountInfoSumDf": # 需要纳入循环贷及额度下循环分账户 TODO
  910. tempDfObj_cycleLoanAccount = dfMap["cycleLoanAccountInfoSumDf"];
  911. tempDfObj_cycleCredit = dfMap["cycleCreditAccountInfoSumDf"];
  912. if not tempDfObj_cycleLoanAccount["df"].empty: # 循环贷
  913. tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleLoanAccount["df"][2:3]], axis=0,
  914. ignore_index=True)
  915. if not tempDfObj_cycleCredit["df"].empty: # 额度下循环分账户
  916. tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleCredit["df"][2:3]], axis=0,
  917. ignore_index=True)
  918. parseLoanAccountInfoSum(tempDfObj)
  919. # print("\033[1;31m +贷款信息汇总+ \033[0m")
  920. # print(loanAccountInfoSum)
  921. elif key == "creditCardInfoSumDf":
  922. tempDfObjZ = dfMap["creditCardInfoDfZ"]; # 准贷记卡纳入计算 2:3为准贷记卡数据
  923. if not tempDfObjZ["df"].empty:
  924. tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObjZ["df"][2:3]], axis=0, ignore_index=True)
  925. parseCreditCardInfoSum(tempDfObj)
  926. # print("\033[1;31m +贷记卡信息汇总+ \033[0m")
  927. # print(creditCardInfoSum)
  928. elif key == "queryRecordDetailDf": # 查询记录明细
  929. parseQueryInfoDetail(tempDfObj)
  930. # print("\033[1;31m +查询记录明细+ \033[0m")
  931. # print(queryInfoDetail)
  932. result = ""
  933. # 基本信息
  934. result+=("\033[1;34m +基本信息+ \033[0m")+"\n"
  935. result+=utils.toJson(basicInfoDf)+"\n"
  936. result+=("\033[1;34m +概要信息+ \033[0m")+"\n"
  937. result+=("\033[1;34m +信贷交易信息提示+ \033[0m")+"\n"
  938. result+=utils.toJson(briefInfoDf_loanTradeInfo)+"\n"
  939. result+=("\033[1;34m +被追偿信息汇总及呆账信息汇总+ \033[0m")+"\n"
  940. result+=utils.toJson(briefInfoDf_recoveryInfo_badDebtsInfoSum)+"\n"
  941. result+=("\033[1;34m +逾期(透支)信息汇总+ \033[0m")+"\n"
  942. result+=utils.toJson(briefInfoDf_overdueInfoSum)+"\n"
  943. result+=("\033[1;34m +信贷交易授信及负债信息概要+ \033[0m")+"\n"
  944. result+=utils.toJson(briefInfoDf_loanTradeCreditInfo)+"\n"
  945. # 单独输出贷款df
  946. logger.info("\033[1;34m +贷款信息Dataframe+ \033[0m")
  947. logger.info(dfParser.dfHeaderLoan)
  948. loanMergeDf = pd.DataFrame()
  949. loanPayRecordMergeDf = pd.DataFrame()
  950. # 输出数据
  951. for loanDfObj in loanAccountDfs:
  952. loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True);
  953. loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0,
  954. ignore_index=True);
  955. logger.info(loanMergeDf.values)
  956. logger.info("\033[1;34m +贷款信息还款记录Dataframe+ \033[0m")
  957. logger.info(dfParser.dfHeaderLoanPayRecord)
  958. logger.info(loanPayRecordMergeDf.values)
  959. # 解析贷款账户指标
  960. parseLoanMergeDf(loanMergeDf);
  961. # 解析还款记录相关指标
  962. parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf);
  963. # logger.info(loanAccountInfo)
  964. logger.info(consts.loanAccountInfoHeader)
  965. logger.info(loanAccountInfoDf.values)
  966. result+=("\033[1;34m +贷款账户信息+ \033[0m")+"\n"
  967. result+=utils.toJson(loanAccountInfoDf)+"\n"
  968. #贷记卡合并df
  969. creditCardMergeDf = pd.DataFrame()
  970. creditCardPayRecordMergeDf = pd.DataFrame()
  971. logger.info("\033[1;34m +贷记卡信息Dataframe+ \033[0m")
  972. logger.info(dfParser.dfHeaderCreditCard)
  973. # 输出数据
  974. for creditCardDfObj in creditCardAccountDfs:
  975. creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0, ignore_index=True);
  976. creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]], axis=0,ignore_index=True);
  977. logger.info(creditCardMergeDf.values)
  978. # 解析贷记卡账户指标
  979. parseCreditCardMergeDf(creditCardMergeDf);
  980. parseCreditCardMergeAndPayRecordDf(creditCardMergeDf,creditCardPayRecordMergeDf)
  981. result+=("\033[1;34m +贷记卡账户信息+ \033[0m")+"\n"
  982. result+=utils.toJson(creditCardAccountInfoDf)+"\n"
  983. result+=("\033[1;34m +查询记录明细+ \033[0m")+"\n"
  984. result+=utils.toJson(queryRecordDetailDf)+"\n"
  985. return result;
  986. # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig')
  987. if __name__ == '__main__':
  988. start = timeit.default_timer();
  989. basePath = "D:/mydocument/myproject/git/busscredit/Crerdai/";
  990. pdf_path = basePath + "闻海雁532329198801060347.pdf"
  991. pdf_path = basePath+"雷雨晴130630199006130027.pdf"
  992. pdf_path=basePath+"杨安140402197102111236.pdf"
  993. pdf_path=basePath+"刘盼兰130133198912261210.pdf"
  994. pdf_path=basePath+"马维强130521198604045272.pdf"
  995. pdf_path = basePath + "郑晨晨130681199008205811.pdf"
  996. # pdf_path=basePath+"人行征信模拟数据报告.pdf"
  997. basePath = "D:/mydocument/myproject/git/busscredit/20200414_report/";
  998. pdf_path = basePath + "艾思语51112319960218732X.pdf"
  999. isBat = False;
  1000. if isBat:
  1001. for file in os.listdir(basePath):
  1002. if file.endswith("pdf"):
  1003. pdf_path = basePath+file;
  1004. outPath = pdf_path.replace("pdf",'txt')
  1005. if os.path.exists(outPath):
  1006. continue;
  1007. logger.info(file + "解析开始...")
  1008. result = main(pdf_path)
  1009. # print(result)
  1010. #输出到文件
  1011. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1012. print(result.replace("\033[1;34m","").replace("\033[0m",""))
  1013. logger.info(file+"解析完成")
  1014. else:
  1015. outPath = pdf_path.replace("pdf", 'txt')
  1016. result = main(pdf_path)
  1017. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1018. print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  1019. s = timeit.default_timer() - start;
  1020. logger.info(str(s) + " 秒")