parseCreditPdf.py0421 126 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702
  1. import pdfplumber
  2. import pandas as pd
  3. import os
  4. os.environ["MODIN_ENGINE"] = "ray" # Modin will use Ray
  5. os.environ["MODIN_ENGINE"] = "dask" # Modin will use Dask
  6. import numpy as np;
  7. import sys
  8. import os
  9. #指标相关
  10. import loanIndexParser as lip;
  11. import payRcdIndexParser as prp;
  12. import creditCardIndexParser as cip
  13. import queryInfoIndexParser as qip
  14. import utils;
  15. import time;
  16. import consts;
  17. import math
  18. import dfParser;
  19. import gc
  20. pd.set_option('mode.chained_assignment', None)
  21. import log
  22. logger = log.logger
  23. # 查询信息
  24. dfMap = {};
  25. allHeaders = [] # 所有表头
  26. queryInfoDf = pd.DataFrame();
  27. queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"];
  28. dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None};
  29. allHeaders.append(queryInfoDf_header);
  30. # 身份信息
  31. identityDf = pd.DataFrame();
  32. identity_header = ['性别', '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱']
  33. addressDf = pd.DataFrame(); # 通讯地址
  34. dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None};
  35. allHeaders.append(identity_header);
  36. # 配偶信息
  37. mateDf = pd.DataFrame();
  38. mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话']
  39. dfMap["mateDf"] = {"df": mateDf, "nextDf": None};
  40. allHeaders.append(mateDf_header);
  41. # 居住信息====暂时该信息没有用到先不解析
  42. liveInfoDf = pd.DataFrame();
  43. liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期']
  44. dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None};
  45. allHeaders.append(liveInfoDf_header);
  46. # 职业信息
  47. occupationDf = pd.DataFrame();
  48. occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话']
  49. occupationInfo_header1 = ['编号', '职业', '行业', '职务', '职称', '进入本单位年份', '信息更新日期']
  50. dfMap["occupationDf"] = ({"df": occupationDf, "nextDf": None});
  51. # allHeaders.append(occupationInfo_header1);
  52. allHeaders.append(occupationInfo_header);
  53. # 上次查询记录
  54. preQueryRcd_header0 = ['上一次查询记录']
  55. allHeaders.append(preQueryRcd_header0);
  56. # 查询记录概要
  57. queryInfoBriefDf = pd.DataFrame();
  58. queryInfoBrief_header0 = ['最近1个月内的查询机构数', '最近1个月内的查询次数', '最近2年内的查询次数']
  59. queryInfoBrief_header1 = ['贷款审批', '信用卡审批', '贷款审批', '信用卡\n审批', '本人查询', '贷后管理', '担保资格\n审查', '特约商户\n实名审查']
  60. dfMap["queryInfoBriefDf"] = ({"df": queryInfoBriefDf, "nextDf": None});
  61. allHeaders.append(queryInfoBrief_header0);
  62. allHeaders.append(queryInfoBrief_header1);
  63. # 信贷交易信息提示
  64. loanTradeInfoDf = pd.DataFrame();
  65. loanTradeInfo_header = ['业务类型', '账户数', '首笔业务发放月份'];
  66. dfMap["loanTradeInfoDf"] = ({"df": loanTradeInfoDf, "nextDf": None});
  67. allHeaders.append(loanTradeInfo_header)
  68. # 信贷交易违约信息概要
  69. # 被追偿信息汇总 资产处置和垫款业务
  70. recoveryInfoSumDf = pd.DataFrame();
  71. recoveryInfoSumDf_header = ['业务类型', '账户数', '余额'];
  72. dfMap["recoveryInfoSumDf"] = ({"df": recoveryInfoSumDf, "nextDf": None});
  73. allHeaders.append(recoveryInfoSumDf_header)
  74. # 呆账信息汇总
  75. badDebtsInfoSumDf = pd.DataFrame();
  76. badDebtsInfoSumDf_header = ['账户数', '余额']; # 被追偿信息汇总
  77. dfMap["badDebtsInfoSumDf"] = ({"df": badDebtsInfoSumDf, "nextDf": None});
  78. allHeaders.append(badDebtsInfoSumDf_header)
  79. # 逾期透资信息汇总
  80. overdueInfoSumDf = pd.DataFrame();
  81. overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数']
  82. dfMap["overdueInfoSumDf"] = ({"df": overdueInfoSumDf, "nextDf": None});
  83. allHeaders.append(overdueInfoSumDf_header)
  84. # 非循环贷账户信息汇总
  85. loanAccountInfoSumDf = pd.DataFrame();
  86. loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总']
  87. loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  88. dfMap["loanAccountInfoSumDf"] = ({"df": loanAccountInfoSumDf, "nextDf": None});
  89. allHeaders.append(loanAccountInfoSumDf_header0)
  90. allHeaders.append(loanAccountInfoSumDf_header1)
  91. # 循环额度下分账户信息汇总
  92. cycleCreditAccountInfoSumDf = pd.DataFrame();
  93. cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总']
  94. cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'],
  95. dfMap["cycleCreditAccountInfoSumDf"] = ({"df": cycleCreditAccountInfoSumDf, "nextDf": None});
  96. allHeaders.append(cycleCreditAccountInfoSumDf_header0)
  97. allHeaders.append(cycleCreditAccountInfoSumDf_header1)
  98. # 循环贷账户信息汇总
  99. cycleLoanAccountInfoSumDf = pd.DataFrame();
  100. cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总']
  101. cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  102. dfMap["cycleLoanAccountInfoSumDf"] = ({"df": cycleLoanAccountInfoSumDf, "nextDf": None});
  103. allHeaders.append(cycleLoanAccountInfoSumDf_header0)
  104. allHeaders.append(cycleLoanAccountInfoSumDf_header1)
  105. # 贷记卡账户信息汇总
  106. creditCardInfoSumDf = pd.DataFrame();
  107. creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总']
  108. creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  109. dfMap["creditCardInfoSumDf"] = ({"df": creditCardInfoSumDf, "nextDf": None});
  110. allHeaders.append(creditCardInfoSumDf_header0)
  111. allHeaders.append(creditCardInfoSumDf_header1)
  112. # 准贷记卡账户信息汇总
  113. creditCardInfoSumDfZ = pd.DataFrame();
  114. creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总']
  115. creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  116. dfMap["creditCardInfoSumDfZ"] = ({"df": creditCardInfoSumDfZ, "nextDf": None});
  117. allHeaders.append(creditCardInfoSumDfZ_header0)
  118. allHeaders.append(creditCardInfoSumDfZ_header1)
  119. #公共信息概要
  120. publicInfoBriefDf = pd.DataFrame();
  121. publicInfoBriefDf_header0 = ['公共信息汇总']
  122. dfMap["publicInfoBriefDf"] = ({"df": publicInfoBriefDf, "nextDf": None});
  123. allHeaders.append(publicInfoBriefDf_header0)
  124. #查询记录汇总
  125. queryRecordSumDf_header0=['最近1个月内的查询机构数', '最近1个月内的查询次数', '最近2年内的查询次数']
  126. queryRecordSumDf = pd.DataFrame();
  127. dfMap["queryRecordSumDf"] = ({"df": queryRecordSumDf, "nextDf": None});
  128. allHeaders.append(queryRecordSumDf_header0)
  129. # 非循环贷账户,循环额度下分账户
  130. # 循环贷账户
  131. loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种']
  132. loanDfs = [];
  133. dfMap["loanDfs"] = ({"dfs": loanDfs, "nextDf": []});
  134. allHeaders.append(loan_header)
  135. # 贷记卡账户
  136. creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式']
  137. creditCardDfs = [];
  138. dfMap["creditCardDfs"] = ({"dfs": creditCardDfs, "nextDf": []});
  139. allHeaders.append(creditCard_header)
  140. # 准备贷记卡账户
  141. creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式']
  142. creditCardDfsZ = [];
  143. dfMap["creditCardDfsZ"] = ({"dfs": creditCardDfsZ, "nextDf": []});
  144. allHeaders.append(creditCardZ_header)
  145. #
  146. # 相关还款责任信息汇总 未使用到
  147. # 信贷交易信息明细
  148. # 被追偿信息 未使用到
  149. recoveryInfoDfs_header = ['管理机构','业务种类','债权接收日期','债权金额','债权转移时的还款状态']
  150. recoveryInfoDfs = [];
  151. dfMap["recoveryInfoDfs"] = ({"dfs": recoveryInfoDfs, "nextDf": []});
  152. allHeaders.append(recoveryInfoDfs_header)
  153. # 公共信息明细
  154. # 强制执行记录
  155. forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式']
  156. forceExecRcdDfs = [];
  157. dfMap["forceExecRcdDfs"] = ({"dfs": forceExecRcdDfs, "nextDf": []});
  158. allHeaders.append(forceExecRcdDfs_header)
  159. # 查询记录
  160. queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因']
  161. dfMap["queryRecordDetailDf"] = ({"df": pd.DataFrame(), "nextDf": []});
  162. allHeaders.append(queryRecordDetailDf_header)
  163. # 处理分页思路
  164. # df估计得放到对象里面,然后存储下一个df,一个对象里包含key
  165. # 然后判断对象的df的完整性,如果不完整代表被分页了,把nextdf合并到当前的df
  166. # 针对可合并的列的场景
  167. # =======
  168. keyList = [] # 存储所有的df的key列表
  169. # pd.Series()
  170. # 检查数据是否带表头
  171. # 应该是每一页开头的一行和每个表头对比一次,确认是不是表头,或者表头有什么共同的规律也可以看下
  172. import timeit
  173. # 定义指标部分======================start
  174. reportTime = ""; # 报告时间
  175. # 被查询者姓名
  176. queryInfoName = "";
  177. queryInfoCardId = "" # 被查询者证件号码
  178. # 定义指标部分======================end
  179. # 被查询信息-基础信息
  180. # 报告时间
  181. # 被查询者姓名
  182. # 被查询者证件号码
  183. # 基础信息
  184. queryInfo = {"reportTime":""}
  185. # 身份信息
  186. identity = {}
  187. # 配偶信息
  188. mate = {}
  189. # 信贷交易信息提示-信用提示
  190. loanTradeInfo = {'perHouseLoanAccount': 0, 'perBusHouseLoanAccount': 0, 'otherLoanAccount': 0, 'loanMonthMin': 0,
  191. 'creditCardMonthMin': 0, 'creditAccount': 0, 'creditAccountZ': 0}
  192. # 逾期及违约信息概要
  193. overdueBrief = {}
  194. # 逾期及透资信息汇总
  195. # 贷款逾期账户数 loanOverdueAccount
  196. # 贷款逾期月份数 loanOverdueMonth
  197. # 贷款单月最高逾期总额 loanCurMonthOverdueMaxTotal
  198. # 贷款最长逾期月数 loanMaxOverdueMonth
  199. overdueInfo = {"loanOverdueAccount": "", "loanOverdueMonth": "", "loanCurMonthOverdueMaxTotal": "",
  200. "loanMaxOverdueMonth": "",
  201. "creditCardOverdueAccount": "", "creditCardOverdueMonth": "", "creditCardCurMonthOverdueMaxTotal": "",
  202. "creditCardMaxOverdueMonth": ""}
  203. # 未结清贷款信息汇总
  204. # ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  205. loanAccountInfoSum = {"mgrOrgCount": 0, "account": 0, "creditTotalAmt": 0, "balance": 0, "last6AvgPayAmt": 0}
  206. # 未销户贷记卡发卡法人机构数
  207. # 未销户贷记卡发卡机构数
  208. # 未销户贷记卡账户数
  209. # 未销户贷记卡授信总额
  210. # 未销户贷记卡单家行最高授信额
  211. # 未销户贷记卡单家行最低授信额
  212. # 未销户贷记卡已用额度
  213. # 未销户贷记卡近6月平均使用额度
  214. # 未结清贷记卡信息汇总
  215. # ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  216. creditCardInfoSum = {"awardOrgCount": 0, "account": 0, "creditTotalAmt": 0, "perMaxCreditTotalAmt": 0,
  217. "perMinCreditTotalAmt": 0, "useAmt": 0, "last6AvgUseAmt": 0}
  218. # 信 贷 审 批 查 询 记 录 明 细
  219. queryRecordDetail = {"last1MonthQueryTimes": 0, "last3MothLoanApproveTimes": 0, "last3MonthQueryTimes": 0,
  220. "lastTimeLoanApproveMonth": 0}
  221. #最近一笔结清贷款的贷款金额 
  222. loanAccountInfo = {"lastSettleLoanAmt": 0}
  223. loanAccountDfs=[];#横向合并
  224. creditCardAccountDfs=[];#贷记卡账户合并
  225. creditCardAccountDfsZ=[];#准贷记卡账户合并
  226. #============================指标定义区 start=============================
  227. #基本信息 拆分
  228. # basicInfoDf = pd.DataFrame(columns=consts.basicInfoHeader, index=[0])
  229. #身份信息
  230. identityInfoIndex = '身份信息'
  231. identityInfoDf = pd.DataFrame(columns=consts.identityInfoHeader,index=[identityInfoIndex])
  232. #配偶信息
  233. mateInfoIndex = '配偶信息'
  234. mateInfoDf = pd.DataFrame(columns=consts.mateInfoHeader,index=[mateInfoIndex])
  235. #居住信息
  236. liveInfoIndex = '居住信息'
  237. liveInfoDf = pd.DataFrame(columns=consts.liveInfoHeader,index=[liveInfoIndex])
  238. #职业信息
  239. occupationInfoIndex = '职业信息'
  240. occupationInfoDf = pd.DataFrame(columns=consts.occupationInfoHeader,index=[occupationInfoIndex])
  241. #信贷交易信息提示
  242. loanTradeInfoIndex = '信贷交易信息提示'
  243. briefInfoDf_loanTradeInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeInfo,index=[loanTradeInfoIndex])
  244. #被追偿信息汇总及呆账信息汇总
  245. recoveryInfoSumIndex = '信贷交易违约信息概要'
  246. briefInfoDf_recoveryInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_recoveryInfo,index=[recoveryInfoSumIndex])
  247. #呆账信息汇总
  248. badDebtsInfoIndex = '呆账信息汇总'
  249. briefInfoDf_badDebtsInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_badDebtsInfoSum,index=[badDebtsInfoIndex])
  250. #逾期(透支)信息汇总
  251. overdueInfoSumIndex='逾期(透支)信息汇总'
  252. briefInfoDf_overdueInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_overdueInfoSum,index=[overdueInfoSumIndex])
  253. #信贷交易授信及负债信息概要
  254. loanTradeCreditInfoIndex='信贷交易授信及负债信息概要'
  255. briefInfoDf_loanTradeCreditInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeCreditInfo,index=[loanTradeCreditInfoIndex]).fillna(0.0)
  256. #公共信息概要
  257. publicInfoBriefIndex = '公共信息概要'
  258. publicInfoBriefDf = pd.DataFrame(columns=consts.publicInfoBriefHeader,index=[publicInfoBriefIndex])
  259. #查询记录汇总
  260. queryRecordSumIndex = '查询记录汇总'
  261. queryRecordSumDf = pd.DataFrame(columns=consts.queryRecordSumHeader,index=[queryRecordSumIndex])
  262. #信贷交易明细-被追偿信息
  263. recoveryInfoIndex='被追偿信息'
  264. creditTradeDetailDf_recoveryInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_recoveryInfo,index=[recoveryInfoIndex])
  265. #信贷交易明细
  266. #非循环贷账户
  267. loanInfoIndex='非循环贷账户'
  268. creditTradeDetailDf_loanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_loanAccountInfo,index=[loanInfoIndex])
  269. #循环额度下分账户
  270. cycleCreditAccountInfoIndex='循环额度下分账户'
  271. creditTradeDetailDf_cycleCreditAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleCreditAccountInfo,index=[cycleCreditAccountInfoIndex])
  272. #循环贷账户
  273. cycleLoanAccountInfoIndex='循环贷账户'
  274. creditTradeDetailDf_cycleLoanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleLoanAccountInfo,index=[cycleLoanAccountInfoIndex])
  275. #贷款信息
  276. loanAccountInfoIndex='贷款信息'
  277. loanAccountInfoDf = pd.DataFrame(columns=consts.loanAccountInfoHeader,index=[loanAccountInfoIndex])
  278. #贷记卡信息
  279. creditCardAccountInfoIndex = '贷记卡账户'
  280. creditCardAccountInfoDf = pd.DataFrame(columns=consts.creditCardAccountInfoHeader,index=[creditCardAccountInfoIndex])
  281. #准贷记卡
  282. creditCardAccountInfoIndexZ = '准贷记卡账户'
  283. creditCardAccountInfoDfZ = pd.DataFrame(columns=consts.creditCardAccountInfoHeaderZ,index=[creditCardAccountInfoIndexZ])
  284. useRateIndex = '使用率'
  285. useRateDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_useRate,index=[useRateIndex])
  286. openAccountIndex = '开户数'
  287. openAccountDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_openAccount,index=[openAccountIndex])
  288. payRcdStatusIndex = '24期还款状态'
  289. payRcdStatusDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_payRcdStatus,index=[payRcdStatusIndex])
  290. #查询记录明细指标
  291. queryRecordDetailIndex = '信贷审批查询记录明细'
  292. queryRecordDetailDf = pd.DataFrame(columns=consts.queryRecordDetailHeader,index=[queryRecordDetailIndex])
  293. #============================指标定义区 end=============================
  294. # 解析被查询信息指标
  295. def parseQueryInfo(dfObj):
  296. df = dfObj["df"];
  297. reportTime = df.loc[0, :][3]
  298. reportTime = reportTime.split(":")[1]
  299. reportTime = reportTime.replace(".", "-"); # 报告时间
  300. queryInfo["reportTime"] = reportTime
  301. row = df.loc[2, :]
  302. queryInfo["queryInfoName"] = row[0]; # 被查询者姓名
  303. # basicInfoDf.loc[0, '姓名'] = row[0]
  304. queryInfo["queryInfoCardId"] = row[2].replace("\n", ""); # 被查询者证件号码
  305. # basicInfoDf.loc[0, '身份证'] = row[2].replace("\n", "")
  306. # 婚姻状况
  307. # 学历
  308. # 单位电话
  309. # 住宅电话
  310. # 通讯地址
  311. def parseIdentity(dfObj):
  312. df = dfObj["df"];
  313. row1 = df.loc[1, :].dropna().reset_index(drop=True)
  314. # identity["marital"] = row1[3] # 婚姻状况
  315. # identity["education"] = row1[4] # 学历
  316. # identity["commAddress"] = row1[9].replace("\n", ""); # 通讯地址
  317. identityInfoDf.loc[identityInfoIndex, '性别'] = row1[0]
  318. identityInfoDf.loc[identityInfoIndex, '出生日期'] = dfParser.formatDate(row1[1])[0:7]
  319. identityInfoDf.loc[identityInfoIndex, '国籍'] = row1[6]
  320. identityInfoDf.loc[identityInfoIndex, '户籍地址'] = row1[9].replace("\n", "")
  321. identityInfoDf.loc[identityInfoIndex, '婚姻状况'] = row1[2]
  322. identityInfoDf.loc[identityInfoIndex, '学位'] = row1[4]
  323. identityInfoDf.loc[identityInfoIndex, '通讯地址'] = row1[8].replace("\n", "")
  324. identityInfoDf.loc[identityInfoIndex, '就业状况'] = row1[5]
  325. mobileDf = dfObj["mobileDf"];
  326. identityInfoDf.loc[identityInfoIndex, '历史手机号码数'] = mobileDf.index.size
  327. identityInfoDf.loc[identityInfoIndex, '近3个月手机号码数'] = getLastMonthMobileCount(mobileDf,3,reportTime)
  328. identityInfoDf.loc[identityInfoIndex, '近6个月手机号码数'] = getLastMonthMobileCount(mobileDf, 6,reportTime)
  329. identityInfoDf.loc[identityInfoIndex, '近12个月手机号码数'] = getLastMonthMobileCount(mobileDf, 12,reportTime)
  330. identityInfoDf.loc[identityInfoIndex, '近24个月手机号码数'] = getLastMonthMobileCount(mobileDf, 24,reportTime)
  331. #最近几个月电话号码数
  332. def getLastMonthMobileCount(df, month,reportTime):
  333. # 当前日期
  334. last1MonthDateStr = reportTime
  335. # 最近一个月
  336. lastMonthDate = np.datetime64(last1MonthDateStr, "D") - np.timedelta64(30 * month, 'D')
  337. lastMonthMobileDf = df[df[5] >= str(lastMonthDate)]
  338. return lastMonthMobileDf.shape[0];
  339. # 配偶姓名
  340. # 配偶证件号码
  341. # 配偶工作单位
  342. # 配偶联系电话
  343. def parseMate(dfObj):
  344. df = dfObj["df"];
  345. if not df.empty:
  346. row1 = df.loc[1, :]
  347. mate["mateName"] = row1[0] # 配偶姓名
  348. mate["mateCardId"] = row1[2] # 配偶证件号码
  349. mate["mateWorkCompany"] = row1[3].replace("\n", ""); # 配偶工作单位
  350. mate["mateContactTel"] = row1[4]; # 配偶联系电话
  351. mateInfoDf.loc[mateInfoIndex, '姓名'] = row1[0]
  352. mateInfoDf.loc[mateInfoIndex, '证件号码'] = row1[2]
  353. mateInfoDf.loc[mateInfoIndex, '工作单位'] = row1[3].replace("\n", "");
  354. mateInfoDf.loc[mateInfoIndex, '联系电话'] = row1[4].replace("\n", "");
  355. #解析居住信息
  356. def parseLiveInfo(dfObj):
  357. df = dfObj["df"];
  358. if not df.empty:
  359. row1 = df.loc[1, :]
  360. liveInfoDf.loc[liveInfoIndex, '居住地址'] = row1[1]
  361. liveInfoDf.loc[liveInfoIndex, '住宅电话'] = row1[2]
  362. liveInfoDf.loc[liveInfoIndex, '历史居住地址个数'] = df.index.size-1;
  363. curDate = np.datetime64(time.strftime("%Y-%m-%d"));
  364. last3year = str(curDate)[0:4]
  365. last3yearDate = str(int(last3year)-3)+str(curDate)[4:10]
  366. lastLiveDf = df[df[4]>=last3yearDate];
  367. liveInfoDf.loc[liveInfoIndex, '最近3年内居住地址个数'] = lastLiveDf.index.size-1;
  368. houseIndex = df[df[3]=='自置'].index.size>0
  369. if (houseIndex):
  370. houseStr = '是'
  371. else:
  372. houseStr= '否'
  373. liveInfoDf.loc[liveInfoIndex, '当前居住状况-是否具有自有住房'] = houseStr;
  374. liveInfoDf.loc[liveInfoIndex, '居住状况'] = row1[3]
  375. liveInfoDf.loc[liveInfoIndex, '信息更新日期'] = row1[4]
  376. #解析职业信息
  377. def parseOccupationInfoDf(dfObj):
  378. df = dfObj["df"];
  379. if not df.empty:
  380. occIndex1 = 0#判断职业从哪行开始
  381. for i in range(0,df.index.size):
  382. if df.loc[i,:].dropna().tolist()==occupationInfo_header1:
  383. occIndex1=i;
  384. break;
  385. occDf = df[1:occIndex1].reset_index(drop=True)#工作单位
  386. occDfNew = pd.DataFrame()
  387. occDf1New = pd.DataFrame()
  388. #删除为none的列 合并的bug TODO
  389. for i in range(0,occDf.index.size):
  390. occDfNew = occDfNew.append([pd.DataFrame(occDf.iloc[i].dropna().reset_index(drop=True)).T],ignore_index=True)
  391. occDf1 = df[occIndex1+1:df.index.size].reset_index(drop=True) #职业
  392. for i in range(0,occDf1.index.size):
  393. occDf1New = occDf1New.append([pd.DataFrame(occDf1.iloc[i].dropna().reset_index(drop=True)).T], ignore_index=True)
  394. row = occDf.loc[0,:].dropna()
  395. occupationInfoDf.loc[occupationInfoIndex,'工作单位'] = row[1]
  396. occDf = pd.concat([occDfNew, occDf1New], axis=1, ignore_index=True)#合并df
  397. last3yearDate = utils.getLastMonthDate(queryInfo['reportTime'],12*3)
  398. occDf = utils.replaceDateColIdx(occDf,occDf.columns.size-1)
  399. dateIndex = occDf.columns.size-1;#日期列
  400. last3yearOccDf = occDf[occDf[dateIndex]>=last3yearDate]
  401. occupationInfoDf.loc[occupationInfoIndex, '最近3年内工作单位数'] = last3yearOccDf.index.size;
  402. occupationInfoDf.loc[occupationInfoIndex, '单位电话'] = row[4];
  403. reportTime = queryInfo['reportTime']
  404. try:
  405. occupationInfoDf.loc[occupationInfoIndex, '最早进入本单位年份距报告日期时长'] = utils.difDateReportTime(reportTime,np.min(occDf[dateIndex]));
  406. occupationInfoDf.loc[occupationInfoIndex, '最新进入本单位年份距报告日期时长'] = utils.difDateReportTime(reportTime, np.max(occDf[dateIndex]));
  407. except:
  408. logger.error("最早进入本单位年份距报告日期时长解析异常")
  409. row0 = occDf.loc[0,:].dropna().reset_index(drop=True)#最新
  410. occupationInfoDf.loc[occupationInfoIndex, '单位性质'] =row0[2]
  411. occupationInfoDf.loc[occupationInfoIndex, '单位地址'] = row0[3]
  412. occupationInfoDf.loc[occupationInfoIndex, '职业'] = row0[6]
  413. occupationInfoDf.loc[occupationInfoIndex, '行业'] = row0[7]
  414. occupationInfoDf.loc[occupationInfoIndex, '职务'] = row0[8]
  415. occupationInfoDf.loc[occupationInfoIndex, '职称'] = row0[9]
  416. occupationInfoDf.loc[occupationInfoIndex, '进入本单位年份'] = row0[10]
  417. occupationInfoDf.loc[occupationInfoIndex, '信息更新日期'] = row0[11]
  418. occupationInfoDf.loc[occupationInfoIndex, '历史工作单位数'] = occDf1.index.size
  419. # 日期相减离当前时间月份
  420. # 贷款账龄(月数)=当前日期(2020-04-01)-最小月份的1日(2019.2->2019-12-01)=4
  421. # def difMonth(dateStr):
  422. # return int(int(str(np.datetime64(time.strftime("%Y-%m-%d")) -
  423. # np.datetime64(dateStr.replace('.', '-'), "D")).split(" ")[0]) / 30);
  424. # 信贷交易明细汇总
  425. def parseLoanTradeInfo(dfObj):
  426. df = dfObj["df"];
  427. # row1 = df.loc[1, :]
  428. loanMonthDf = df[1: 4]
  429. loanMonthDf = loanMonthDf.reset_index(drop=True)
  430. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '个人住房贷款账户数'] = loanMonthDf.loc[0, :][2]
  431. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex,'个人商用房贷款(包括商住两用)账户数']=loanMonthDf.loc[1, :][2]
  432. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '其他类贷款账户数'] = loanMonthDf.loc[2, :][2]
  433. creditCardDf = df[4: 6];
  434. creditCardDf = creditCardDf.reset_index(drop=True)
  435. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '贷记卡账户数'] = creditCardDf.loc[0, :][2]
  436. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '准贷记卡账户数'] = creditCardDf.loc[1, :][2]
  437. # 解析呆账信息汇总
  438. def parseBadDebtsInfoSumDf(dfObj):
  439. df = dfObj["df"];
  440. if not df.empty:
  441. row1 = df.loc[1, :]
  442. briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '账户数'] = row1[0];
  443. briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '余额'] = row1[1];
  444. # 解析被追偿信息汇总
  445. def parseRecoveryInfoSum(dfObj):
  446. df = dfObj["df"];
  447. if not df.empty:
  448. row1 = df.loc[1, :]
  449. row2 = df.loc[2, :]
  450. row3 = df.loc[3, :]
  451. overdueBrief["disposalInfoSumAccount"] = row1[1]; # 资产处置信息汇总笔数
  452. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '资产处置业务账户数'] = row1[1];
  453. overdueBrief["disposalInfoSumAmt"] = row1[2]; # 资产处置信息汇总余额
  454. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '资产处置业务余额'] = replaceAmt(row1[2]);
  455. overdueBrief["advanceInfoSumAccount"] = row2[1]; # 垫款业务笔数
  456. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '垫款业务账户数'] = row2[1];
  457. overdueBrief["advanceInfoSumAmt"] = row2[2]; # 垫款业务余额
  458. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '垫款业务余额'] = replaceAmt(row2[2]);
  459. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '合计总账户数'] = row3[1];
  460. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '合计总余额'] = replaceAmt(row3[2]);
  461. # 贷款逾期账户数
  462. # 贷款逾期月份数
  463. # 贷款单月最高逾期总额
  464. # 贷款最长逾期月数
  465. def parseOverdueInfoSum(dfObj):
  466. df = dfObj["df"];
  467. if not df.empty:
  468. row2= df.loc[2, :]
  469. row3 = df.loc[3, :]
  470. row4 = df.loc[4, :]
  471. row5 = df.loc[5, :]
  472. row6 = df.loc[6, :]
  473. #这块的数据需要进行出来 TODO
  474. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户账户数'] = utils.toInt(row2[1]);
  475. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户月份数'] = utils.toInt(row2[2]);
  476. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户单月最高逾期总额'] = utils.replaceAmt(row2[3]);
  477. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户最长逾期月数'] = utils.toInt(row2[4]);
  478. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户账户数'] = utils.toInt(row3[1]);
  479. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户月份数'] = utils.toInt(row3[2]);
  480. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户单月最高逾期总额'] = utils.replaceAmt(row3[3]);
  481. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户最长逾期月数'] = utils.toInt(row3[4]);
  482. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户账户数'] = utils.toInt(row4[1]);
  483. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户月份数'] = utils.toInt(row4[2]);
  484. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户单月最高逾期总额'] = utils.replaceAmt(row4[3]);
  485. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户最长逾期月数'] = utils.toInt(row4[4]);
  486. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户账户数'] = utils.toInt(row5[1]);
  487. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户月份数'] = utils.toInt(row5[2]);
  488. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户单月逾期总额'] = utils.replaceAmt(row5[3]);
  489. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户最长逾期月数'] = utils.toInt(row5[4]);
  490. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户账户数'] = utils.toInt(row6[1]);
  491. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户月份数'] = utils.toInt(row6[2]);
  492. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户单月透支总额'] = utils.replaceAmt(row6[3]);
  493. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户最长透支月数'] = utils.toInt(row6[4]);
  494. overdueInfoAccountDf = df[df[1] != '--'];
  495. overdueInfoAccountDf = overdueInfoAccountDf[2:7]
  496. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户所有逾期账户最长逾期/透支月数最大值']=np.max(overdueInfoAccountDf[4].astype('int'))
  497. #np.sum(overdueInfoAccountDf[1])
  498. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户所有逾期账户数加总']= np.max(overdueInfoAccountDf[1].astype('int'))# TODO
  499. # briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户过去5年出现逾期的所有账户数目']=None# TODO
  500. # 未结清贷款法人机构数 从“未结清贷款信息汇总”中直接提取LoanLegalOrgNum
  501. # 未结清贷款机构数 从“未结清贷款信息汇总”中直接提取LoanOrgNum
  502. # 未结清贷款笔数 从“未结清贷款信息汇总”中直接提取CountNum
  503. # 未结清贷款合同总额 从“未结清贷款信息汇总”中直接提取ContractProfits
  504. # 未结清贷款合同余额 从“未结清贷款信息汇总”中直接提取Balance
  505. # 未结清贷款近6月平均应还款 从“未结清贷款信息汇总”中直接提取Last6MothsAvgRepayAmount
  506. # 个人贷款未结清笔数 "从“未结清贷款信息汇总”计算客户符合以下条件的贷款笔数
  507. # 1.贷款类型不为('%个人助学贷款%' ,'%农户贷款%')
  508. # 2.贷款额度>100元
  509. # 3.贷款状态不为“结清”"
  510. # 非循环贷账户信息汇总
  511. def doFilterCalc(dfx):
  512. dfx = dfx.replace('--', 0)
  513. return dfx;
  514. # 科学计数法转换
  515. def replaceAmt(dfx):
  516. return dfx.str.replace(',', '')
  517. # 非循环贷账户信息汇总
  518. def parseLoanAccountInfoSum(dfObj):
  519. df = dfObj["df"];
  520. if not df.empty:
  521. loanAccountInfoSumDf = df[2:3];
  522. loanAccountInfoSumDf = doFilterCalc(loanAccountInfoSumDf); # 替换--为0
  523. loanAccountInfoSumDf = loanAccountInfoSumDf.reset_index(drop=True)
  524. row0 = loanAccountInfoSumDf.loc[0,:]
  525. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户管理机构数'] = int(row0[0])
  526. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'] = int(row0[1])
  527. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  528. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  529. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  530. # 循环额度下分账户
  531. def parseCycleCreditAccountInfoSum(dfObj):
  532. df = dfObj["df"];
  533. if not df.empty:
  534. cycleCreditAccountInfoSumDf = df[2:3];
  535. cycleCreditAccountInfoSumDf = doFilterCalc(cycleCreditAccountInfoSumDf); # 替换--为0
  536. cycleCreditAccountInfoSumDf = cycleCreditAccountInfoSumDf.reset_index(drop=True)
  537. row0 = cycleCreditAccountInfoSumDf.loc[0,:]
  538. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户管理机构数'] = int(row0[0])
  539. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'] = int(row0[1])
  540. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额'] = int(utils.replaceAmt(row0[2]))
  541. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额'] = int(utils.replaceAmt(row0[3]))
  542. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  543. # 循环贷账户信息
  544. def parseCyleLoanAccountInfoSum(dfObj):
  545. df = dfObj["df"];
  546. if not df.empty:
  547. cycleLoanAccountInfoSumDf = df[2:3];
  548. cycleLoanAccountInfoSumDf = doFilterCalc(cycleLoanAccountInfoSumDf); # 替换--为0
  549. cycleLoanAccountInfoSumDf = cycleLoanAccountInfoSumDf.reset_index(drop=True)
  550. row0 = cycleLoanAccountInfoSumDf.loc[0,:]
  551. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户管理机构数'] = int(row0[0])
  552. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户账户数'] = int(row0[1])
  553. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  554. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  555. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  556. # 解析贷记卡信息汇总,包含准贷记卡
  557. def parseCreditCardInfoSum(dfObj):
  558. df = dfObj["df"];
  559. if not df.empty:
  560. creditCardInfoSumDf = df[2:3];
  561. creditCardInfoSumDf = doFilterCalc(creditCardInfoSumDf); # 替换--为0
  562. creditCardInfoSumDf = creditCardInfoSumDf.reset_index(drop=True)
  563. row0 = creditCardInfoSumDf.loc[0, :]
  564. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡发卡机构数'] = int(row0[0])
  565. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡账户数'] = int(row0[1])
  566. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  567. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  568. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  569. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  570. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
# Parse the quasi-credit-card (准贷记卡) summary
  572. def parseCreditCardInfoSumZ(dfObj):
  573. df = dfObj["df"];
  574. if not df.empty:
  575. creditCardInfoSumDfZ = df[2:3];
  576. creditCardInfoSumDfZ = doFilterCalc(creditCardInfoSumDfZ);
  577. creditCardInfoSumDfZ = creditCardInfoSumDfZ.reset_index(drop=True)
  578. row0 = creditCardInfoSumDfZ.loc[0, :]
  579. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡发卡机构数'] = int(row0[0])
  580. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡账户数'] = int(row0[1])
  581. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  582. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  583. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  584. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  585. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
  586. #解析公共信息汇总
  587. def parsePublicInfoBrief(dfObj):
  588. df = dfObj["df"];
  589. if not df.empty:
  590. publicInfoBrief = df[1:6];
  591. publicInfoBrief = publicInfoBrief.reset_index(drop=True)
  592. row0 = publicInfoBrief.loc[0, :]
  593. row1 = publicInfoBrief.loc[1, :]
  594. row2 = publicInfoBrief.loc[2, :]
  595. row3 = publicInfoBrief.loc[3, :]
  596. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-记录数'] = int(row0[1])
  597. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-涉及金额'] = int(utils.replaceAmt(row0[2]))
  598. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-记录数'] = int(row1[1])
  599. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-涉及金额'] = int(utils.replaceAmt(row1[2]))
  600. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-记录数'] = int(row2[1])
  601. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-涉及金额'] = int(utils.replaceAmt(row2[2]))
  602. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-记录数'] = int(row3[1])
  603. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-涉及金额'] = int(utils.replaceAmt(row3[2]))
  604. #解析查询信息汇总
  605. def parseQueryRecordSum(dfObj):
  606. df = dfObj["df"];
  607. if not df.empty:
  608. queryRecordSumDfTmp = df[2:3];
  609. queryRecordSumDfTmp = queryRecordSumDfTmp.reset_index(drop=True)
  610. row0 = queryRecordSumDfTmp.loc[0, :]
  611. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-贷款审批'] =int(row0[0])
  612. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-信用卡审批'] =int(row0[1])
  613. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-贷款审批'] =int(row0[2])
  614. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-信用卡审批'] =int(row0[3])
  615. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-本人查询'] =int(row0[4])
  616. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-贷后管理'] =int(row0[5])
  617. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-担保资格审查'] =int(row0[6])
  618. # 解析查询记录明细
  619. def parseQueryInfoDetail(dfObj):
  620. df = dfObj["df"];
  621. reportTime = queryInfo["reportTime"];
  622. if not df.empty:
  623. df = utils.replaceDateCol(df)
  624. df = df[1:df.index.size] # 去掉表头
  625. # queryRecordDetailDf.loc[0,'近1月查询次数'] = qip.getLastMonthQueryTimes(df, 1, "",reportTime)
  626. # queryRecordDetailDf.loc[0, '近3月查询次数'] = qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  627. # queryRecordDetailDf.loc[0, '近6月查询次数'] = qip.getLastMonthQueryTimes(df, 6, "", reportTime)
  628. # queryRecordDetailDf.loc[0, '近12月查询次数'] = qip.getLastMonthQueryTimes(df, 12, "", reportTime)
  629. #
  630. # queryRecordDetailDf.loc[0, '近3月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime)
  631. # queryRecordDetailDf.loc[0, '近3月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime)
  632. # queryRecordDetailDf.loc[0, '近6月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime)
  633. # queryRecordDetailDf.loc[0, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime)
  634. # queryRecordDetailDf.loc[0, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime)
  635. # queryRecordDetailDf.loc[0, '近12月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime)
  636. # queryRecordDetailDf.loc[0, '近3月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime)
  637. # queryRecordDetailDf.loc[0, '近3月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime)
  638. # queryRecordDetailDf.loc[0, '近6月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime)
  639. # queryRecordDetailDf.loc[0, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime)
  640. # queryRecordDetailDf.loc[0, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime)
  641. # queryRecordDetailDf.loc[0, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime)
  642. # queryRecordDetailDf.loc[0, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime)
  643. #
  644. # queryRecordDetailDf.loc[0, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime)
  645. # queryRecordDetailDf.loc[0, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime)
  646. # queryRecordDetailDf.loc[0, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime)
  647. # queryRecordDetailDf.loc[0, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime)
  648. # queryRecordDetailDf.loc[0, '最近12个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime)
  649. # queryRecordDetailDf.loc[0, '最近12个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove,reportTime)
  650. queryRecordDetailDf.loc[queryRecordDetailIndex, '近1月查询次数'] =qip.getLastMonthQueryTimes(df, 1, "",reportTime)
  651. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数'] =qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  652. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数'] =qip.getLastMonthQueryTimes(df, 6, "",reportTime)
  653. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数'] =qip.getLastMonthQueryTimes(df, 12, "",reportTime)
  654. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近1个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 1, "", reportTime)
  655. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近3个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 3, "", reportTime)
  656. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 6, "", reportTime)
  657. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 12, "", reportTime)
  658. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 24, "", reportTime)
  659. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime)
  660. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime)
  661. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime)
  662. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime)
  663. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime)
  664. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime)
  665. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime)
  666. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数信用卡审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime)
  667. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime)
  668. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime)
  669. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime)
  670. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime)
  671. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.insuranceAprove,reportTime)
  672. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.insuranceAprove,reportTime)
  673. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanAfterMgr,reportTime)
  674. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanAfterMgr,reportTime)
  675. queryRecordDetailDf.loc[queryRecordDetailIndex, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime)
  676. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime)
  677. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime)
  678. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime)
  679. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime)
  680. #解析贷款还款记录指标
  681. def parseLoanMergeAndPayRecordDf(df,payRcdDf):
  682. if not df.empty and not payRcdDf.empty:
  683. #正常
  684. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  685. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  686. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  687. #临时保存,不用过滤还款状态为0的
  688. payRcdMaxOverdueDf = overduePayRcdDf;
  689. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态']>0]
  690. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  691. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数']/df.index.size,2)
  692. #存在逾期的贷款账户 非结清的过滤出逾期的账户号
  693. overdueLoanDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  694. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] = overdueLoanDf['管理机构'].unique().size
  695. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] / df['管理机构'].unique().size,2)
  696. #还款记录按日期排序最近3笔的最大逾期期数
  697. loanAccountInfoDf.loc[loanAccountInfoIndex, '近1月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf,1);
  698. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  699. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  700. loanAccountInfoDf.loc[loanAccountInfoIndex, '近9月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  701. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  702. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24);
  703. payStatus= ["G","D","C","N","M","1","2","3","4","5","6","7"]
  704. # 贷款24期还款记录次数 剔除结清 转出 呆账
  705. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  706. #从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
  707. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  708. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  709. #payRcdDf[(payRcdDf['还款状态']!='') & (payRcdDf['账户编号']==1)].index.size
  710. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款24期还款记录次数'] = np.max(payRcdTimes)
  711. #信贷交易明细-非循环贷账户
  712. def parseLoanAccountInfo(df):
  713. if not df.empty:
  714. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  715. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  716. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  717. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  718. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  719. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  720. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  721. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  722. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  723. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  724. #信贷交易明细-循环额度分账户
  725. def parseCycleCreditAccountInfo(df):
  726. if not df.empty:
  727. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  728. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  729. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  730. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  731. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  732. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  733. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  734. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  735. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  736. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  737. #信贷交易明细-循环贷账户
  738. def parseCycleLoanAccountInfo(df):
  739. if not df.empty:
  740. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  741. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  742. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  743. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  744. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  745. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  746. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  747. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  748. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  749. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  750. #解析贷款账户信息指标
  751. def parseLoanMergeDf(df):
  752. if not df.empty:
  753. sortDf = df.sort_values(by=["开立日期","借款金额(本金)"] , ascending=(False,False))
  754. sortDf = sortDf[sortDf['账户状态'] == '结清'];
  755. sortDf = sortDf.reset_index(drop=True)
  756. if not sortDf.empty:
  757. row0 = sortDf.loc[0, :]
  758. loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)']
  759. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)']
  760. openDate = dfParser.formatDate(row0['开立日期'])
  761. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的发放距今月数'] = utils.difMonth(openDate)
  762. settleDate = dfParser.formatDate(row0['账户关闭日期'])
  763. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的结清距今月数'] = utils.difMonth(settleDate)
  764. loanAccountInfoDf.loc[loanAccountInfoIndex, '历史贷款总法人机构数'] = df['管理机构'].unique().size
  765. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前同时在用的贷款机构数'] = df[df['借款金额(本金)']>0]['管理机构'].unique().size
  766. statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  767. bankDf = statusDf[statusDf['管理机构'].str.contains('银行')]
  768. #没有记录
  769. if statusDf.index.size==0:
  770. isNotBankCust = -1
  771. else:
  772. if bankDf.index.size >0:#有一条以上不为结清,请包含银行
  773. isNotBankCust = 1;
  774. else:
  775. isNotBankCust = 0;
  776. loanAccountInfoDf.loc[loanAccountInfoIndex, '是否有非银行贷款客户'] = isNotBankCust
  777. #最严重的五级分类
  778. # fiveType = ""
  779. # for fiveTypeTmp in consts.fiveType:
  780. # fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp];
  781. # if not fiveTypeDf.empty:
  782. # fiveType = fiveTypeTmp;
  783. # break;
  784. # loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款五级分类'] = fiveType
  785. #当前贷款LTV
  786. # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0
  787. # 的记录后,SUM(本金余额) / SUM(贷款本金)
  788. # 如本金余额为空和贷款本金为0或为空,则当条记录不计算
  789. loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')]
  790. badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))]
  791. balanceSum = np.sum(badSetDf['余额(本金)'].astype('int'))
  792. loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int'))
  793. if(loanAmtSum !=0):
  794. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2)
  795. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2)
  796. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  797. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  798. #['个人住房商业贷款','个人商用房(含商住两用)贷款','个人住房公积金贷款','房'],
  799. houseLtvList = consts.houseLtvList;
  800. # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)]
  801. # if not houseLtvDf.empty:
  802. # loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2)
  803. #['个人住房贷款','个人商用房(包括商住两用)贷款']
  804. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = lip.getCurLtv(badSetDf, houseLtvList)
  805. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款机构数量'] = loanLtvDf['管理机构'].unique().size
  806. cardLtvList = ['个人汽车消费贷款','车']
  807. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList)
  808. operateLtvList = ['个人经营性贷款']
  809. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList)
  810. consumeLtvList = ['其他个人消费贷款']
  811. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList)
  812. bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行','财务公司']
  813. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  814. bankLtvList = ['消费金融机构']# TODO
  815. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  816. smallLoanLtvList = ['小额信贷公司']
  817. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList)
  818. #当前贷款最大逾期期数
  819. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数)
  820. loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  821. if not loanOverdueLtvDf.empty:
  822. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数'])
  823. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额'])
  824. loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True)
  825. maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数'])
  826. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额']
  827. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里
  828. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3)
  829. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3)
  830. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6)
  831. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6)
  832. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6)
  833. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12)
  834. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12)
  835. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12)
  836. lastLoanDf = loanOverdueLtvDf;
  837. if not lastLoanDf.empty:
  838. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"])
  839. normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
  840. #未结清贷款总账户数:账户状态不等于结清和转出的记录数
  841. notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  842. if not notSettleDf.empty:
  843. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数'] = normalDf.index.size
  844. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2)
  845. #当前未结清贷款余额总和
  846. # ltvDf = tmpDf[tmpDf['业务种类'].isin(bizTypeList)]
  847. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  848. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  849. # 当前未结清住房贷款余额总和
  850. houseDf = notSettleDf[notSettleDf['业务种类'].isin(houseLtvList)]
  851. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清住房贷款余额总和'] = np.sum(houseDf['余额(本金)'])
  852. # 当前未结清汽车贷款余额总和
  853. cardDf = notSettleDf[notSettleDf['业务种类'].isin(cardLtvList)]
  854. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清汽车贷款余额总和'] = np.sum(cardDf['余额(本金)'])
  855. # 当前未结清个人经营性贷款余额总和
  856. operateLtvDf = notSettleDf[notSettleDf['业务种类'].isin(operateLtvList)]
  857. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清个人经营性贷款余额总和'] = np.sum(operateLtvDf['余额(本金)'])
  858. # 当前平均每月贷款余额总和
  859. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前平均每月贷款余额总和'] = round(np.sum(normalDf['余额(本金)'])/12,2)
  860. #当前正常贷款账户余额
  861. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)'])
  862. # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总)
  863. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']))
  864. settleDf = df[(df['账户状态'] == '结清')]
  865. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数'] = settleDf.index.size
  866. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2)
  867. #贷款24期还款记录次数 TODO
  868. # 最近3个月个人消费贷款发放额度
  869. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款'])
  870. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3,queryInfo["reportTime"])
  871. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6,queryInfo["reportTime"])
  872. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12,queryInfo["reportTime"])
  873. #未结清贷款平均剩余还款期数
  874. payPieDf = settleDf[settleDf['还款期数']!='--']
  875. if payPieDf.index.size!=0:
  876. loanAccountInfoDf.loc[loanAccountInfoIndex, '未结清贷款平均剩余还款期数'] = int(np.sum(payPieDf['剩余还款期数'])/payPieDf.index.size)
  877. # 当前贷款本月应还金额总和
  878. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月应还金额总和'] = np.sum(notSettleDf['本月应还款'])
  879. # 当前贷款本月实还金额总额
  880. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月实还金额总额'] = np.sum(notSettleDf['本月实还款'])
  881. #解析贷记卡账户信息指标
  882. def parseCreditCardMergeDf(df):
  883. if not df.empty:
  884. # 历史信用卡总法人机构数
  885. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'历史信用卡总法人机构数'] = df['发卡机构'].unique().size
  886. # creditCardUseDf = df[df['已用额度']>0];
  887. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size
  888. #统一排除
  889. creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  890. #大额专项分期额度(合计)
  891. # 已用分期金额(合计)
  892. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '大额专项分期额度(合计)'] = np.sum(creditDf['大额专项分期额度'])
  893. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '已用分期金额(合计)'] = np.sum(creditDf['已用分期金额'])
  894. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf)
  895. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf)
  896. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf,df,0.9)
  897. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, df, 1)
  898. # 从“贷记卡信息”中提取,计算授信额度时剔除销户,计算已用额度时剔除呆账、呆帐、销户后,SUM(各账户已用额度) / SUM(各账户授信额度)
  899. useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  900. totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')]
  901. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度'])/np.sum(totalCreditDf['账户授信额度']),2)
  902. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / np.sum(totalCreditDf['账户授信额度']), 2)
  903. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / np.sum(totalCreditDf['账户授信额度']), 2)
  904. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数'] = np.max(creditDf['当前逾期期数'])#用于计算
  905. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期金额'] = np.max(creditDf['当前逾期总额'])
  906. if not creditDf.empty:
  907. creditDf = creditDf.reset_index(drop=True)
  908. maxOverdueIndex = np.argmax(creditDf['当前逾期期数'])
  909. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex,:]['当前逾期总额']
  910. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df,queryInfo["reportTime"],3)
  911. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 3)
  912. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 3)
  913. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 6)
  914. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 6)
  915. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 6)
  916. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 12)
  917. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 12)
  918. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 12)
  919. if not creditDf.empty:
  920. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf,queryInfo["reportTime"])
  921. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡还款比例'] = round(np.sum(creditDf['本月应还款'])/np.sum(creditDf['本月实还款']),2)
  922. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最高还款比例'] = round(np.max(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2)
  923. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最低还款比例'] = round(np.min(creditDf['本月应还款']) / np.sum(creditDf['本月实还款']), 2)
  924. normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)];
  925. notCloseDf = df[(df['账户状态'] != '销户')]
  926. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数'] = normalDf.index.size
  927. if not notCloseDf.empty and not normalDf.empty:
  928. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数占比'] = round(normalDf.index.size/notCloseDf.index.size,2)
  929. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡已用额度'] = np.sum(normalDf['已用额度'])
  930. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数'] = normalDf[normalDf['已用额度']>0].index.size
  931. if not creditDf.empty:
  932. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2)
  933. if notCloseDf.empty:
  934. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = -99
  935. else:
  936. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = \
  937. round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数']/notCloseDf.index.size,3)
  938. #当前正常贷记卡账户余额占总余额比
  939. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月实还金额总和'] = np.sum(creditDf['本月实还款'])
  940. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月应还金额总和'] = np.sum(creditDf['本月应还款'])
  941. maxAmtDf = df[(df['币种'] == '人民币元')]
  942. if not maxAmtDf.empty:
  943. maxAmtDf = maxAmtDf.reset_index(drop=True)
  944. maxAmtIndex = np.argmax(maxAmtDf['账户授信额度'])
  945. maxOpenDate = maxAmtDf.loc[maxAmtIndex,:]['开立日期'];
  946. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate,queryInfo["reportTime"])+1;
  947. # 名下贷记卡数量-状态正常
  948. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] = df[(df['账户状态'] != '销户')].index.size
  949. # 名下贷记卡数量-状态未激活
  950. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] = df[(df['账户状态'] == '未激活')].index.size
  951. # 名下贷记卡数量-状态异常--异常包含(2-冻结,3-止付,5-呆帐,10-其他)
  952. abnormalList = ['冻结','止付','呆帐','其他']
  953. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] = df[(df['账户状态'].isin(abnormalList))].index.size
  954. # 名下贷记卡比例-状态正常
  955. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态正常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] / df.index.size,2)
  956. # 名下贷记卡比例-状态未激活
  957. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态未激活'] =round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] / df.index.size,2)
  958. # 名下贷记卡比例-状态异常
  959. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态异常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] / df.index.size,2)
  960. #解析准贷记卡账户信息指标
  961. def parseCreditCardMergeDfZ(df):
  962. if not df.empty:
  963. #统一排除
  964. creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  965. useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  966. totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')]
  967. normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)];
  968. notCloseDf = df[(df['账户状态'] != '销户')]
  969. #解析使用率 TODO 使用汇总计算还是使用明细计算
  970. def parseUseRate():
  971. # useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)']
  972. # 从“信贷交易授信及负债信息概要”中“非循环贷账户信息汇总”、“循环额度下分账户信息汇总”、“循环贷账户信息汇总”、“贷记卡账户信息汇总”和“准贷记卡账户信息汇总”里提取,SUM(
  973. # 所有“余额”、“已用额度”和“透支余额”) / SUM(所有“授信总额”和“授信额度”)
  974. loanUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额']
  975. cycleCreditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额']
  976. cycleUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额']
  977. creditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度']
  978. creditAmtUseZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度']
  979. loanTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额']
  980. cycleCreditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额']
  981. cycleTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额']
  982. creditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额']
  983. creditAmtTotalZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额']
  984. # if str(loanUseAmt)=="nan":
  985. # loanUseAmt = 0;
  986. # if str(cycleCreditUseAmt) == "nan":
  987. # loanUseAmt = 0;
  988. # if str(cycleCreditUseAmt) == "nan":
  989. # loanUseAmt = 0;
  990. useAmt = loanUseAmt+cycleCreditUseAmt+cycleUseAmt+creditUseAmt+creditAmtUseZ
  991. totalAmt = loanTotalAmt+cycleCreditTotalAmt+cycleTotalAmt+creditTotalAmt+creditAmtTotalZ
  992. if totalAmt !=0:
  993. useRateDf.loc[useRateIndex, '全账户使用率(已用额度/授信总额)'] = round(useAmt / totalAmt,2)
  994. if loanTotalAmt!=0:
  995. useRateDf.loc[useRateIndex, '非循环贷账户使用率(已用额度/授信总额)'] = round(loanUseAmt / loanTotalAmt,2)
  996. if cycleCreditTotalAmt !=0:
  997. useRateDf.loc[useRateIndex, '循环额度下分账户使用率(已用额度/授信总额)'] = round(cycleCreditTotalAmt / cycleCreditTotalAmt,2)
  998. if cycleTotalAmt !=0:
  999. useRateDf.loc[useRateIndex, '循环贷账户使用率(已用额度/授信总额)'] = round(cycleUseAmt / cycleTotalAmt,2)
  1000. if creditTotalAmt !=0:
  1001. useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)'] = round(creditUseAmt / creditTotalAmt,2)
  1002. if creditAmtTotalZ !=0:
  1003. useRateDf.loc[useRateIndex, '准贷记卡账户使用率(已用额度/授信总额)'] = round(creditAmtUseZ / creditAmtTotalZ,2)
  1004. #解析开户数
  1005. def parseOpenAccount(loanDf,creditCardDf,creditCardDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1006. reportTime = queryInfo["reportTime"];
  1007. openAccountDf.loc[openAccountIndex, '近3个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,3)+cip.getOpenAccount(creditCardDf,reportTime,3)+cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1008. openAccountDf.loc[openAccountIndex, '近6个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,6)+cip.getOpenAccount(creditCardDf,reportTime,6)+cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1009. openAccountDf.loc[openAccountIndex, '近9个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,9)+cip.getOpenAccount(creditCardDf,reportTime,9)+cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1010. openAccountDf.loc[openAccountIndex, '近12个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,12)+cip.getOpenAccount(creditCardDf,reportTime,12)+cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1011. openAccountDf.loc[openAccountIndex, '近24个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,24)+cip.getOpenAccount(creditCardDf,reportTime,24)+cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1012. openAccountDf.loc[openAccountIndex, '近3个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,consts.bankList)
  1013. openAccountDf.loc[openAccountIndex, '近6个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,consts.bankList)
  1014. openAccountDf.loc[openAccountIndex, '近9个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,consts.bankList)
  1015. openAccountDf.loc[openAccountIndex, '近12个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,consts.bankList)
  1016. openAccountDf.loc[openAccountIndex, '近24个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,consts.bankList)
  1017. openAccountDf.loc[openAccountIndex, '近3个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,"")
  1018. openAccountDf.loc[openAccountIndex, '近6个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,"")
  1019. openAccountDf.loc[openAccountIndex, '近9个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,"")
  1020. openAccountDf.loc[openAccountIndex, '近12个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,"")
  1021. openAccountDf.loc[openAccountIndex, '近24个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,"")
  1022. openAccountDf.loc[openAccountIndex, '近3个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,3)
  1023. openAccountDf.loc[openAccountIndex, '近6个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,6)
  1024. openAccountDf.loc[openAccountIndex, '近9个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,9)
  1025. openAccountDf.loc[openAccountIndex, '近12个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,12)
  1026. openAccountDf.loc[openAccountIndex, '近24个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,24)
  1027. openAccountDf.loc[openAccountIndex, '近3个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1028. openAccountDf.loc[openAccountIndex, '近6个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1029. openAccountDf.loc[openAccountIndex, '近9个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1030. openAccountDf.loc[openAccountIndex, '近12个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1031. openAccountDf.loc[openAccountIndex, '近24个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1032. #从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”、“贷记卡账户”和“准贷记卡账户”里提取,5年里账户还款状态出现“1、2、3、4、5、6、7、D、Z、G、B”的账户数/所有账户数
  1033. overdueLoanPayRcdDf = loanPayRecordMergeDf[loanPayRecordMergeDf['账户编号'].isin(loanDf['账户编号'].values)]
  1034. overdueLoanPayRcdDf = utils.replacePayRcdStatusOverdue(overdueLoanPayRcdDf)
  1035. overdueLoanPayRcdDf = overdueLoanPayRcdDf[overdueLoanPayRcdDf['还款状态'] > 0]
  1036. overdueCreditPayRcdDf = creditCardPayRecordMergeDf[creditCardPayRecordMergeDf['账户编号'].isin(creditCardDf['账户编号'].values)]
  1037. overdueCreditPayRcdDf = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDf)
  1038. overdueCreditPayRcdDf = overdueCreditPayRcdDf[overdueCreditPayRcdDf['还款状态'] > 0]
  1039. overdueCreditPayRcdDfZ = creditCardPayRecordMergeDfZ[creditCardPayRecordMergeDfZ['账户编号'].isin(creditCardDfZ['账户编号'].values)]
  1040. overdueCreditPayRcdDfZ = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDfZ)
  1041. overdueCreditPayRcdDfZ = overdueCreditPayRcdDfZ[overdueCreditPayRcdDfZ['还款状态'] > 0]
  1042. loanAccountNum = loanPayRecordMergeDf['账户编号'].unique().size
  1043. creditAccountNum = creditCardPayRecordMergeDf['账户编号'].unique().size
  1044. creditAccountNumZ = creditCardPayRecordMergeDfZ['账户编号'].unique().size
  1045. overdueLoanNum = overdueLoanPayRcdDf['账户编号'].unique().size
  1046. overdueCreditNum = overdueCreditPayRcdDf['账户编号'].unique().size
  1047. overdueCreditNumZ = overdueCreditPayRcdDfZ['账户编号'].unique().size
  1048. openAccountDf.loc[openAccountIndex, '有过逾期记录的账户/全账户数'] = round((overdueLoanNum+overdueCreditNum+overdueCreditNumZ)/(loanAccountNum+creditAccountNum+creditAccountNumZ),2)
  1049. otherPerLoanDf = loanDf[loanDf['业务种类'].isin(consts.otherPerLoan)]
  1050. otherPerLoanNum = otherPerLoanDf.index.size;
  1051. overdueOtherPerLoanNum = otherPerLoanDf[otherPerLoanDf['账户编号'].isin(overdueLoanPayRcdDf['账户编号'].values)].index.size;
  1052. if otherPerLoanNum!=0:
  1053. openAccountDf.loc[openAccountIndex, '有过逾期记录的消费金融类账户/全消费金融类账户数'] = overdueOtherPerLoanNum/otherPerLoanNum
  1054. if loanAccountNum!=0:
  1055. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷款账户/全贷款账户数'] = round(overdueLoanNum/loanAccountNum,2)
  1056. if creditAccountNum!=0:
  1057. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷记卡账户/全贷记卡账户数'] = round(overdueCreditNum/creditAccountNum,2)
  1058. if creditAccountNumZ!=0:
  1059. openAccountDf.loc[openAccountIndex, '有过透支记录的准贷记卡账户/全准贷记卡账户数']= round(overdueCreditNumZ/creditAccountNumZ,2)
  1060. #解析24期还款状态指标
  1061. def parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1062. reportTime = queryInfo["reportTime"];
  1063. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)
  1064. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)
  1065. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)
  1066. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)
  1067. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)
  1068. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)
  1069. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)
  1070. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)
  1071. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)
  1072. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)
  1073. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或大等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)
  1074. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)
  1075. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近3月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)
  1076. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)
  1077. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)
  1078. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)
  1079. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)
  1080. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)
  1081. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)
  1082. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)
  1083. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)
  1084. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)
  1085. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)
  1086. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)
  1087. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1088. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1089. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1090. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,6)
  1091. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1092. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1093. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)\
  1094. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,3)
  1095. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“1”的次数'] = \
  1096. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)\
  1097. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,6)
  1098. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“1”的次数'] = \
  1099. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)\
  1100. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,12)
  1101. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“1”的次数'] = \
  1102. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)\
  1103. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,24)
  1104. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“2”的次数'] = \
  1105. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)\
  1106. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,6)
  1107. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“2”的次数'] = \
  1108. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)\
  1109. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,12)
  1110. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“2”的次数'] = \
  1111. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)\
  1112. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,24)
  1113. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“3”的次数'] = \
  1114. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)\
  1115. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1116. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“3”的次数'] = \
  1117. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)\
  1118. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1119. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“3”的次数'] = \
  1120. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)\
  1121. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1122. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“4”的次数'] = \
  1123. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)\
  1124. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1125. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“4”的次数'] = \
  1126. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)\
  1127. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1128. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"G"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"G",24)
  1129. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDf,reportTime,"G",24)
  1130. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDfZ,reportTime,"G",24)
  1131. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"Z"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"Z",24)
  1132. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24)
  1133. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月状态正常账户数目'] = prp.getLoanNormalCount(loanPayRecordMergeDf,reportTime,24)
  1134. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24)
  1135. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDf,reportTime,24)
  1136. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1137. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1138. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去3个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,3)
  1139. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去6个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,6)
  1140. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去12个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,12)
  1141. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去24个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,24)
  1142. #概要信息里的字段,从还款状态计算
  1143. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户过去5年出现逾期的所有账户数目'] = \
  1144. prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24*5)+prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24*5)\
  1145. +prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24*5)
# Parse credit-card account metrics derived from the credit-card repayment records
  1147. def parseCreditCardMergeAndPayRecordDf(df,payRcdDf):
  1148. if not df.empty and not payRcdDf.empty:
  1149. # 正常
  1150. normalDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1151. if not normalDf.empty:
  1152. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  1153. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  1154. # 临时保存,不用过滤还款状态为0的
  1155. payRcdMaxOverdueDf = overduePayRcdDf;
  1156. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态'] > 0]
  1157. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  1158. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,“当前信用卡逾期账户数”/未销户贷记卡账户数(剔除“账户状态”为未激活、销户、呆账、呆帐后记录条数)
  1159. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数占比'] = round(overduePayRcdDf['账户编号'].unique().size / normalDf.index.size, 2)
  1160. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,对(当前信用卡逾期账户数)按“开户机构代码”去重统计账户状态为逾期,按按“开户机构代码”去重后的记录条数
  1161. overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  1162. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size
  1163. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数占比'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] / normalDf['发卡机构'].unique().size, 2)
  1164. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  1165. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  1166. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近9月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  1167. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12);
  1168. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  1169. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf, 24);
  1170. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近3个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,3);
  1171. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近6个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,6);
  1172. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近9个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,9);
  1173. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近12个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,12);
  1174. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近24个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,24);
  1175. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  1176. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  1177. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡24期还款记录次数'] = np.max(payRcdTimes)
  1178. # 解析被追偿信息汇总
  1179. def parseRecoveryInfoMergeDf(df):
  1180. if not df.empty:
  1181. i=0;
  1182. def main(pdf_path):
  1183. # 解析pdf开始
  1184. with pdfplumber.open(pdf_path) as pdf:
  1185. for p in range(0, len(pdf.pages)):
  1186. page = pdf.pages[p]
  1187. # first_page = pdf.pages[1]
  1188. # if p == 3:
  1189. # print(3)
  1190. for i in range(0, len(page.extract_tables())):
  1191. table = page.extract_tables()[i]
  1192. df = pd.DataFrame(table);
  1193. if len(keyList) > 1 and i == 0: # 判断是否被分页了
  1194. if not utils.checkHeader(df, allHeaders):
  1195. key = keyList[-1];
  1196. dfObj = dfMap[key]
  1197. # dfObj["nextDf"]=df;
  1198. # 贷款信息 贷记卡信息 强制执行记录
  1199. if key == "loanDfs" or key == "creditCardDfs" or key == "forceExecRcdDfs" or key == 'recoveryInfoDfs': # 属于列表
  1200. lastDfObj = dfObj["dfs"][-1];
  1201. lastDfObj["isByPage"] = str(p + 1);
  1202. if len(dfObj["dfs"][-1]["df"].columns) == len(df.columns): # 列数相同
  1203. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0,ignore_index=True); # 去最后一个进行合并
  1204. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1205. else:
  1206. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1207. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True);
  1208. else: # 查询记录明细 为单个列表
  1209. dfObj["isByPage"] = str(p + 1);
  1210. if len(dfObj["df"].columns) == len(df.columns):
  1211. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1212. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1213. else:
  1214. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1215. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1216. # dfObj["nextDf"] = df;
  1217. # 如果列数相等合并df
  1218. continue;
  1219. headerList0 = df.loc[0, :].tolist() # 第0行为表头
  1220. headerList0 = list(filter(None, headerList0))
  1221. headerList1 = []
  1222. if df.index.size>1:
  1223. headerList1 = df.loc[1, :].tolist() # 第1行为表头
  1224. headerList1 = list(filter(None, headerList1))
  1225. if headerList1 == queryInfoDf_header: # 被查询信息 第二行为数据
  1226. queryInfoDf = df;
  1227. dfKey = "queryInfoDf"
  1228. dfMap[dfKey]["df"] = df;
  1229. keyList.append(dfKey);
  1230. elif headerList0 == identity_header: # 身份信息
  1231. identityDf = df[:2] # 截取前2行
  1232. addressDf = df.iloc[2:4, [0, 5]] # 截取3到4行的第一和6
  1233. addressDf = addressDf.reset_index(drop=True)
  1234. mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5)
  1235. identityDf = pd.concat([identityDf, addressDf], axis=1, ignore_index=True) # 横向合并
  1236. dfKey = "identityDf"
  1237. dfMap[dfKey]["df"] = identityDf;
  1238. keyList.append(dfKey);
  1239. # 组装电话号码df
  1240. dfMap[dfKey]["mobileDf"] = mobileDf
  1241. elif headerList0 == mateDf_header: # 配偶信息
  1242. mateDf = df;
  1243. dfKey = "mateDf"
  1244. dfMap[dfKey]["df"] = df;
  1245. keyList.append(dfKey);
  1246. elif headerList0 == liveInfoDf_header: # 居住信息
  1247. mateDf = df;
  1248. dfKey = "liveInfoDf"
  1249. dfMap[dfKey]["df"] = df;
  1250. keyList.append(dfKey);
  1251. elif headerList0 == occupationInfo_header: # 职业信息 可能存在分页
  1252. occupationDf = df;
  1253. dfKey = "occupationDf"
  1254. dfMap[dfKey]["df"] = df;
  1255. keyList.append(dfKey);
  1256. # elif headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1: # 查询信息概要 第二行为数据
  1257. # queryInfoBriefDf = df;
  1258. # dfKey = "queryInfoBriefDf"
  1259. # dfMap[dfKey]["df"] = df;
  1260. # keyList.append(dfKey);
  1261. elif headerList0 == loanTradeInfo_header: # 信贷交易信息
  1262. loanTradeInfoDf = df;
  1263. dfKey = "loanTradeInfoDf";
  1264. dfMap[dfKey]["df"] = df;
  1265. keyList.append(dfKey);
  1266. elif headerList0 == recoveryInfoSumDf_header: # 被追偿信息汇总
  1267. recoveryInfoSumDf = df;
  1268. dfKey = "recoveryInfoSumDf";
  1269. dfMap[dfKey]["df"] = df;
  1270. keyList.append(dfKey);
  1271. elif headerList0 == badDebtsInfoSumDf_header: # 呆账信息
  1272. badDebtsInfoSumDf = df;
  1273. dfKey = "badDebtsInfoSumDf";
  1274. dfMap[dfKey]["df"] = df;
  1275. keyList.append(dfKey);
  1276. elif headerList1 == overdueInfoSumDf_header: # 逾期透资信息汇总
  1277. overdueInfoSumDf = df;
  1278. dfKey = "overdueInfoSumDf";
  1279. dfMap[dfKey]["df"] = df;
  1280. keyList.append(dfKey);
  1281. elif headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1: # 非循环贷账户信息汇总
  1282. loanAccountInfoSumDf = df;
  1283. dfKey = "loanAccountInfoSumDf";
  1284. dfMap[dfKey]["df"] = df;
  1285. keyList.append(dfKey);
  1286. elif headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1: # 贷记卡信息汇总
  1287. creditCardInfoSumDf = df;
  1288. dfKey = "creditCardInfoSumDf";
  1289. dfMap[dfKey]["df"] = df;
  1290. keyList.append(dfKey);
  1291. elif headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1: # 准贷记卡信息汇总 目前没有数据
  1292. dfKey = "creditCardInfoSumDfZ";
  1293. dfMap[dfKey]["df"] = df;
  1294. keyList.append(dfKey);
  1295. elif headerList0 == publicInfoBriefDf_header0: #公共信息概要
  1296. dfKey = "publicInfoBriefDf";
  1297. dfMap[dfKey]["df"] = df;
  1298. keyList.append(dfKey);
  1299. elif headerList0 == queryRecordSumDf_header0:#查询记录汇总
  1300. dfKey = "queryRecordSumDf";
  1301. dfMap[dfKey]["df"] = df;
  1302. keyList.append(dfKey);
  1303. elif headerList0 == loan_header: # 贷款账户 包括循环贷,非循环贷 循环额度下分账户
  1304. dfKey = "loanDfs";
  1305. dfMap[dfKey]["dfs"].append({"df": df});
  1306. keyList.append(dfKey);
  1307. elif headerList0 == creditCard_header: # 贷记卡账户
  1308. dfKey = "creditCardDfs";
  1309. dfMap[dfKey]["dfs"].append({"df": df});
  1310. keyList.append(dfKey);
  1311. elif headerList0 == creditCardZ_header: # 准贷记卡账户 还不能和贷记卡合并
  1312. dfKey = "creditCardDfsZ";
  1313. dfMap[dfKey]["dfs"].append({"df": df});
  1314. keyList.append(dfKey);
  1315. elif headerList0 == queryRecordDetailDf_header: # 查询记录明细
  1316. dfKey = "queryRecordDetailDf";
  1317. dfMap[dfKey]["df"] = df;
  1318. keyList.append(dfKey);
  1319. elif headerList0 == forceExecRcdDfs_header: # 强制执行记录
  1320. dfKey = "forceExecRcdDfs";
  1321. dfMap[dfKey]["dfs"].append({"df": df});
  1322. keyList.append(dfKey);
  1323. elif headerList0 == recoveryInfoDfs_header: # 强制执行记录
  1324. dfKey = "recoveryInfoDfs";
  1325. dfMap[dfKey]["dfs"].append({"df": df});
  1326. keyList.append(dfKey);
  1327. # 设置分页
  1328. dfMap[dfKey]["page"] = p + 1;
  1329. # 打印结果解析并构建指标
  1330. for key in dfMap:
  1331. if dfMap[key].__contains__("page"):
  1332. logger.info(key + "-page-" + str(dfMap[key]["page"]))
  1333. if dfMap[key].__contains__("dfs"):
  1334. if key == "loanDfs": # 贷款账户
  1335. for idx in range(0, len(dfMap[key]["dfs"])):
  1336. tempDfObj = dfMap[key]["dfs"][idx];
  1337. if tempDfObj.__contains__("isByPage"):
  1338. # print("贷款账户被分页#################")
  1339. # print(key + "============被分页页数============" + str(tempDfObj["isByPage"]))
  1340. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  1341. # logger.info(tempDfObj["df"].values)
  1342. else: # 未被分页
  1343. # logger.info(tempDfObj["df"].values)
  1344. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  1345. elif key == "creditCardDfs": # 贷记卡账户合并
  1346. for idx in range(0, len(dfMap[key]["dfs"])):
  1347. tempDfObj = dfMap[key]["dfs"][idx];
  1348. creditCardAccountDfs.append(dfParser.mergeCreditCardDf(tempDfObj, idx,queryInfo['reportTime']))
  1349. elif key == "creditCardDfsZ": # 贷记卡账户合并
  1350. for idx in range(0, len(dfMap[key]["dfs"])):
  1351. tempDfObj = dfMap[key]["dfs"][idx];
  1352. creditCardAccountDfsZ.append(dfParser.mergeCreditCardDfZ(tempDfObj, idx,queryInfo['reportTime']))
  1353. elif key == "recoveryInfoDfs": # 贷记卡账户合并
  1354. for idx in range(0, len(dfMap[key]["dfs"])):
  1355. tempDfObj = dfMap[key]["dfs"][idx];
  1356. recoveryInfoDfs.append(dfParser.mergeRecoveryInfoDf(tempDfObj, idx, queryInfo['reportTime']))
  1357. else: # 其他
  1358. for tempDfObj in (dfMap[key]["dfs"]):
  1359. if tempDfObj.__contains__("isByPage"):
  1360. logger.info(key + "============其他被分页页数============" + str(tempDfObj["isByPage"]))
  1361. # logger.info(tempDfObj["df"].values)
  1362. else: # 单笔
  1363. tempDfObj = dfMap[key];
  1364. if tempDfObj.__contains__("isByPage"):
  1365. logger.info(key + "============被分页页数================" + str(tempDfObj["isByPage"]))
  1366. # logger.info(tempDfObj["df"].values)
  1367. if key == "queryInfoDf": # 解析被查询信息
  1368. parseQueryInfo(tempDfObj);
  1369. # print("\033[1;31m +查询信息+ \033[0m")
  1370. # print(queryInfo)
  1371. elif key == "identityDf": # 身份信息
  1372. parseIdentity(tempDfObj)
  1373. # print("\033[1;31m +身份信息+ \033[0m")
  1374. # print(identity)
  1375. elif key == "mateDf": # 配偶信息
  1376. parseMate(tempDfObj)
  1377. # print("\033[1;31m +配偶信息+ \033[0m")
  1378. # print(mate)
  1379. elif key == "liveInfoDf": # 居住信息
  1380. parseLiveInfo(tempDfObj)
  1381. # print("\033[1;31m +居住信息+ \033[0m")
  1382. elif key == "occupationDf": # 居住信息
  1383. parseOccupationInfoDf(tempDfObj)
  1384. elif key == "loanTradeInfoDf": # 信贷交易信息提示
  1385. parseLoanTradeInfo(tempDfObj);
  1386. # print("\033[1;31m +信贷交易信息提示+ \033[0m")
  1387. # print(loanTradeInfo)
  1388. elif key == "badDebtsInfoSumDf": # 呆账信息汇总
  1389. parseBadDebtsInfoSumDf(tempDfObj)
  1390. # print("\033[1;31m +呆账信息汇总+ \033[0m")
  1391. # print(overdueBrief)
  1392. elif key == "recoveryInfoDf": # 被追偿信息汇总-资产处置和垫款
  1393. parseRecoveryInfoSum(tempDfObj)
  1394. # print("\033[1;31m +资产处置和垫款+ \033[0m")
  1395. # print(overdueBrief)
  1396. elif key == "overdueInfoSumDf": # 逾期(透支)信息汇总
  1397. parseOverdueInfoSum(tempDfObj)
  1398. # print("\033[1;31m +逾期(透支)信息汇总+ \033[0m")
  1399. # print(overdueInfo)
  1400. elif key == "loanAccountInfoSumDf": # 非循环贷账户信息汇总 TODO
  1401. # tempDfObj_cycleLoanAccount = dfMap["cycleLoanAccountInfoSumDf"];
  1402. # tempDfObj_cycleCredit = dfMap["cycleCreditAccountInfoSumDf"];
  1403. # if not tempDfObj_cycleLoanAccount["df"].empty: # 循环贷
  1404. # tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleLoanAccount["df"][2:3]], axis=0,ignore_index=True)
  1405. # if not tempDfObj_cycleCredit["df"].empty: # 额度下循环分账户
  1406. # tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObj_cycleCredit["df"][2:3]], axis=0, ignore_index=True)
  1407. parseLoanAccountInfoSum(tempDfObj)
  1408. # print("\033[1;31m +贷款信息汇总+ \033[0m")
  1409. # print(loanAccountInfoSum)
  1410. elif key == "cycleCreditAccountInfoSumDf":#循环额度
  1411. parseCycleCreditAccountInfoSum(tempDfObj)
  1412. elif key == "cycleLoanAccountInfoSumDf":#循环贷
  1413. parseCyleLoanAccountInfoSum(tempDfObj)
  1414. elif key == "creditCardInfoSumDf":#贷记卡
  1415. # tempDfObjZ = dfMap["creditCardInfoSumDfZ"]; # 准贷记卡纳入计算 2:3为准贷记卡数据
  1416. # if not tempDfObjZ["df"].empty:
  1417. # tempDfObj["df"] = pd.concat([tempDfObj["df"], tempDfObjZ["df"][2:3]], axis=0, ignore_index=True)
  1418. parseCreditCardInfoSum(tempDfObj)
  1419. # print("\033[1;31m +贷记卡信息汇总+ \033[0m")
  1420. # print(creditCardInfoSum)
  1421. elif key == "creditCardInfoSumDfZ": # 准贷记卡
  1422. parseCreditCardInfoSumZ(tempDfObj)
  1423. elif key == "publicInfoBriefDf":
  1424. parsePublicInfoBrief(tempDfObj);
  1425. elif key == "queryRecordSumDf":
  1426. parseQueryRecordSum(tempDfObj);
  1427. elif key == "queryRecordDetailDf": # 查询记录明细
  1428. parseQueryInfoDetail(tempDfObj)
  1429. # print("\033[1;31m +查询记录明细+ \033[0m")
  1430. # print(queryInfoDetail)
  1431. result = ""
  1432. # 基本信息
  1433. # result+=("\033[1;34m +身份信息+ \033[0m")+"\n"
  1434. result+=utils.toJson(identityInfoDf)+"\n"
  1435. result += utils.toJson(mateInfoDf) + "\n"
  1436. result += utils.toJson(liveInfoDf) + "\n"
  1437. result += utils.toJson(occupationInfoDf) + "\n"
  1438. # result+=("\033[1;34m +概要信息+ \033[0m")+"\n"
  1439. # result+=("\033[1;34m +信贷交易信息提示+ \033[0m")+"\n"
  1440. result+=utils.toJson(briefInfoDf_loanTradeInfo)+"\n"
  1441. # result+=("\033[1;34m +被追偿信息汇总及呆账信息汇总+ \033[0m")+"\n"
  1442. result+=utils.toJson(briefInfoDf_recoveryInfoSum)+"\n"
  1443. result += utils.toJson(briefInfoDf_badDebtsInfoSum) + "\n"
  1444. # result+=("\033[1;34m +逾期(透支)信息汇总+ \033[0m")+"\n"
  1445. #此信息先占位
  1446. result+="briefInfoDf_overdueInfoSum"+"\n"
  1447. # result+=("\033[1;34m +信贷交易授信及负债信息概要+ \033[0m")+"\n"
  1448. result+=utils.toJson(briefInfoDf_loanTradeCreditInfo)+"\n"
  1449. #公共信息
  1450. result += utils.toJson(publicInfoBriefDf) + "\n"
  1451. #查询记录汇总
  1452. result += utils.toJson(queryRecordSumDf) + "\n"
  1453. # 单独输出贷款df
  1454. # logger.info("\033[1;34m +贷款信息Dataframe+ \033[0m")
  1455. # logger.info(dfParser.dfHeaderLoan)
  1456. loanMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoan)
  1457. loanPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoanPayRecord)
  1458. # 输出数据
  1459. for loanDfObj in loanAccountDfs:
  1460. loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True);
  1461. loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0,
  1462. ignore_index=True);
  1463. # logger.info(loanMergeDf.values)
  1464. # logger.info("\033[1;34m +贷款信息还款记录Dataframe+ \033[0m")
  1465. # logger.info(dfParser.dfHeaderLoanPayRecord)
  1466. # logger.info(loanPayRecordMergeDf.values)
  1467. #
  1468. #==============================信贷交易明细 ===============================
  1469. #被追偿信息
  1470. result += utils.toJson(creditTradeDetailDf_recoveryInfo) + "\n"
  1471. # 信贷交易明细-解析非循环贷账户
  1472. parseLoanAccountInfo(loanMergeDf);
  1473. result += utils.toJson(creditTradeDetailDf_loanAccountInfo) + "\n"
  1474. #循环额度分账户
  1475. parseCycleCreditAccountInfo(loanMergeDf);
  1476. result += utils.toJson(creditTradeDetailDf_cycleCreditAccountInfo) + "\n"
  1477. #循环贷
  1478. parseCycleLoanAccountInfo(loanMergeDf);
  1479. result += utils.toJson(creditTradeDetailDf_cycleLoanAccountInfo) + "\n"
  1480. # 解析贷款账户指标
  1481. parseLoanMergeDf(loanMergeDf);
  1482. # 解析还款记录相关指标
  1483. parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf);
  1484. # logger.info(loanAccountInfo)
  1485. # logger.info(consts.loanAccountInfoHeader)
  1486. # logger.info(loanAccountInfoDf.values)
  1487. # result+=("\033[1;34m +贷款账户信息+ \033[0m")+"\n"
  1488. result+=utils.toJson(loanAccountInfoDf)+"\n"
  1489. #贷记卡合并df
  1490. creditCardMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCard)
  1491. creditCardPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecord)
  1492. # logger.info("\033[1;34m +贷记卡信息Dataframe+ \033[0m")
  1493. # logger.info(dfParser.dfHeaderCreditCard)
  1494. # 输出数据
  1495. for creditCardDfObj in creditCardAccountDfs:
  1496. creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0, ignore_index=True);
  1497. creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]], axis=0,ignore_index=True);
  1498. # logger.info(creditCardMergeDf.values)
  1499. # 解析贷记卡账户指标
  1500. parseCreditCardMergeDf(creditCardMergeDf);
  1501. parseCreditCardMergeAndPayRecordDf(creditCardMergeDf,creditCardPayRecordMergeDf)
  1502. #准贷记卡合并df
  1503. creditCardMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardZ)
  1504. creditCardPayRecordMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecordZ)
  1505. for creditCardDfObj in creditCardAccountDfsZ:
  1506. creditCardMergeDfZ = pd.concat([creditCardMergeDfZ, creditCardDfObj["creditCardDfZ"]], axis=0, ignore_index=True);
  1507. creditCardPayRecordMergeDfZ = pd.concat([creditCardPayRecordMergeDfZ, creditCardDfObj["creditCardPayRecordDfZ"]], axis=0,ignore_index=True);
  1508. #解析准贷记卡相关指标
  1509. parseCreditCardMergeDfZ(creditCardMergeDfZ);
  1510. #加工使用率指标
  1511. # 被追偿信息合并df
  1512. recoveryInfoMergeDf = pd.DataFrame(columns=dfParser.dfHeaderRecoveryInfo)
  1513. for recoveryInfoDfObj in recoveryInfoDfs:
  1514. recoveryInfoMergeDf = pd.concat([recoveryInfoMergeDf, recoveryInfoDfObj["recoveryInfoDf"]], axis=0, ignore_index=True);
  1515. parseRecoveryInfoMergeDf(recoveryInfoMergeDf);
  1516. # result+=("\033[1;34m +贷记卡账户信息+ \033[0m")+"\n"
  1517. result+=utils.toJson(creditCardAccountInfoDf)+"\n"
  1518. result += utils.toJson(creditCardAccountInfoDfZ) + "\n"
  1519. #使用率
  1520. parseUseRate()
  1521. result += utils.toJson(useRateDf) + "\n"
  1522. #开户数
  1523. parseOpenAccount(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1524. result += utils.toJson(openAccountDf) + "\n"
  1525. #24期还款状态
  1526. parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1527. result += utils.toJson(payRcdStatusDf) + "\n"
  1528. #由于逾期汇总的指标再还款状态之后需要替换占位 TODO
  1529. result = result.replace("briefInfoDf_overdueInfoSum",utils.toJson(briefInfoDf_overdueInfoSum))
  1530. # result+=("\033[1;34m +查询记录明细+ \033[0m")+"\n"
  1531. result+=utils.toJson(queryRecordDetailDf)+"\n"
  1532. return result;
  1533. # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig')
  1534. if __name__ == '__main__':
  1535. basePath = "D:/mydocument/myproject/git/busscredit/Crerdai/";
  1536. pdf_path = basePath + "闻海雁532329198801060347.pdf"
  1537. # pdf_path = basePath+"雷雨晴130630199006130027.pdf"
  1538. pdf_path=basePath+"杨安140402197102111236.pdf"
  1539. # pdf_path=basePath+"刘盼兰130133198912261210.pdf"
  1540. # pdf_path=basePath+"马维强130521198604045272.pdf"
  1541. pdf_path = basePath + "郑晨晨130681199008205811.pdf"
  1542. # pdf_path=basePath+"人行征信模拟数据报告.pdf"
  1543. basePath = "D:/mydocument/myproject/git/busscredit/20200414_report/";
  1544. # pdf_path = basePath + "艾思语51112319960218732X.pdf"
  1545. isBat = True
  1546. isBat = False
  1547. if isBat:
  1548. for file in os.listdir(basePath):
  1549. if file.endswith("pdf"):
  1550. start = timeit.default_timer();
  1551. pdf_path = basePath+file;
  1552. outPath = pdf_path.replace("pdf",'txt')
  1553. if os.path.exists(outPath):
  1554. continue;
  1555. logger.info(file + "解析开始...")
  1556. result = main(pdf_path)
  1557. # print(result)
  1558. #输出到文件
  1559. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1560. print(result.replace("\033[1;34m","").replace("\033[0m",""))
  1561. logger.info(file+"解析完成")
  1562. gc.collect()
  1563. s = timeit.default_timer() - start;
  1564. logger.info(str(s) + " 秒")
  1565. else:
  1566. start = timeit.default_timer();
  1567. logger.info(pdf_path + "解析开始...")
  1568. outPath = pdf_path.replace("pdf", 'txt')
  1569. result = main(pdf_path)
  1570. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1571. print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  1572. logger.info(pdf_path + "解析完成")
  1573. s = timeit.default_timer() - start;
  1574. logger.info(str(s) + " 秒")