# parseCreditPdf.py
  1. #coding=utf-8
  2. import shutil
  3. import pdfplumber
  4. import pandas as pd
  5. import numpy as np;
  6. import sys
  7. import os
  8. import traceback
  9. from prp import PrpCrypt
  10. #指标相关
  11. import loanIndexParser as lip;
  12. import payRcdIndexParser as prp;
  13. import creditCardIndexParser as cip
  14. import queryInfoIndexParser as qip
  15. import requests
  16. import utils;
  17. import time;
  18. import consts;
  19. import math
  20. import dfParser;
  21. import gc
  22. import json
  23. from dbController import DbController
  24. from pboc.invokePboc import PBOC
  25. from ini_op import Config;
  26. base_dir = os.path.dirname(os.path.abspath(__file__))
  27. config = Config(base_dir+"/config.ini");
  28. productNumJz = config.get("baseconf","productNumJz")
  29. productNumXxw = config.get("baseconf","productNumXxw")
  30. productNumXy = config.get("baseconf", "productNumXy")
  31. productNumFb = config.get("baseconf", "productNumFb")
  32. productNumKcd = config.get("baseconf", "productNumKcd")
  33. #连接数据库
  34. dbController = DbController();
  35. pd.set_option('mode.chained_assignment', None)
  36. import log
  37. logger = log.logger
  38. import xyHttp
  39. # 查询信息
  40. dfMap = {};
  41. allHeaders = [] # 所有表头
  42. queryInfoDf = pd.DataFrame();
  43. queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"];
  44. dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None};
  45. allHeaders.append(queryInfoDf_header);
  46. # 身份信息
  47. identityDf = pd.DataFrame();
  48. identity_header = ['性别', '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱']
  49. addressDf = pd.DataFrame(); # 通讯地址
  50. dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None};
  51. allHeaders.append(identity_header);
  52. # 配偶信息
  53. mateDf = pd.DataFrame();
  54. mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话']
  55. dfMap["mateDf"] = {"df": mateDf, "nextDf": None};
  56. allHeaders.append(mateDf_header);
  57. # 居住信息====暂时该信息没有用到先不解析
  58. liveInfoDf = pd.DataFrame();
  59. liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期']
  60. dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None};
  61. allHeaders.append(liveInfoDf_header);
  62. # 职业信息
  63. occupationDf = pd.DataFrame();
  64. occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话']
  65. occupationInfo_header1 = ['编号', '职业', '行业', '职务', '职称', '进入本单位年份', '信息更新日期']
  66. dfMap["occupationDf"] = ({"df": occupationDf, "nextDf": None});
  67. # allHeaders.append(occupationInfo_header1);
  68. allHeaders.append(occupationInfo_header);
  69. # 上次查询记录
  70. preQueryRcd_header0 = ['上一次查询记录']
  71. allHeaders.append(preQueryRcd_header0);
  72. # 查询记录概要
  73. # queryInfoBriefDf = pd.DataFrame();
  74. # queryInfoBrief_header0 = ['最近1个月内的查询机构数', '最近1个月内的查询次数', '最近2年内的查询次数']
  75. # queryInfoBrief_header1 = ['贷款审批', '信用卡审批', '贷款审批', '信用卡\n审批', '本人查询', '贷后管理', '担保资格\n审查', '特约商户\n实名审查']
  76. # dfMap["queryInfoBriefDf"] = ({"df": queryInfoBriefDf, "nextDf": None});
  77. # allHeaders.append(queryInfoBrief_header0);
  78. # allHeaders.append(queryInfoBrief_header1);
  79. # 信贷交易信息提示
  80. loanTradeInfoDf = pd.DataFrame();
  81. loanTradeInfo_header = ['业务类型', '账户数', '首笔业务发放月份'];
  82. dfMap["loanTradeInfoDf"] = ({"df": loanTradeInfoDf, "nextDf": None});
  83. allHeaders.append(loanTradeInfo_header)
  84. # 信贷交易违约信息概要
  85. # 被追偿信息汇总 资产处置和垫款业务
  86. recoveryInfoSumDf = pd.DataFrame();
  87. recoveryInfoSumDf_header = ['业务种类', '账户数', '余额'];
  88. dfMap["recoveryInfoSumDf"] = ({"df": recoveryInfoSumDf, "nextDf": None});
  89. allHeaders.append(recoveryInfoSumDf_header)
  90. # 呆账信息汇总
  91. badDebtsInfoSumDf = pd.DataFrame();
  92. badDebtsInfoSumDf_header = ['账户数', '余额']; # 被追偿信息汇总
  93. dfMap["badDebtsInfoSumDf"] = ({"df": badDebtsInfoSumDf, "nextDf": None});
  94. allHeaders.append(badDebtsInfoSumDf_header)
  95. # 逾期透资信息汇总
  96. overdueInfoSumDf = pd.DataFrame();
  97. overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数']
  98. dfMap["overdueInfoSumDf"] = ({"df": overdueInfoSumDf, "nextDf": None});
  99. allHeaders.append(overdueInfoSumDf_header)
  100. # 非循环贷账户信息汇总
  101. loanAccountInfoSumDf = pd.DataFrame();
  102. loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总']
  103. loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  104. dfMap["loanAccountInfoSumDf"] = ({"df": loanAccountInfoSumDf, "nextDf": None});
  105. allHeaders.append(loanAccountInfoSumDf_header0)
  106. allHeaders.append(loanAccountInfoSumDf_header1)
  107. # 循环额度下分账户信息汇总
  108. cycleCreditAccountInfoSumDf = pd.DataFrame();
  109. cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总']
  110. cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'],
  111. dfMap["cycleCreditAccountInfoSumDf"] = ({"df": cycleCreditAccountInfoSumDf, "nextDf": None});
  112. allHeaders.append(cycleCreditAccountInfoSumDf_header0)
  113. allHeaders.append(cycleCreditAccountInfoSumDf_header1)
  114. # 循环贷账户信息汇总
  115. cycleLoanAccountInfoSumDf = pd.DataFrame();
  116. cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总']
  117. cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  118. dfMap["cycleLoanAccountInfoSumDf"] = ({"df": cycleLoanAccountInfoSumDf, "nextDf": None});
  119. allHeaders.append(cycleLoanAccountInfoSumDf_header0)
  120. allHeaders.append(cycleLoanAccountInfoSumDf_header1)
  121. # 贷记卡账户信息汇总
  122. creditCardInfoSumDf = pd.DataFrame();
  123. creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总']
  124. creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  125. dfMap["creditCardInfoSumDf"] = ({"df": creditCardInfoSumDf, "nextDf": None});
  126. allHeaders.append(creditCardInfoSumDf_header0)
  127. allHeaders.append(creditCardInfoSumDf_header1)
  128. # 准贷记卡账户信息汇总
  129. creditCardInfoSumDfZ = pd.DataFrame();
  130. creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总']#'准贷记卡账户信息汇总'
  131. creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '透支余额', '最近6个月平\n均透支余额']
  132. dfMap["creditCardInfoSumDfZ"] = ({"df": creditCardInfoSumDfZ, "nextDf": None});
  133. allHeaders.append(creditCardInfoSumDfZ_header0)
  134. allHeaders.append(creditCardInfoSumDfZ_header1)
  135. #公共信息概要
  136. publicInfoBriefDf = pd.DataFrame();
  137. publicInfoBriefDf_header0 = ['公共信息汇总']
  138. dfMap["publicInfoBriefDf"] = ({"df": publicInfoBriefDf, "nextDf": None});
  139. allHeaders.append(publicInfoBriefDf_header0)
  140. #查询记录汇总
  141. queryRecordSumDf_header0=['最近1个月内的查询机构数', '最近1个月内的查询次数', '最近2年内的查询次数']
  142. queryRecordSumDf = pd.DataFrame();
  143. dfMap["queryRecordSumDf"] = ({"df": queryRecordSumDf, "nextDf": None});
  144. allHeaders.append(queryRecordSumDf_header0)
  145. # 非循环贷账户,循环额度下分账户
  146. # 循环贷账户
  147. loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种']
  148. loanDfs = [];
  149. dfMap["loanDfs"] = ({"dfs": loanDfs, "nextDf": []});
  150. allHeaders.append(loan_header)
  151. # 贷记卡账户
  152. creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式']
  153. creditCardDfs = [];
  154. dfMap["creditCardDfs"] = ({"dfs": creditCardDfs, "nextDf": []});
  155. allHeaders.append(creditCard_header)
  156. # 准备贷记卡账户
  157. creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式']
  158. creditCardDfsZ = [];
  159. dfMap["creditCardDfsZ"] = ({"dfs": creditCardDfsZ, "nextDf": []});
  160. allHeaders.append(creditCardZ_header)
  161. #
  162. # 相关还款责任信息汇总 未使用到
  163. # 信贷交易信息明细
  164. # 被追偿信息 未使用到
  165. recoveryInfoDfs_header = ['管理机构','业务种类','债权接收日期','债权金额','债权转移时的还款状态']
  166. recoveryInfoDfs = [];
  167. dfMap["recoveryInfoDfs"] = ({"dfs": recoveryInfoDfs, "nextDf": []});
  168. allHeaders.append(recoveryInfoDfs_header)
  169. # 公共信息明细
  170. # 强制执行记录
  171. forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式']
  172. forceExecRcdDfs = [];
  173. dfMap["forceExecRcdDfs"] = ({"dfs": forceExecRcdDfs, "nextDf": []});
  174. allHeaders.append(forceExecRcdDfs_header)
  175. # 查询记录
  176. queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因']
  177. dfMap["queryRecordDetailDf"] = ({"df": pd.DataFrame(), "nextDf": []});
  178. allHeaders.append(queryRecordDetailDf_header)
  179. #住房公积金参缴记录
  180. housingFundRcdDfs_header =['参缴地', '参缴日期', '初缴月份', '缴至月份', '缴费状态', '月缴存额', '个人缴存比例', '单位缴存比例']
  181. housingFundRcdDfs = []
  182. dfMap["housingFundRcdDfs"] = ({"dfs": housingFundRcdDfs, "nextDf": []});
  183. allHeaders.append(housingFundRcdDfs_header)
  184. repaymentSumDf_header0=['相关还款责任信息汇总']
  185. dfMap["repaymentSumDf"] = ({"df": pd.DataFrame(), "nextDf": None});
  186. allHeaders.append(repaymentSumDf_header0)
  187. # 处理分页思路
  188. # df估计得放到对象里面,然后存储下一个df,一个对象里包含key
  189. # 然后判断对象的df的完整性,如果不完整代表被分页了,把nextdf合并到当前的df
  190. # 针对可合并的列的场景
  191. # =======
  192. keyList = [] # 存储所有的df的key列表
  193. # pd.Series()
  194. # 检查数据是否带表头
  195. # 应该是每一页开头的一行和每个表头对比一次,确认是不是表头,或者表头有什么共同的规律也可以看下
  196. import timeit
  197. # 定义指标部分======================start
  198. reportTime = ""; # 报告时间
  199. # 被查询者姓名
  200. queryInfoName = "";
  201. queryInfoCardId = "" # 被查询者证件号码
  202. # 定义指标部分======================end
  203. # 被查询信息-基础信息
  204. # 报告时间
  205. # 被查询者姓名
  206. # 被查询者证件号码
  207. # 基础信息
  208. queryInfo = {"reportTime":"","queryInfoCardId":""}
  209. # 身份信息
  210. identity = {}
  211. # 配偶信息
  212. mate = {}
  213. # 信贷交易信息提示-信用提示
  214. loanTradeInfo = {'perHouseLoanAccount': 0, 'perBusHouseLoanAccount': 0, 'otherLoanAccount': 0, 'loanMonthMin': 0,
  215. 'creditCardMonthMin': 0, 'creditAccount': 0, 'creditAccountZ': 0}
  216. # 逾期及违约信息概要
  217. overdueBrief = {}
  218. # 逾期及透资信息汇总
  219. # 贷款逾期账户数 loanOverdueAccount
  220. # 贷款逾期月份数 loanOverdueMonth
  221. # 贷款单月最高逾期总额 loanCurMonthOverdueMaxTotal
  222. # 贷款最长逾期月数 loanMaxOverdueMonth
  223. overdueInfo = {"loanOverdueAccount": "", "loanOverdueMonth": "", "loanCurMonthOverdueMaxTotal": "",
  224. "loanMaxOverdueMonth": "",
  225. "creditCardOverdueAccount": "", "creditCardOverdueMonth": "", "creditCardCurMonthOverdueMaxTotal": "",
  226. "creditCardMaxOverdueMonth": ""}
  227. # 未结清贷款信息汇总
  228. # ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  229. loanAccountInfoSum = {"mgrOrgCount": 0, "account": 0, "creditTotalAmt": 0, "balance": 0, "last6AvgPayAmt": 0}
  230. # 未销户贷记卡发卡法人机构数
  231. # 未销户贷记卡发卡机构数
  232. # 未销户贷记卡账户数
  233. # 未销户贷记卡授信总额
  234. # 未销户贷记卡单家行最高授信额
  235. # 未销户贷记卡单家行最低授信额
  236. # 未销户贷记卡已用额度
  237. # 未销户贷记卡近6月平均使用额度
  238. # 未结清贷记卡信息汇总
  239. # ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  240. creditCardInfoSum = {"awardOrgCount": 0, "account": 0, "creditTotalAmt": 0, "perMaxCreditTotalAmt": 0,
  241. "perMinCreditTotalAmt": 0, "useAmt": 0, "last6AvgUseAmt": 0}
  242. # 信 贷 审 批 查 询 记 录 明 细
  243. queryRecordDetail = {"last1MonthQueryTimes": 0, "last3MothLoanApproveTimes": 0, "last3MonthQueryTimes": 0,
  244. "lastTimeLoanApproveMonth": 0}
  245. #最近一笔结清贷款的贷款金额 
  246. loanAccountInfo = {"lastSettleLoanAmt": 0}
  247. loanAccountDfs=[];#横向合并
  248. creditCardAccountDfs=[];#贷记卡账户合并
  249. creditCardAccountDfsZ=[];#准贷记卡账户合并
  250. recoveryInfoAccountDfs=[];#被追偿账户合并
  251. housingFundRcdAccountDfs=[];#公积金账户合并
  252. #============================指标定义区 start=============================
  253. #基本信息 拆分
  254. # basicInfoDf = pd.DataFrame(columns=consts.basicInfoHeader, index=[0])
  255. #身份信息
  256. identityInfoIndex = '身份信息'
  257. identityInfoDf = pd.DataFrame(columns=consts.identityInfoHeader,index=[identityInfoIndex])
  258. #配偶信息
  259. mateInfoIndex = '配偶信息'
  260. mateInfoDf = pd.DataFrame(columns=consts.mateInfoHeader,index=[mateInfoIndex])
  261. #居住信息
  262. liveInfoIndex = '居住信息'
  263. liveInfoDf = pd.DataFrame(columns=consts.liveInfoHeader,index=[liveInfoIndex])
  264. #职业信息
  265. occupationInfoIndex = '职业信息'
  266. occupationInfoDf = pd.DataFrame(columns=consts.occupationInfoHeader,index=[occupationInfoIndex])
  267. #信贷交易信息提示
  268. loanTradeInfoIndex = '信贷交易信息提示'
  269. briefInfoDf_loanTradeInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeInfo,index=[loanTradeInfoIndex])
  270. #被追偿信息汇总及呆账信息汇总
  271. recoveryInfoSumIndex = '信贷交易违约信息概要'
  272. briefInfoDf_recoveryInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_recoveryInfo,index=[recoveryInfoSumIndex])
  273. #呆账信息汇总
  274. badDebtsInfoIndex = '呆账信息汇总'
  275. briefInfoDf_badDebtsInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_badDebtsInfoSum,index=[badDebtsInfoIndex])
  276. #逾期(透支)信息汇总
  277. overdueInfoSumIndex='逾期(透支)信息汇总'
  278. briefInfoDf_overdueInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_overdueInfoSum,index=[overdueInfoSumIndex])
  279. #信贷交易授信及负债信息概要
  280. loanTradeCreditInfoIndex='信贷交易授信及负债信息概要'
  281. briefInfoDf_loanTradeCreditInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeCreditInfo,index=[loanTradeCreditInfoIndex]).fillna(0.0)
  282. #公共信息概要
  283. publicInfoBriefIndex = '公共信息概要'
  284. publicInfoBriefDf = pd.DataFrame(columns=consts.publicInfoBriefHeader,index=[publicInfoBriefIndex])
  285. #查询记录汇总
  286. queryRecordSumIndex = '查询记录汇总'
  287. queryRecordSumDf = pd.DataFrame(columns=consts.queryRecordSumHeader,index=[queryRecordSumIndex])
  288. #信贷交易明细-被追偿信息
  289. recoveryInfoIndex='被追偿信息'
  290. creditTradeDetailDf_recoveryInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_recoveryInfo,index=[recoveryInfoIndex])
  291. #信贷交易明细-特殊交易
  292. specialTradeIndex='特殊交易'
  293. creditTradeDetailHeader_specialTrade = pd.DataFrame(columns=consts.creditTradeDetailHeader_specialTrade,index=[specialTradeIndex])
  294. #信贷交易明细
  295. #非循环贷账户
  296. loanInfoIndex='非循环贷账户'
  297. creditTradeDetailDf_loanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_loanAccountInfo,index=[loanInfoIndex])
  298. #循环额度下分账户
  299. cycleCreditAccountInfoIndex='循环额度下分账户'
  300. creditTradeDetailDf_cycleCreditAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleCreditAccountInfo,index=[cycleCreditAccountInfoIndex])
  301. #循环贷账户
  302. cycleLoanAccountInfoIndex='循环贷账户'
  303. creditTradeDetailDf_cycleLoanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleLoanAccountInfo,index=[cycleLoanAccountInfoIndex])
  304. #贷款信息
  305. loanAccountInfoIndex='贷款信息'
  306. loanAccountInfoDf = pd.DataFrame(columns=consts.loanAccountInfoHeader,index=[loanAccountInfoIndex])
  307. #贷记卡信息
  308. creditCardAccountInfoIndex = '贷记卡账户'
  309. creditCardAccountInfoDf = pd.DataFrame(columns=consts.creditCardAccountInfoHeader,index=[creditCardAccountInfoIndex])
  310. #准贷记卡
  311. creditCardAccountInfoIndexZ = '准贷记卡账户'
  312. creditCardAccountInfoDfZ = pd.DataFrame(columns=consts.creditCardAccountInfoHeaderZ,index=[creditCardAccountInfoIndexZ])
  313. useRateIndex = '使用率'
  314. useRateDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_useRate,index=[useRateIndex])
  315. openAccountIndex = '开户数'
  316. openAccountDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_openAccount,index=[openAccountIndex])
  317. payRcdStatusIndex = '24期还款状态'
  318. payRcdStatusDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_payRcdStatus,index=[payRcdStatusIndex])
  319. #查询记录明细指标
  320. queryRecordDetailIndex = '信贷审批查询记录明细'
  321. queryRecordDetailDf = pd.DataFrame(columns=consts.queryRecordDetailHeader,index=[queryRecordDetailIndex])
  322. #住房公积金
  323. housingFundRcdIndex = '住房公积金参缴记录'
  324. housingFundRcdDf = pd.DataFrame(columns=consts.housingFundRcdHeader,index=[housingFundRcdIndex])
  325. #============================指标定义区 end=============================
  326. # 解析被查询信息指标
  327. def parseQueryInfo(dfObj):
  328. df = dfObj["df"];
  329. reportTime = df.loc[0, :][3]
  330. reportTime = reportTime.split(":")[1]
  331. reportTime = reportTime.replace(".", "-"); # 报告时间
  332. queryInfo["reportTime"] = reportTime
  333. row = df.loc[2, :]
  334. queryInfo["queryInfoName"] = row[0]; # 被查询者姓名
  335. # basicInfoDf.loc[0, '姓名'] = row[0]
  336. queryInfo["queryInfoCardId"] = row[2].replace("\n", ""); # 被查询者证件号码
  337. # basicInfoDf.loc[0, '身份证'] = row[2].replace("\n", "")
  338. # 婚姻状况
  339. # 学历
  340. # 单位电话
  341. # 住宅电话
  342. # 通讯地址
  343. def parseIdentity(dfObj):
  344. df = dfObj["df"];
  345. if not df.empty:
  346. row1 = df.loc[1, :].dropna().reset_index(drop=True)
  347. # identity["marital"] = row1[3] # 婚姻状况
  348. # identity["education"] = row1[4] # 学历
  349. # identity["commAddress"] = row1[9].replace("\n", ""); # 通讯地址
  350. identityInfoDf.loc[identityInfoIndex, '性别'] = row1[0]
  351. identityInfoDf.loc[identityInfoIndex, '出生日期'] = dfParser.formatDate(row1[1])[0:7]
  352. identityInfoDf.loc[identityInfoIndex, '国籍'] = row1[6]
  353. identityInfoDf.loc[identityInfoIndex, '户籍地址'] = row1[9].replace("\n", "")
  354. identityInfoDf.loc[identityInfoIndex, '婚姻状况'] = row1[2]
  355. identityInfoDf.loc[identityInfoIndex, '学历'] = row1[3].replace("\n", "")
  356. identityInfoDf.loc[identityInfoIndex, '学位'] = row1[4]
  357. identityInfoDf.loc[identityInfoIndex, '通讯地址'] = row1[8].replace("\n", "")
  358. identityInfoDf.loc[identityInfoIndex, '就业状况'] = row1[5]
  359. mobileDf = dfObj["mobileDf"];
  360. identityInfoDf.loc[identityInfoIndex, '历史手机号码数'] = mobileDf.index.size
  361. reportTime = queryInfo["reportTime"]
  362. identityInfoDf.loc[identityInfoIndex, '近3个月手机号码数'] = getLastMonthMobileCount(mobileDf,3,reportTime)
  363. identityInfoDf.loc[identityInfoIndex, '近6个月手机号码数'] = getLastMonthMobileCount(mobileDf, 6,reportTime)
  364. identityInfoDf.loc[identityInfoIndex, '近12个月手机号码数'] = getLastMonthMobileCount(mobileDf, 12,reportTime)
  365. identityInfoDf.loc[identityInfoIndex, '近24个月手机号码数'] = getLastMonthMobileCount(mobileDf, 24,reportTime)
  366. #最近几个月电话号码数
  367. def getLastMonthMobileCount(df, month,reportTime):
  368. # 当前日期
  369. last1MonthDateStr = reportTime
  370. # 最近一个月
  371. lastMonthDate = np.datetime64(last1MonthDateStr, "D") - np.timedelta64(30 * month, 'D')
  372. lastMonthMobileDf = df[df[5] >= str(lastMonthDate)]
  373. return lastMonthMobileDf.shape[0];
  374. # 配偶姓名
  375. # 配偶证件号码
  376. # 配偶工作单位
  377. # 配偶联系电话
  378. def parseMate(dfObj):
  379. df = dfObj["df"];
  380. if not df.empty:
  381. row1 = df.loc[1, :]
  382. mate["mateName"] = row1[0] # 配偶姓名
  383. mate["mateCardId"] = row1[2] # 配偶证件号码
  384. mate["mateWorkCompany"] = row1[3].replace("\n", ""); # 配偶工作单位
  385. mate["mateContactTel"] = row1[4]; # 配偶联系电话
  386. mateInfoDf.loc[mateInfoIndex, '姓名'] = row1[0]
  387. mateInfoDf.loc[mateInfoIndex, '证件号码'] = row1[2]
  388. mateInfoDf.loc[mateInfoIndex, '工作单位'] = row1[3].replace("\n", "");
  389. mateInfoDf.loc[mateInfoIndex, '联系电话'] = row1[4].replace("\n", "");
  390. #解析居住信息
  391. def parseLiveInfo(dfObj):
  392. df = dfObj["df"];
  393. if not df.empty:
  394. row1 = df.loc[1, :]
  395. liveInfoDf.loc[liveInfoIndex, '居住地址'] = row1[1]
  396. liveInfoDf.loc[liveInfoIndex, '住宅电话'] = row1[2]
  397. liveInfoDf.loc[liveInfoIndex, '历史居住地址个数'] = df.index.size-1;
  398. curDate = np.datetime64(time.strftime("%Y-%m-%d"));
  399. last3year = str(curDate)[0:4]
  400. last3yearDate = str(int(last3year)-3)+str(curDate)[4:10]
  401. lastLiveDf = df[df[4]>=last3yearDate];
  402. liveInfoDf.loc[liveInfoIndex, '最近3年内居住地址个数'] = lastLiveDf.index.size-1;
  403. houseIndex = df[df[3]=='自置'].index.size>0
  404. if (houseIndex):
  405. houseStr = '是'
  406. else:
  407. houseStr= '否'
  408. liveInfoDf.loc[liveInfoIndex, '当前居住状况-是否具有自有住房'] = houseStr;
  409. liveInfoDf.loc[liveInfoIndex, '居住状况'] = row1[3]
  410. liveInfoDf.loc[liveInfoIndex, '信息更新日期'] = row1[4]
  411. #解析职业信息
  412. def parseOccupationInfoDf(dfObj):
  413. df = dfObj["df"];
  414. if not df.empty:
  415. occIndex1 = 0#判断职业从哪行开始
  416. for i in range(0,df.index.size):
  417. if df.loc[i,:].dropna().tolist()==occupationInfo_header1:
  418. occIndex1=i;
  419. break;
  420. occDf = df[1:occIndex1].reset_index(drop=True)#工作单位
  421. occDfNew = pd.DataFrame()
  422. occDf1New = pd.DataFrame()
  423. #删除为none的列 合并的bug TODO
  424. for i in range(0,occDf.index.size):
  425. occDfNew = occDfNew.append([pd.DataFrame(occDf.iloc[i].dropna().reset_index(drop=True)).T],ignore_index=True)
  426. occDf1 = df[occIndex1+1:df.index.size].reset_index(drop=True) #职业
  427. for i in range(0,occDf1.index.size):
  428. occDf1New = occDf1New.append([pd.DataFrame(occDf1.iloc[i].dropna().reset_index(drop=True)).T], ignore_index=True)
  429. occDf = pd.concat([occDfNew, occDf1New], axis=1, ignore_index=True)#合并df
  430. row = occDf.loc[0, :].dropna()#取最新
  431. occupationInfoDf.loc[occupationInfoIndex, '工作单位'] = row[1]
  432. last3yearDate = utils.getLastMonthDate(queryInfo['reportTime'],12*3)
  433. occDf = utils.replaceDateColIdx(occDf,occDf.columns.size-1)
  434. dateIndex = occDf.columns.size-1;#日期列
  435. last3yearOccDf = occDf[occDf[dateIndex]>=last3yearDate]
  436. occupationInfoDf.loc[occupationInfoIndex, '最近3年内工作单位数'] = last3yearOccDf.index.size;
  437. occupationInfoDf.loc[occupationInfoIndex, '单位电话'] = row[4];
  438. reportTime = queryInfo['reportTime']
  439. try:
  440. minDateIndex = np.argmin(occDf[dateIndex]);
  441. maxDateIndex = np.argmax(occDf[dateIndex]);
  442. rowYearMin = occDf.loc[minDateIndex, :].dropna()
  443. rowYearMax = occDf.loc[maxDateIndex, :].dropna()
  444. if rowYearMin[10]!="--":
  445. occupationInfoDf.loc[occupationInfoIndex, '最早进入本单位年份距报告日期时长'] = int(str(np.datetime64(reportTime, "Y")))-int(rowYearMin[10])
  446. if rowYearMax[10]!="--":
  447. occupationInfoDf.loc[occupationInfoIndex, '最新进入本单位年份距报告日期时长'] = int(str(np.datetime64(reportTime, "Y")))-int(rowYearMax[10])
  448. except:
  449. logger.error("最早进入本单位年份距报告日期时长解析异常")
  450. row0 = occDf.loc[0,:].dropna().reset_index(drop=True)#最新
  451. occupationInfoDf.loc[occupationInfoIndex, '单位性质'] =row0[2]
  452. occupationInfoDf.loc[occupationInfoIndex, '单位地址'] = row0[3].replace("\n","")
  453. occupationInfoDf.loc[occupationInfoIndex, '职业'] = row0[6]
  454. occupationInfoDf.loc[occupationInfoIndex, '行业'] = row0[7]
  455. occupationInfoDf.loc[occupationInfoIndex, '职务'] = row0[8]
  456. occupationInfoDf.loc[occupationInfoIndex, '职称'] = row0[9]
  457. occupationInfoDf.loc[occupationInfoIndex, '进入本单位年份'] = row0[10]
  458. occupationInfoDf.loc[occupationInfoIndex, '信息更新日期'] = row0[11]
  459. occupationInfoDf.loc[occupationInfoIndex, '历史工作单位数'] = occDf1.index.size
  460. # 日期相减离当前时间月份
  461. # 贷款账龄(月数)=当前日期(2020-04-01)-最小月份的1日(2019.2->2019-12-01)=4
  462. # def difMonth(dateStr):
  463. # return int(int(str(np.datetime64(time.strftime("%Y-%m-%d")) -
  464. # np.datetime64(dateStr.replace('.', '-'), "D")).split(" ")[0]) / 30);
  465. # 信贷交易明细汇总
  466. def parseLoanTradeInfo(dfObj):
  467. df = dfObj["df"];
  468. # row1 = df.loc[1, :]
  469. if not df.empty:
  470. loanMonthDf = df[1: 4]
  471. loanMonthDf = loanMonthDf.reset_index(drop=True)
  472. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '个人住房贷款账户数'] = utils.toInt(loanMonthDf.loc[0, :][2])
  473. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex,'个人商用房贷款(包括商住两用)账户数']=utils.toInt(loanMonthDf.loc[1, :][2])
  474. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '其他类贷款账户数'] = utils.toInt(loanMonthDf.loc[2, :][2])
  475. creditCardDf = df[4: 6];
  476. creditCardDf = creditCardDf.reset_index(drop=True)
  477. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '贷记卡账户数'] = utils.toInt(creditCardDf.loc[0, :][2])
  478. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '准贷记卡账户数'] = utils.toInt(creditCardDf.loc[1, :][2])
  479. # 解析呆账信息汇总
  480. def parseBadDebtsInfoSumDf(dfObj):
  481. df = dfObj["df"];
  482. if not df.empty:
  483. row1 = df.loc[2, :]
  484. briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '账户数'] = row1[0];
  485. briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '余额'] = utils.replaceAmt(row1[2]); #yuan
  486. # 解析被追偿信息汇总
  487. def parseRecoveryInfoSum(dfObj):
  488. df = dfObj["df"];
  489. if not df.empty:
  490. row1 = df.loc[2, :]
  491. row2 = df.loc[3, :]
  492. row3 = df.loc[4, :]
  493. overdueBrief["disposalInfoSumAccount"] = row1[1]; # 资产处置信息汇总笔数
  494. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '资产处置业务账户数'] = row1[1];
  495. overdueBrief["disposalInfoSumAmt"] = row1[2]; # 资产处置信息汇总余额
  496. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '资产处置业务余额'] = utils.replaceAmt(row1[2]);
  497. overdueBrief["advanceInfoSumAccount"] = row2[1]; # 垫款业务笔数
  498. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '垫款业务账户数'] = row2[1];
  499. overdueBrief["advanceInfoSumAmt"] = row2[2]; # 垫款业务余额
  500. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '垫款业务余额'] = utils.replaceAmt(row2[2]);
  501. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '合计总账户数'] = row3[1];
  502. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '合计总余额'] = utils.replaceAmt(row3[2]);
  503. # 贷款逾期账户数
  504. # 贷款逾期月份数
  505. # 贷款单月最高逾期总额
  506. # 贷款最长逾期月数
  507. def parseOverdueInfoSum(dfObj):
  508. df = dfObj["df"];
  509. if not df.empty:
  510. row2= df.loc[2, :]
  511. row3 = df.loc[3, :]
  512. row4 = df.loc[4, :]
  513. row5 = df.loc[5, :]
  514. row6 = df.loc[6, :]
  515. #这块的数据需要进行出来 TODO
  516. #yuan
  517. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户账户数'] = utils.toInt(row2[2]);
  518. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户月份数'] = utils.toInt(row2[3]);
  519. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户单月最高逾期总额'] = utils.replaceAmt(row2[4]);
  520. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '非循环贷帐户最长逾期月数'] = utils.toInt(row2[5]);
  521. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户账户数'] = utils.toInt(row3[2]);
  522. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户月份数'] = utils.toInt(row3[3]);
  523. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户单月最高逾期总额'] = utils.replaceAmt(row3[4]);
  524. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环额度下分账户最长逾期月数'] = utils.toInt(row3[5]);
  525. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户账户数'] = utils.toInt(row4[2]);
  526. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户月份数'] = utils.toInt(row4[3]);
  527. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户单月最高逾期总额'] = utils.replaceAmt(row4[4]);
  528. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '循环贷账户最长逾期月数'] = utils.toInt(row4[5]);
  529. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户账户数'] = utils.toInt(row5[2]);
  530. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户月份数'] = utils.toInt(row5[3]);
  531. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户单月逾期总额'] = utils.replaceAmt(row5[4]);
  532. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '贷记卡账户最长逾期月数'] = utils.toInt(row5[5]);
  533. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户账户数'] = utils.toInt(row6[2]);
  534. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户月份数'] = utils.toInt(row6[3]);
  535. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户单月透支总额'] = utils.replaceAmt(row6[4]);
  536. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '准贷记卡账户最长透支月数'] = utils.toInt(row6[5]);
  537. overdueInfoAccountDf = df[df[1] != '--'];
  538. overdueInfoAccountDf = overdueInfoAccountDf[2:6] #yuan
  539. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户所有逾期账户最长逾期/透支月数最大值']=np.max(overdueInfoAccountDf[5].astype('int')) #yuan
  540. #np.sum(overdueInfoAccountDf[1])
  541. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户所有逾期账户数加总']= np.sum(overdueInfoAccountDf[2].astype('int'))#yuan TODO
  542. # briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户过去5年出现逾期的所有账户数目']=None# TODO
  543. # 未结清贷款法人机构数 从“未结清贷款信息汇总”中直接提取LoanLegalOrgNum
  544. # 未结清贷款机构数 从“未结清贷款信息汇总”中直接提取LoanOrgNum
  545. # 未结清贷款笔数 从“未结清贷款信息汇总”中直接提取CountNum
  546. # 未结清贷款合同总额 从“未结清贷款信息汇总”中直接提取ContractProfits
  547. # 未结清贷款合同余额 从“未结清贷款信息汇总”中直接提取Balance
  548. # 未结清贷款近6月平均应还款 从“未结清贷款信息汇总”中直接提取Last6MothsAvgRepayAmount
  549. # 个人贷款未结清笔数 "从“未结清贷款信息汇总”计算客户符合以下条件的贷款笔数
  550. # 1.贷款类型不为('%个人助学贷款%' ,'%农户贷款%')
  551. # 2.贷款额度>100元
  552. # 3.贷款状态不为“结清”"
  553. # 非循环贷账户信息汇总
  554. def doFilterCalc(dfx):
  555. dfx = dfx.replace('--', 0)
  556. return dfx;
  557. # 科学计数法转换
  558. def replaceAmt(dfx):
  559. return dfx.str.replace(',', '')
  560. # 非循环贷账户信息汇总
  561. def parseLoanAccountInfoSum(dfObj):
  562. df = dfObj["df"];
  563. if not df.empty:
  564. loanAccountInfoSumDf = df[2:3];
  565. loanAccountInfoSumDf = doFilterCalc(loanAccountInfoSumDf); # 替换--为0
  566. loanAccountInfoSumDf = loanAccountInfoSumDf.reset_index(drop=True)
  567. row0 = loanAccountInfoSumDf.loc[0,:]
  568. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户管理机构数'] = int(row0[0])
  569. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'] = int(row0[1])
  570. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  571. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  572. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  573. # 循环额度下分账户
  574. def parseCycleCreditAccountInfoSum(dfObj):
  575. df = dfObj["df"];
  576. if not df.empty:
  577. cycleCreditAccountInfoSumDf = df[2:3];
  578. cycleCreditAccountInfoSumDf = doFilterCalc(cycleCreditAccountInfoSumDf); # 替换--为0
  579. cycleCreditAccountInfoSumDf = cycleCreditAccountInfoSumDf.reset_index(drop=True)
  580. row0 = cycleCreditAccountInfoSumDf.loc[0,:]
  581. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户管理机构数'] = int(row0[0])
  582. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'] = int(row0[1])
  583. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额'] = int(utils.replaceAmt(row0[2]))
  584. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额'] = int(utils.replaceAmt(row0[3]))
  585. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  586. # 循环贷账户信息
  587. def parseCyleLoanAccountInfoSum(dfObj):
  588. df = dfObj["df"];
  589. if not df.empty:
  590. cycleLoanAccountInfoSumDf = df[2:3];
  591. cycleLoanAccountInfoSumDf = doFilterCalc(cycleLoanAccountInfoSumDf); # 替换--为0
  592. cycleLoanAccountInfoSumDf = cycleLoanAccountInfoSumDf.reset_index(drop=True)
  593. row0 = cycleLoanAccountInfoSumDf.loc[0,:]
  594. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户管理机构数'] = int(row0[0])
  595. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户账户数'] = int(row0[1])
  596. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  597. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  598. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  599. # 解析贷记卡信息汇总,包含准贷记卡
  600. def parseCreditCardInfoSum(dfObj):
  601. df = dfObj["df"];
  602. if not df.empty:
  603. creditCardInfoSumDf = df[2:3];
  604. creditCardInfoSumDf = doFilterCalc(creditCardInfoSumDf); # 替换--为0
  605. creditCardInfoSumDf = creditCardInfoSumDf.reset_index(drop=True)
  606. row0 = creditCardInfoSumDf.loc[0, :]
  607. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡发卡机构数'] = int(row0[0])
  608. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡账户数'] = int(row0[1])
  609. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  610. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  611. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  612. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  613. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
  614. # 解析贷记卡信息汇总,包含准贷记卡
  615. def parseCreditCardInfoSumZ(dfObj):
  616. df = dfObj["df"];
  617. if not df.empty:
  618. creditCardInfoSumDfZ = df[2:3];
  619. creditCardInfoSumDfZ = doFilterCalc(creditCardInfoSumDfZ);
  620. creditCardInfoSumDfZ = creditCardInfoSumDfZ.reset_index(drop=True)
  621. row0 = creditCardInfoSumDfZ.loc[0, :]
  622. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡发卡机构数'] = int(row0[0])
  623. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡账户数'] = int(row0[1])
  624. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  625. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  626. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  627. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  628. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
  629. #相关还款责任
  630. def parseRepaymentSum(dfObj):
  631. df = dfObj["df"];
  632. if not df.empty:
  633. row4 = df.loc[4,:].dropna().reset_index(drop=True)#第4行 为个人
  634. row8 = []
  635. if df.index.size ==9:
  636. row8 = df.loc[8,:].dropna().reset_index(drop=True)#第8行 为企业
  637. perAccountNum = 0;#个人账户数
  638. orgAccountNum = 0; # 企业账户数
  639. totalAccountNum = 0;#总账户数
  640. guaranteeAccountNum = 0;#相关还款责任总账户数-担保责任
  641. otherAccountNum =0;#相关还款责任总账户数-其他
  642. perGuaranteeAmt = 0#个人担保金额及其他
  643. orgGuaranteeAmt = 0#企业担保金额及其他
  644. totalGuaranteeAmt = 0;#总担保金额
  645. guaranteeAmt = 0;#相关还款责任总担保金额
  646. otherPaymentAmt = 0;#其他还款责任金额
  647. perGuaranteeBalance = 0 # 个人担保余额及其他
  648. orgGuaranteeBalance = 0 # 企业担保余额及其他
  649. totalGuaranteeBalance = 0;#总担保余额
  650. guaranteeBalance = 0;#相关还款责任总担保余额
  651. otherPaymentBalance = 0; # 其他还款责任余额
  652. #计算总账户数
  653. if row4[0] !="--":
  654. perAccountNum=perAccountNum+utils.toInt(row4[0])
  655. guaranteeAccountNum = guaranteeAccountNum + utils.toInt(row4[0])#个人担保责任账户数
  656. if row4[3] !="--":
  657. perAccountNum = perAccountNum + utils.toInt(row4[3])#其他
  658. otherAccountNum = otherAccountNum + utils.toInt(row4[3]) # 其他
  659. if len(row8)>0:
  660. if row8[0] != "--":
  661. orgAccountNum = orgAccountNum + utils.toInt(row8[0])
  662. guaranteeAccountNum = guaranteeAccountNum + utils.toInt(row8[0])#企业担保责任账户数
  663. if row8[3] != "--":
  664. orgAccountNum = orgAccountNum + utils.toInt(row8[3])#其他
  665. otherAccountNum = otherAccountNum + utils.toInt(row8[3]) # 其他
  666. totalAccountNum = perAccountNum+orgAccountNum
  667. #计算担保金额
  668. if row4[1] !="--":
  669. perGuaranteeAmt=perGuaranteeAmt+utils.replaceAmt(row4[1])#担保
  670. guaranteeAmt = guaranteeAmt + utils.replaceAmt(row4[1]) # 担保
  671. if row4[4] !="--":
  672. perGuaranteeAmt = perGuaranteeAmt + utils.replaceAmt(row4[4])#其他
  673. otherPaymentAmt = otherPaymentAmt + utils.replaceAmt(row4[4]) # 其他
  674. if len(row8)>0:
  675. if row8[1] != "--":
  676. orgGuaranteeAmt = orgGuaranteeAmt + utils.replaceAmt(row8[1])#担保
  677. guaranteeAmt = guaranteeAmt + utils.replaceAmt(row8[1]) # 担保
  678. if row8[4] != "--":
  679. orgGuaranteeAmt = orgGuaranteeAmt + utils.replaceAmt(row8[4])#其他
  680. otherPaymentAmt = otherPaymentAmt + utils.replaceAmt(row8[4]) # 其他
  681. totalGuaranteeAmt = perGuaranteeAmt + orgGuaranteeAmt
  682. # 计算余额
  683. if row4[2] !="--":
  684. perGuaranteeBalance=perGuaranteeBalance+utils.replaceAmt(row4[2])
  685. guaranteeBalance=guaranteeBalance+utils.replaceAmt(row4[2])#个人担保余额
  686. if row4[5] !="--":
  687. perGuaranteeBalance = perGuaranteeBalance + utils.replaceAmt(row4[5])#其他
  688. otherPaymentBalance = otherPaymentBalance + utils.replaceAmt(row4[5]) # 其他
  689. if len(row8)>0:
  690. if row8[2] != "--":
  691. orgGuaranteeBalance = orgGuaranteeBalance + utils.replaceAmt(row8[2])
  692. guaranteeBalance = guaranteeBalance + utils.replaceAmt(row8[2])#企业担保余额
  693. if row8[5] != "--":
  694. orgGuaranteeBalance = orgGuaranteeBalance + utils.replaceAmt(row8[5])
  695. otherPaymentBalance = otherPaymentBalance + utils.replaceAmt(row8[5]) # 其他
  696. totalGuaranteeBalance = perGuaranteeBalance + orgGuaranteeBalance
  697. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数(担保+其他+个人+企业)'] =totalAccountNum
  698. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额+总还款责任金额(个人+企业)'] =totalGuaranteeAmt
  699. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额+总其他余额(个人+企业)'] =totalGuaranteeBalance
  700. if totalGuaranteeAmt !=0:
  701. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额+总其他余额(个人+企业)/相关还款责任账户总担保金额+总其他金额(个人+企业)'] =\
  702. round(totalGuaranteeBalance / totalGuaranteeAmt, 2)
  703. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任担保总账户数-个人'] =perAccountNum
  704. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-个人'] =perGuaranteeAmt
  705. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-个人'] =perGuaranteeBalance
  706. if perGuaranteeBalance !=0:
  707. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-个人/相关还款责任总担保金额-个人'] = round(perGuaranteeBalance/perGuaranteeBalance,2)
  708. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-企业'] =orgAccountNum
  709. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-企业'] =orgGuaranteeAmt
  710. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-企业'] =orgGuaranteeBalance
  711. if orgGuaranteeAmt!=0:
  712. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-企业/相关还款责任总担保金额-企业'] = round(orgGuaranteeBalance/orgGuaranteeAmt,2)
  713. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-担保责任'] =guaranteeAccountNum
  714. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-担保责任'] =guaranteeAmt
  715. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额-担保责任'] =guaranteeBalance
  716. if guaranteeAmt!=0:
  717. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-担保责任/相关还款责任总担保金额-担保责任'] =round(guaranteeBalance/guaranteeAmt,2)
  718. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-其他'] =otherAccountNum
  719. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-其他'] =otherPaymentAmt
  720. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-其他'] =otherPaymentBalance
  721. if otherPaymentAmt!=0:
  722. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额-其他/相关还款责任账户总担保金额-其他'] =round(otherPaymentBalance/otherPaymentAmt,2)
  723. #解析公共信息汇总yuan
  724. def parsePublicInfoBrief(dfObj):
  725. df = dfObj["df"];
  726. if not df.empty:
  727. publicInfoBrief = df[1:6];
  728. # print(publicInfoBrief)
  729. publicInfoBrief = publicInfoBrief.reset_index(drop=True)
  730. # yuan
  731. row0 = publicInfoBrief.loc[1, :]
  732. # print(row0[0])
  733. row1 = publicInfoBrief.loc[2, :]
  734. row2 = publicInfoBrief.loc[3, :]
  735. row3 = publicInfoBrief.loc[4, :]
  736. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-记录数'] = int(row0[1])
  737. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-涉及金额'] = int(utils.replaceAmt(row0[2]))
  738. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-记录数'] = int(row1[1])
  739. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-涉及金额'] = int(utils.replaceAmt(row1[2]))
  740. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-记录数'] = int(row2[1])
  741. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-涉及金额'] = int(utils.replaceAmt(row2[2]))
  742. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-记录数'] = int(row3[1])
  743. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-涉及金额'] = int(utils.replaceAmt(row3[2]))
  744. #解析查询信息汇总
  745. def parseQueryRecordSum(dfObj):
  746. df = dfObj["df"];
  747. if not df.empty:
  748. queryRecordSumDfTmp = df[2:3];
  749. queryRecordSumDfTmp = queryRecordSumDfTmp.reset_index(drop=True)
  750. row0 = queryRecordSumDfTmp.loc[0, :]
  751. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-贷款审批'] =int(row0[0])
  752. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-信用卡审批'] =int(row0[1])
  753. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-贷款审批'] =int(row0[2])
  754. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-信用卡审批'] =int(row0[3])
  755. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-本人查询'] =int(row0[4])
  756. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-贷后管理'] =int(row0[5])
  757. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-担保资格审查'] =int(row0[6])
  758. # 解析查询记录明细
  759. def parseQueryInfoDetail(dfObj):
  760. df = dfObj["df"];
  761. reportTime = queryInfo["reportTime"];
  762. if not df.empty:
  763. df = utils.replaceDateCol(df)
  764. df = df[1:df.index.size] # 去掉表头
  765. queryRecordDetailDf.loc[queryRecordDetailIndex, '近1月查询次数'] =qip.getLastMonthQueryTimes(df, 1, "",reportTime)
  766. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数'] =qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  767. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数'] =qip.getLastMonthQueryTimes(df, 6, "",reportTime)
  768. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数'] =qip.getLastMonthQueryTimes(df, 12, "",reportTime)
  769. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近1个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 1, "", reportTime)
  770. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近3个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 3, "", reportTime)
  771. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 6, "", reportTime)
  772. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 12, "", reportTime)
  773. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 24, "", reportTime)
  774. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime)
  775. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime)
  776. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime)
  777. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime)
  778. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime)
  779. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime)
  780. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime)
  781. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数信用卡审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime)
  782. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime)
  783. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime)
  784. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime)
  785. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime)
  786. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.insuranceAprove,reportTime)
  787. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.insuranceAprove,reportTime)
  788. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanAfterMgr,reportTime)
  789. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanAfterMgr,reportTime)
  790. queryRecordDetailDf.loc[queryRecordDetailIndex, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime)
  791. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime)
  792. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime)
  793. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime)
  794. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime)
  795. #解析住房公积金
  796. def parseHousingFundRcd(df):
  797. if not df.empty:
  798. lastHousingFundRcdDf = df.sort_values(by=["信息更新日期"] , ascending=(False)).reset_index(drop=True)
  799. lastHousingFundRcdDf = lastHousingFundRcdDf[0:1]#最新
  800. row1 = lastHousingFundRcdDf.loc[0,:].dropna().reset_index(drop=True)
  801. housingFundRcdDf.loc[housingFundRcdIndex, '参缴地'] =row1[1]
  802. housingFundRcdDf.loc[housingFundRcdIndex, '参缴日期'] =row1[2]
  803. housingFundRcdDf.loc[housingFundRcdIndex, '初缴月份'] =row1[3]#初缴日期
  804. housingFundRcdDf.loc[housingFundRcdIndex, '缴至月份'] =row1[4]
  805. housingFundRcdDf.loc[housingFundRcdIndex, '缴费状态'] =row1[5]
  806. housingFundRcdDf.loc[housingFundRcdIndex, '月缴存额'] =row1[6]
  807. housingFundRcdDf.loc[housingFundRcdIndex, '个人存缴比例'] =row1[7]
  808. housingFundRcdDf.loc[housingFundRcdIndex, '单位存缴比例'] =row1[8]
  809. housingFundRcdDf.loc[housingFundRcdIndex, '缴费单位'] =row1[9]#扣缴单位
  810. housingFundRcdDf.loc[housingFundRcdIndex, '信息更新日期'] =row1[10]
  811. reportTime = queryInfo["reportTime"];
  812. lastDateStr = utils.getLastMonthDate(reportTime,12)
  813. avgHousingFundDf = df[df['缴至月份']>=lastDateStr]
  814. housingFundRcdDf.loc[housingFundRcdIndex, '最近1年公积金平均值'] = round(np.mean(avgHousingFundDf['月缴存额']),2)
  815. lastDateStr = utils.getLastMonthDate(reportTime, 12*3)
  816. avgHousingFundDf = df[df['缴至月份'] >= lastDateStr]
  817. housingFundRcdDf.loc[housingFundRcdIndex, '最近3年公积金平均值']= round(np.mean(avgHousingFundDf['月缴存额']),2)
  818. #解析贷款还款记录指标
  819. def parseLoanMergeAndPayRecordDf(df,payRcdDf):
  820. if not df.empty and not payRcdDf.empty:
  821. #正常
  822. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  823. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  824. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  825. #计算当前贷款,为还款记录的最后一期 0529
  826. curOverduePayRcdDf=overduePayRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  827. curOverduePayRcdDf = curOverduePayRcdDf.groupby(['账户编号']).head(1)
  828. curOverduePayRcdDf = curOverduePayRcdDf[curOverduePayRcdDf['还款状态'] > 0]
  829. #临时保存,不用过滤还款状态为0的
  830. payRcdMaxOverdueDf = overduePayRcdDf;
  831. #所有逾期的记录
  832. # overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态']>0]
  833. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数'] = curOverduePayRcdDf['账户编号'].unique().size
  834. if normalDf.index.size>0:
  835. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数']/normalDf.index.size,3)
  836. #存在逾期的贷款账户 非结清的过滤出逾期的账户号
  837. overdueLoanDf = normalDf[normalDf['账户编号'].isin(curOverduePayRcdDf['账户编号'].values)]
  838. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] = overdueLoanDf['管理机构'].unique().size
  839. if normalDf['管理机构'].unique().size>0:
  840. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] / normalDf['管理机构'].unique().size,2)
  841. #还款记录按日期排序最近3笔的最大逾期期数
  842. loanAccountInfoDf.loc[loanAccountInfoIndex, '近1月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf,1);
  843. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  844. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  845. loanAccountInfoDf.loc[loanAccountInfoIndex, '近9月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  846. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12);
  847. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  848. reportTime = queryInfo["reportTime"]
  849. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf,reportTime, 24);
  850. payStatus= ["G","D","C","N","M","1","2","3","4","5","6","7"]
  851. # 贷款24期还款记录次数 剔除结清 转出 呆账
  852. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  853. payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  854. payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(24)
  855. #从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
  856. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  857. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  858. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款24期还款记录次数'] = np.max(payRcdTimes)
  859. #解析信贷交易明细-特殊交易
  860. def parseSpecialTrade(df):
  861. if not df.empty:
  862. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '当前用户发生特殊交易的严重程度'] = np.max(df['严重程度'])#加工的指标
  863. maxChangeMonthIndex = np.argmax(np.abs(df['变更月数']))
  864. meanMonthValue = np.mean(np.abs(df['变更月数']))
  865. row0 = df.loc[maxChangeMonthIndex, :]
  866. settleDf = df[(df['特殊交易类型']=='提前结清') | (df['特殊交易类型']=='提前还款')]
  867. debtDf = df[(df['特殊交易类型'] == '以资抵债')]
  868. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户发生特殊交易变更月数的最大差值'] = row0[3]
  869. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户发生特殊交易变更月数的平均差值'] = round(meanMonthValue,2)
  870. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户特殊交易涉及的发生金额的最大值'] = np.max(df['发生金额'])
  871. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户特殊交易涉及的发生金额的平均值'] = round(np.mean(df['发生金额']),2)
  872. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户所有帐户发生提前还款交易的次数统计'] = settleDf.index.size
  873. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户所有帐户发生不良特殊交易的次数统计'] = debtDf.index.size;
  874. #信贷交易明细-非循环贷账户
  875. def parseLoanAccountInfo(df):
  876. if not df.empty:
  877. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  878. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')].reset_index(drop=True)
  879. normalDf = normalDf[0:loanAccountNum]#根据非循环贷账户数进行计算进行截取
  880. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  881. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  882. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  883. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  884. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  885. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  886. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  887. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  888. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  889. #信贷交易明细-循环额度分账户
  890. def parseCycleCreditAccountInfo(df):
  891. if not df.empty:
  892. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')].reset_index(drop=True)
  893. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  894. cycleCreditAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'])
  895. normalDf = normalDf[loanAccountNum:(loanAccountNum + cycleCreditAccountNum)]
  896. if not normalDf.empty:
  897. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  898. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  899. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  900. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  901. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  902. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  903. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  904. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  905. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  906. #信贷交易明细-循环贷账户
  907. def parseCycleLoanAccountInfo(df):
  908. if not df.empty:
  909. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  910. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  911. cycleCreditAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'])
  912. cycleAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户账户数'])
  913. normalDf = normalDf[(loanAccountNum+cycleCreditAccountNum):normalDf.index.size]
  914. if not normalDf.empty:
  915. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  916. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  917. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  918. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  919. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  920. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  921. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  922. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  923. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
# Parse loan-account indicators
  925. def parseLoanMergeDf(df):
  926. if not df.empty:
  927. sortDf = df.sort_values(by=["账户关闭日期","借款金额(本金)"] , ascending=(False,False))
  928. sortDf = sortDf[sortDf['账户状态'] == '结清'];
  929. sortDf = sortDf.reset_index(drop=True)
  930. if not sortDf.empty:
  931. row0 = sortDf.loc[0, :]
  932. loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)']
  933. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)']
  934. openDate = dfParser.formatDate(row0['开立日期'])
  935. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的发放距今月数'] = utils.difMonthReportTime(openDate,queryInfo["reportTime"])
  936. settleDate = dfParser.formatDate(row0['账户关闭日期'])
  937. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的结清距今月数'] = utils.difMonthReportTime(settleDate,queryInfo["reportTime"])
  938. loanAccountInfoDf.loc[loanAccountInfoIndex, '历史贷款总法人机构数'] = df['管理机构'].unique().size
  939. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前同时在用的贷款机构数'] = df[df['余额(本金)']>0]['管理机构'].unique().size
  940. statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  941. bankDf = statusDf[statusDf['管理机构'].str.contains('银行')]
  942. #没有记录
  943. if statusDf.index.size==0:
  944. isNotBankCust = -1
  945. else:
  946. if bankDf.index.size >0:#有一条以上不为结清,请包含银行
  947. isNotBankCust = 1;
  948. else:
  949. isNotBankCust = 0;
  950. loanAccountInfoDf.loc[loanAccountInfoIndex, '是否有非银行贷款客户'] = isNotBankCust
  951. #最严重的五级分类
  952. # fiveType = ""
  953. # for fiveTypeTmp in consts.fiveType:
  954. # fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp];
  955. # if not fiveTypeDf.empty:
  956. # fiveType = fiveTypeTmp;
  957. # break;
  958. # loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款五级分类'] = fiveType
  959. #当前贷款LTV
  960. # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0
  961. # 的记录后,SUM(本金余额) / SUM(贷款本金)
  962. # 如本金余额为空和贷款本金为0或为空,则当条记录不计算
  963. loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')]
  964. badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))]
  965. balanceSum = np.sum(badSetDf['余额(本金)'].astype('int'))
  966. loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int'))
  967. if(loanAmtSum !=0):
  968. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2)
  969. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2)
  970. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  971. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  972. #['个人住房商业贷款','个人商用房(含商住两用)贷款','个人住房公积金贷款','房'],
  973. houseLtvList = consts.houseLtvList;
  974. # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)]
  975. # if not houseLtvDf.empty:
  976. # loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2)
  977. #['个人住房贷款','个人商用房(包括商住两用)贷款']
  978. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = lip.getCurLtv(badSetDf, houseLtvList)
  979. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款机构数量'] = loanLtvDf['管理机构'].unique().size
  980. cardLtvList = ['个人汽车消费贷款','车']
  981. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList)
  982. operateLtvList = ['个人经营性贷款']
  983. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList)
  984. consumeLtvList = ['其他个人消费贷款']
  985. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList)
  986. bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行','财务公司']
  987. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  988. bankLtvList = ['消费金融公司','汽车金融公司','信托公司']# TODO
  989. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  990. smallLoanLtvList = ['小额信贷公司']
  991. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList)
  992. #当前贷款最大逾期期数
  993. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数)
  994. loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  995. if not loanOverdueLtvDf.empty:
  996. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数'])
  997. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额'])
  998. loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True)
  999. maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数'])
  1000. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额']
  1001. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里
  1002. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3)
  1003. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3)
  1004. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6)
  1005. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6)
  1006. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6)
  1007. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12)
  1008. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12)
  1009. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12)
  1010. lastLoanDf = loanOverdueLtvDf;
  1011. if not lastLoanDf.empty:
  1012. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"])
  1013. normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
  1014. #未结清贷款总账户数:账户状态不等于结清和转出的记录数
  1015. notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  1016. if not notSettleDf.empty:
  1017. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数'] = normalDf.index.size
  1018. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2)
  1019. #当前未结清贷款余额总和
  1020. # ltvDf = tmpDf[tmpDf['业务种类'].isin(bizTypeList)]
  1021. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  1022. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  1023. # 当前未结清住房贷款余额总和
  1024. houseDf = notSettleDf[notSettleDf['业务种类'].isin(houseLtvList)]
  1025. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清住房贷款余额总和'] = np.sum(houseDf['余额(本金)'])
  1026. # 当前未结清汽车贷款余额总和
  1027. cardDf = notSettleDf[notSettleDf['业务种类'].isin(cardLtvList)]
  1028. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清汽车贷款余额总和'] = np.sum(cardDf['余额(本金)'])
  1029. # 当前未结清个人经营性贷款余额总和
  1030. operateLtvDf = notSettleDf[notSettleDf['业务种类'].isin(operateLtvList)]
  1031. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清个人经营性贷款余额总和'] = np.sum(operateLtvDf['余额(本金)'])
  1032. # 当前平均每月贷款余额总和
  1033. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前平均每月贷款余额总和'] = round(np.sum(notSettleDf['余额(本金)'])/12,2)
  1034. #当前正常贷款账户余额
  1035. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)'])
  1036. # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总)
  1037. if np.sum(notSettleDf['余额(本金)']) >0:
  1038. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']),2)
  1039. settleDf = df[(df['账户状态'] == '结清')]
  1040. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数'] = settleDf.index.size
  1041. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2)
  1042. #贷款24期还款记录次数 TODO
  1043. # 最近3个月个人消费贷款发放额度
  1044. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款'])
  1045. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3,queryInfo["reportTime"])
  1046. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6,queryInfo["reportTime"])
  1047. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12,queryInfo["reportTime"])
  1048. #未结清贷款平均剩余还款期数
  1049. payPieDf = notSettleDf[notSettleDf['还款期数']!='--']
  1050. if payPieDf.index.size!=0:
  1051. loanAccountInfoDf.loc[loanAccountInfoIndex, '未结清贷款平均剩余还款期数'] = round(np.sum(payPieDf['剩余还款期数'])/payPieDf.index.size,2)
  1052. # 当前贷款本月应还金额总和
  1053. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月应还金额总和'] = np.sum(notSettleDf['本月应还款'])
  1054. # 当前贷款本月实还金额总额
  1055. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月实还金额总额'] = np.sum(notSettleDf['本月实还款'])
# Parse credit-card account indicators
  1057. def parseCreditCardMergeDf(df):
  1058. if not df.empty:
  1059. # 历史信用卡总法人机构数
  1060. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'历史信用卡总法人机构数'] = df['发卡机构'].unique().size
  1061. # creditCardUseDf = df[df['已用额度']>0];
  1062. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size
  1063. #统一排除
  1064. creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1065. totalAmtDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1066. #大额专项分期额度(合计)
  1067. # 已用分期金额(合计)
  1068. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '大额专项分期额度(合计)'] = np.sum(creditDf['大额专项分期额度'])
  1069. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '已用分期金额(合计)'] = np.sum(creditDf['已用分期金额'])
  1070. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf)
  1071. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf)
  1072. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf,df,0.9)
  1073. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, totalAmtDf, 1)
  1074. # 从“贷记卡信息”中提取,计算授信额度时剔除销户,计算已用额度时剔除呆账、呆帐、销户后,SUM(各账户已用额度) / SUM(各账户授信额度)
  1075. useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1076. totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')]
  1077. totalCreditAmt = np.sum(totalCreditDf['账户授信额度'])
  1078. if totalCreditAmt != 0:#授信额度不能为0
  1079. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度'])/np.sum(totalCreditDf['账户授信额度']),2)
  1080. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / np.sum(totalCreditDf['账户授信额度']), 2)
  1081. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / np.sum(totalCreditDf['账户授信额度']), 2)
  1082. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数'] = np.max(creditDf['当前逾期期数'])#用于计算
  1083. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期金额'] = np.max(creditDf['当前逾期总额'])
  1084. if not creditDf.empty:
  1085. creditDf = creditDf.reset_index(drop=True)
  1086. maxOverdueIndex = np.argmax(creditDf['当前逾期期数'])
  1087. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex,:]['当前逾期总额']
  1088. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df,queryInfo["reportTime"],3)
  1089. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 3)
  1090. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 3)
  1091. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 6)
  1092. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 6)
  1093. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 6)
  1094. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 12)
  1095. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 12)
  1096. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 12)
  1097. if not creditDf.empty:
  1098. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf,queryInfo["reportTime"])
  1099. paySo = np.sum(creditDf['本月应还款'])
  1100. if(paySo)!=0:
  1101. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡还款比例'] = round(np.sum(creditDf['本月实还款'])/np.sum(creditDf['本月应还款']),2)
  1102. creditDfTmp = creditDf[creditDf['本月应还款']>0]
  1103. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最高还款比例'] = round(np.max(np.divide(creditDfTmp['本月实还款'] , creditDfTmp['本月应还款'])), 2)
  1104. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最低还款比例'] = round(np.min(np.divide(creditDfTmp['本月实还款'] , creditDfTmp['本月应还款'])), 2)
  1105. normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)];
  1106. notCloseDf = df[(df['账户状态'] != '销户')]
  1107. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数'] = normalDf.index.size
  1108. if not notCloseDf.empty and not normalDf.empty:
  1109. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数占比'] = round(normalDf.index.size/notCloseDf.index.size,2)
  1110. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡已用额度'] = np.sum(normalDf['已用额度'])
  1111. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数'] = normalDf[normalDf['已用额度']>0].index.size
  1112. if not creditDf.empty:
  1113. creditUseAmt = np.sum(creditDf['已用额度'])
  1114. if creditUseAmt!=0:
  1115. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2)
  1116. if notCloseDf.empty:
  1117. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = -99
  1118. else:
  1119. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = \
  1120. round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数']/notCloseDf.index.size,3)
  1121. #当前正常贷记卡账户余额占总余额比
  1122. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月实还金额总和'] = np.sum(creditDf['本月实还款'])
  1123. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月应还金额总和'] = np.sum(creditDf['本月应还款'])
  1124. maxAmtDf = df[(df['币种'] == '人民币元')]
  1125. if not maxAmtDf.empty:
  1126. maxAmtDf = maxAmtDf.reset_index(drop=True)
  1127. maxAmtIndex = np.argmax(maxAmtDf['账户授信额度'])
  1128. maxOpenDate = maxAmtDf.loc[maxAmtIndex,:]['开立日期'];
  1129. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate,queryInfo["reportTime"]);
  1130. # 名下贷记卡数量-状态正常
  1131. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] = df[(df['账户状态'] != '销户')].index.size
  1132. # 名下贷记卡数量-状态未激活
  1133. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] = df[(df['账户状态'] == '未激活')].index.size
  1134. # 名下贷记卡数量-状态异常--异常包含(2-冻结,3-止付,5-呆帐,10-其他)
  1135. abnormalList = ['冻结','止付','呆帐','其他']
  1136. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] = df[(df['账户状态'].isin(abnormalList))].index.size
  1137. # 名下贷记卡比例-状态正常
  1138. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态正常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] / df.index.size,2)
  1139. # 名下贷记卡比例-状态未激活
  1140. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态未激活'] =round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] / df.index.size,2)
  1141. # 名下贷记卡比例-状态异常
  1142. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态异常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] / df.index.size,2)
# Parse quasi-credit-card account indicators
  1144. def parseCreditCardMergeDfZ(df,payRcd):
  1145. if not df.empty:
  1146. overdueCreditCardRcdDf = payRcd[payRcd['账户编号'].isin(df['账户编号'].values)];
  1147. overdueCreditCardRcdDf = utils.replacePayRcdStatusOverdue(overdueCreditCardRcdDf)
  1148. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '本月应还款(合计)'] = np.nansum(df['透支余额'])
  1149. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '本月实还款(合计)'] = np.nansum(df['本月实还款'])
  1150. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '最近一次还款日期'] = np.nanmax(df['最近一次还款日期'])
  1151. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前一共透支期数'] = cip.getCurOverdueNum(overdueCreditCardRcdDf);
  1152. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前一共透支总额'] = np.nansum(df['透支余额'])
  1153. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '透支180天以上未支付余额(合计)'] = np.nansum(df['透支180天以上未付余额'])
  1154. creditDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户')]
  1155. if not creditDf.empty:
  1156. totalAmt = np.nansum(creditDf['账户授信额度'])
  1157. creditAmt = np.nansum(creditDf['透支余额'])
  1158. if totalAmt !=0:
  1159. #从“贷记卡信息”中提取,剔除未激活、销户后,所有账户透支金额/所有账户账户授信额度。
  1160. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '全部准贷记卡账户当前总额度使用率']=round(creditAmt/totalAmt,2)
  1161. #从“贷记卡信息”中提取,剔除未激活、销户后,MAX(单账户最高透支金额/单账户授信额度)
  1162. creditMaxDf = creditDf[creditDf['账户授信额度']>0]
  1163. if not creditMaxDf.empty:
  1164. creditMaxDf = creditMaxDf.fillna(0.0)
  1165. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '准贷记卡账户最高使用额度总的使用率'] = round(np.max(np.divide(creditMaxDf['最大透支余额'],creditMaxDf['账户授信额度'])),2)
  1166. creditMaxDf = creditDf[creditDf['最大透支余额'] > 0]
  1167. if not creditMaxDf.empty:
  1168. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡最大透支金额'] = np.max(creditMaxDf['最大透支余额'])
  1169. #从“贷记卡信息”中提取,剔除未激活、销户后,当前透支准贷记卡账户数/总准贷记卡账户数,透支账户判断:透支余额不为0的账户
  1170. creditDfTmp = creditDf[creditDf['透支余额']>0]
  1171. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡透支账户数占比'] = round(creditDfTmp.index.size / creditDf.index.size,2)
  1172. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡本月应还金额总和'] = np.nansum(df['透支余额'])
  1173. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡本月实还金额总和'] = np.nansum(df['本月实还款'])
# Parse utilisation rates. TODO: decide whether to compute them from the summary or from the detail records.
  1175. def parseUseRate():
  1176. # useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)']
  1177. # 从“信贷交易授信及负债信息概要”中“非循环贷账户信息汇总”、“循环额度下分账户信息汇总”、“循环贷账户信息汇总”、“贷记卡账户信息汇总”和“准贷记卡账户信息汇总”里提取,SUM(
  1178. # 所有“余额”、“已用额度”和“透支余额”) / SUM(所有“授信总额”和“授信额度”)
  1179. loanUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额']
  1180. cycleCreditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额']
  1181. cycleUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额']
  1182. creditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度']
  1183. creditAmtUseZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度']
  1184. loanTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额']
  1185. cycleCreditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额']
  1186. cycleTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额']
  1187. creditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额']
  1188. creditAmtTotalZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额']
  1189. # if str(loanUseAmt)=="nan":
  1190. # loanUseAmt = 0;
  1191. # if str(cycleCreditUseAmt) == "nan":
  1192. # loanUseAmt = 0;
  1193. # if str(cycleCreditUseAmt) == "nan":
  1194. # loanUseAmt = 0;
  1195. useAmt = loanUseAmt+cycleCreditUseAmt+cycleUseAmt+creditUseAmt+creditAmtUseZ
  1196. totalAmt = loanTotalAmt+cycleCreditTotalAmt+cycleTotalAmt+creditTotalAmt+creditAmtTotalZ
  1197. if totalAmt !=0:
  1198. useRateDf.loc[useRateIndex, '全账户使用率(已用额度/授信总额)'] = round(useAmt / totalAmt,2)
  1199. if loanTotalAmt!=0:
  1200. useRateDf.loc[useRateIndex, '非循环贷账户使用率(已用额度/授信总额)'] = round(loanUseAmt / loanTotalAmt,2)
  1201. if cycleCreditTotalAmt !=0:
  1202. useRateDf.loc[useRateIndex, '循环额度下分账户使用率(已用额度/授信总额)'] = round(cycleCreditTotalAmt / cycleCreditTotalAmt,2)
  1203. if cycleTotalAmt !=0:
  1204. useRateDf.loc[useRateIndex, '循环贷账户使用率(已用额度/授信总额)'] = round(cycleUseAmt / cycleTotalAmt,2)
  1205. if creditTotalAmt !=0:
  1206. useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)'] = round(creditUseAmt / creditTotalAmt,2)
  1207. if creditAmtTotalZ !=0:
  1208. useRateDf.loc[useRateIndex, '准贷记卡账户使用率(已用额度/授信总额)'] = round(creditAmtUseZ / creditAmtTotalZ,2)
# Parse account-opening counts
  1210. def parseOpenAccount(loanDf,creditCardDf,creditCardDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1211. reportTime = queryInfo["reportTime"];
  1212. openAccountDf.loc[openAccountIndex, '近3个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,3)+cip.getOpenAccount(creditCardDf,reportTime,3)+cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1213. openAccountDf.loc[openAccountIndex, '近6个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,6)+cip.getOpenAccount(creditCardDf,reportTime,6)+cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1214. openAccountDf.loc[openAccountIndex, '近9个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,9)+cip.getOpenAccount(creditCardDf,reportTime,9)+cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1215. openAccountDf.loc[openAccountIndex, '近12个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,12)+cip.getOpenAccount(creditCardDf,reportTime,12)+cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1216. openAccountDf.loc[openAccountIndex, '近24个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,24)+cip.getOpenAccount(creditCardDf,reportTime,24)+cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1217. openAccountDf.loc[openAccountIndex, '近3个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,consts.bankList)
  1218. openAccountDf.loc[openAccountIndex, '近6个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,consts.bankList)
  1219. openAccountDf.loc[openAccountIndex, '近9个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,consts.bankList)
  1220. openAccountDf.loc[openAccountIndex, '近12个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,consts.bankList)
  1221. openAccountDf.loc[openAccountIndex, '近24个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,consts.bankList)
  1222. openAccountDf.loc[openAccountIndex, '近3个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,"")
  1223. openAccountDf.loc[openAccountIndex, '近6个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,"")
  1224. openAccountDf.loc[openAccountIndex, '近9个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,"")
  1225. openAccountDf.loc[openAccountIndex, '近12个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,"")
  1226. openAccountDf.loc[openAccountIndex, '近24个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,"")
  1227. openAccountDf.loc[openAccountIndex, '近3个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,3)
  1228. openAccountDf.loc[openAccountIndex, '近6个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,6)
  1229. openAccountDf.loc[openAccountIndex, '近9个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,9)
  1230. openAccountDf.loc[openAccountIndex, '近12个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,12)
  1231. openAccountDf.loc[openAccountIndex, '近24个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,24)
  1232. openAccountDf.loc[openAccountIndex, '近3个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1233. openAccountDf.loc[openAccountIndex, '近6个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1234. openAccountDf.loc[openAccountIndex, '近9个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1235. openAccountDf.loc[openAccountIndex, '近12个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1236. openAccountDf.loc[openAccountIndex, '近24个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1237. #从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”、“贷记卡账户”和“准贷记卡账户”里提取,5年里账户还款状态出现“1、2、3、4、5、6、7、D、Z、G、B”的账户数/所有账户数
  1238. overdueLoanPayRcdDf = loanPayRecordMergeDf[loanPayRecordMergeDf['账户编号'].isin(loanDf['账户编号'].values)]
  1239. overdueLoanPayRcdDf = utils.replacePayRcdStatusOverdue(overdueLoanPayRcdDf)
  1240. overdueLoanPayRcdDf = overdueLoanPayRcdDf[overdueLoanPayRcdDf['还款状态'] > 0]
  1241. overdueCreditPayRcdDf = creditCardPayRecordMergeDf[creditCardPayRecordMergeDf['账户编号'].isin(creditCardDf['账户编号'].values)]
  1242. overdueCreditPayRcdDf = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDf)
  1243. overdueCreditPayRcdDf = overdueCreditPayRcdDf[overdueCreditPayRcdDf['还款状态'] > 0]
  1244. overdueCreditPayRcdDfZ = creditCardPayRecordMergeDfZ[creditCardPayRecordMergeDfZ['账户编号'].isin(creditCardDfZ['账户编号'].values)]
  1245. overdueCreditPayRcdDfZ = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDfZ)
  1246. overdueCreditPayRcdDfZ = overdueCreditPayRcdDfZ[overdueCreditPayRcdDfZ['还款状态'] > 0]
  1247. loanAccountNum = loanPayRecordMergeDf['账户编号'].unique().size
  1248. creditAccountNum = creditCardPayRecordMergeDf['账户编号'].unique().size
  1249. creditAccountNumZ = creditCardPayRecordMergeDfZ['账户编号'].unique().size
  1250. overdueLoanNum = overdueLoanPayRcdDf['账户编号'].unique().size
  1251. overdueCreditNum = overdueCreditPayRcdDf['账户编号'].unique().size
  1252. overdueCreditNumZ = overdueCreditPayRcdDfZ['账户编号'].unique().size
  1253. if (loanAccountNum+creditAccountNum+creditAccountNumZ) >0:
  1254. openAccountDf.loc[openAccountIndex, '有过逾期记录的账户/全账户数'] = round((overdueLoanNum+overdueCreditNum+overdueCreditNumZ)/(loanAccountNum+creditAccountNum+creditAccountNumZ),2)
  1255. otherPerLoanDf = loanDf[loanDf['业务种类'].isin(consts.bankList)]
  1256. otherPerLoanNum = otherPerLoanDf.index.size;
  1257. overdueOtherPerLoanNum = otherPerLoanDf[otherPerLoanDf['账户编号'].isin(overdueLoanPayRcdDf['账户编号'].values)].index.size;
  1258. if otherPerLoanNum!=0:
  1259. openAccountDf.loc[openAccountIndex, '有过逾期记录的消费金融类账户/全消费金融类账户数'] = round(overdueOtherPerLoanNum/otherPerLoanNum,2)
  1260. if loanAccountNum!=0:
  1261. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷款账户/全贷款账户数'] = round(overdueLoanNum/loanAccountNum,2)
  1262. if creditAccountNum!=0:
  1263. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷记卡账户/全贷记卡账户数'] = round(overdueCreditNum/creditAccountNum,2)
  1264. if creditAccountNumZ!=0:
  1265. openAccountDf.loc[openAccountIndex, '有过透支记录的准贷记卡账户/全准贷记卡账户数']= round(overdueCreditNumZ/creditAccountNumZ,2)
  1266. # 0525新增
  1267. pledgeLoanDf = loanDf[loanDf['担保方式'] =='抵押']
  1268. pledgeCreditCardDf = creditCardDf[creditCardDf['担保方式'] == '抵押']
  1269. pledgeCreditCardDfZ = creditCardDfZ[creditCardDfZ['担保方式'] == '抵押']
  1270. isPledge = "否"
  1271. if pledgeLoanDf.index.size+pledgeCreditCardDf.index.size+pledgeCreditCardDfZ.index.size >0:
  1272. isPledge = "是"
  1273. creditLoanDf = loanDf[loanDf['担保方式'] == '信用/免担保']
  1274. creditCreditCardDf = creditCardDf[creditCardDf['担保方式'] == '信用/免担保']
  1275. creditCreditCardDfZ = creditCardDfZ[creditCardDfZ['担保方式'] == '信用/免担保']
  1276. isCredit = 0
  1277. if creditLoanDf.index.size + creditCreditCardDf.index.size + creditCreditCardDfZ.index.size > 0:
  1278. isCredit = creditLoanDf.index.size + creditCreditCardDf.index.size + creditCreditCardDfZ.index.size
  1279. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '是否存在担保方式为抵押的贷款'] = isPledge
  1280. briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, '担保方式为信用的贷款数量'] = isCredit
# Parse 24-period repayment-status indicators
  1282. def parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1283. #creditCardPayRecordMergeDf
  1284. # 去掉外币
  1285. creditCardMergeDf = creditCardMergeDf[creditCardMergeDf['币种']=='人民币元']
  1286. creditCardPayRecordMergeDf = creditCardPayRecordMergeDf[creditCardPayRecordMergeDf['账户编号'].isin(creditCardMergeDf['账户编号'].values)]
  1287. reportTime = queryInfo["reportTime"];
  1288. reportTime = str(np.datetime64(reportTime, "M"))+"-02"#06-02,统计24期还款状态报告期,按每月的2号,避免chu'xian
  1289. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)
  1290. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)
  1291. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)
  1292. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)
  1293. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)
  1294. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)
  1295. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)
  1296. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)
  1297. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)
  1298. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)
  1299. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或大等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)
  1300. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)
  1301. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近3月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)
  1302. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)
  1303. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)
  1304. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)
  1305. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)
  1306. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)
  1307. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)
  1308. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)
  1309. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)
  1310. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)
  1311. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)
  1312. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)
  1313. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1314. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1315. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1316. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,6)
  1317. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1318. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1319. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)\
  1320. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,3)
  1321. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“1”的次数'] = \
  1322. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)\
  1323. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,6)
  1324. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“1”的次数'] = \
  1325. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)\
  1326. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,12)
  1327. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“1”的次数'] = \
  1328. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)\
  1329. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,24)
  1330. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“2”的次数'] = \
  1331. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)\
  1332. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,6)
  1333. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“2”的次数'] = \
  1334. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)\
  1335. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,12)
  1336. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“2”的次数'] = \
  1337. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)\
  1338. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,24)
  1339. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“3”的次数'] = \
  1340. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)\
  1341. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1342. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“3”的次数'] = \
  1343. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)\
  1344. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1345. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“3”的次数'] = \
  1346. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)\
  1347. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1348. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“4”的次数'] = \
  1349. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)\
  1350. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1351. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“4”的次数'] = \
  1352. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)\
  1353. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1354. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"G"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"G",24)
  1355. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDf,reportTime,"G",24)
  1356. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDfZ,reportTime,"G",24)
  1357. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"Z"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"Z",24)
  1358. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24)
  1359. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月状态正常账户数目'] = prp.getLoanNormalCount(loanPayRecordMergeDf,reportTime,24)
  1360. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24)
  1361. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDf,reportTime,24)
  1362. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1363. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1364. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去3个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,3)
  1365. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去6个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,6)
  1366. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去12个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,12)
  1367. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去24个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,24)
  1368. #概要信息里的字段,从还款状态计算
  1369. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户过去5年出现逾期的所有账户数目'] = \
  1370. prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24*5)+prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24*5)\
  1371. +prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24*5)
  # parseCreditCardMergeAndPayRecordDf(df, payRcdDf)
  # Fills the credit-card overdue / repayment-count cells of creditCardAccountInfoDf (row
  # creditCardAccountInfoIndex) from the merged card table `df` and its repayment records
  # `payRcdDf`. The work is guarded by `if not df.empty and not payRcdDf.empty`; there is no
  # else branch and no return statement.
  # creditCardAccountInfoDf, creditCardAccountInfoIndex, queryInfo, utils, cip and np are not
  # defined inside this function -- presumably module-level names; verify.
  # NOTE(review): this listing carries a line-number gutter and has lost its indentation, so
  # which statements sit under `if not normalDf.empty:` is inferred, not visible -- confirm
  # against the original file.
  1372. # Parse credit-card repayment-record indicators (the original comment said "loan")
  1373. def parseCreditCardMergeAndPayRecordDf(df,payRcdDf):
  1374. if not df.empty and not payRcdDf.empty:
  1375. # Normal accounts: drop rows whose 账户状态 equals any of the three literals below
  1376. normalDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1377. if not normalDf.empty:
  # Keep only repayment rows that belong to the retained accounts.
  1378. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  # presumably converts 还款状态 to something numeric (it is compared with 0 below) -- TODO confirm
  1379. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  1380. # The "current" state is the latest repayment record of each account (0529)
  # Sort by account ascending and repayment date descending, then keep the first row per account.
  1381. curOverduePayRcdDf = overduePayRcdDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  1382. curOverduePayRcdDf = curOverduePayRcdDf.groupby(['账户编号']).head(1)
  1383. curOverduePayRcdDf = curOverduePayRcdDf[curOverduePayRcdDf['还款状态'] > 0]
  1384. # Keep a reference to the unfiltered records; rows with status 0 are not removed here
  1385. payRcdMaxOverdueDf = overduePayRcdDf;
  1386. # overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态'] > 0]
  1387. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数'] = curOverduePayRcdDf['账户编号'].unique().size
  1388. # From the credit-card info, after excluding the statuses above: distinct currently-overdue accounts / number of retained accounts (row count of normalDf)
  1389. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数占比'] = round(curOverduePayRcdDf['账户编号'].unique().size / normalDf.index.size, 2)
  1390. # From the credit-card info, after the same exclusion: number of distinct issuers (发卡机构) among the currently-overdue accounts
  1391. overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(curOverduePayRcdDf['账户编号'].values)]
  1392. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size
  # Ratio of overdue issuers to all distinct issuers among the retained accounts, rounded to 2 places.
  1393. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数占比'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] / normalDf['发卡机构'].unique().size, 2)
  # Same helper called for windows of 3, 6, 9, 12 and 24; the second argument is presumably a month count (the column names say so) -- verify in cip.
  1394. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  1395. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  1396. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近9月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  1397. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12);
  1398. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  # Report time is read from queryInfo and passed only to getPayRcdMaxOverdueNumMonth below.
  1399. reportTime = queryInfo["reportTime"]
  1400. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf,reportTime, 24);
  1401. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近3个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,3);
  1402. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近6个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,6);
  1403. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近9个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,9);
  1404. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近12个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,12);
  1405. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近24个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,24);
  # Repayment-count indicator: filter payRcdDf again (not overduePayRcdDf), keep the 24 most
  # recent rows per account, then count the rows whose 还款状态 is one of the string codes in payStatus.
  1406. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  1407. payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  1408. payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(24)
  1409. payStatus = ["G", "D", "C", "N", "M", "1", "2", "3", "4", "5", "6", "7"]
  1410. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  1411. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  1412. # From the credit-card info, after excluding the statuses above: per-account count of 24-month repayment statuses that are "G", "D", "C", "N", "M" or a digit
  # The stored value is the largest per-account count, not a total across accounts.
  1413. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡24期还款记录次数'] = np.max(payRcdTimes)
  # parseRecoveryInfoMergeDf(df)
  # Derives recovery ("被追偿") indicators from the merged recovery table `df` and writes them
  # into briefInfoDf_recoveryInfoSum (row recoveryInfoSumIndex) and
  # creditTradeDetailDf_recoveryInfo (row recoveryInfoIndex). Guarded by `if not df.empty`;
  # there is no return statement.
  # Both target frames, both row indexes and np are not defined inside this function --
  # presumably module-level names; verify.
  # NOTE(review): this listing carries a line-number gutter and has lost its indentation, so
  # where the body of `if not recoveryMaxPayDf.empty:` ends is inferred, not visible -- confirm
  # against the original file.
  1414. # Parse the merged recovery information
  1415. def parseRecoveryInfoMergeDf(df):
  1416. if not df.empty:
  # Rows whose repayment status at transfer is not the '--' placeholder.
  1417. recoveryMaxPayDf = df[df['债权转移时的还款状态'] !='--']
  # Rows whose 账户状态 equals '催收'.
  1418. recoveryStatusCs = df[df['账户状态'] == '催收']
  1419. if not recoveryMaxPayDf.empty:
  1420. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时的最大还款状态'] = np.max(recoveryMaxPayDf['债权转移时的还款状态']);
  1421. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时属于催收状态的账户数'] = recoveryStatusCs.index.size;
  # Share of '催收' rows among all rows of df, rounded to 2 places.
  1422. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时属于催收状态的账户数/被追偿信息总数'] = round(recoveryStatusCs.index.size/df.index.size,2);
  1423. #creditTradeDetailDf_recoveryInfo
  1424. # Total number of recovery accounts (row count of df)
  1425. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex,'被追偿账户总数'] = df.index.size;
  # Number of distinct 业务种类 values.
  1426. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '被追偿业务种类'] = df['业务种类'].unique().size;
  1427. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '最新一笔被追偿债券接收时间'] = np.max(df['债权接收日期']);
  # NOTE(review): the column name says "total" (总) but the value is np.max of 债权金额, not a sum -- confirm which is intended.
  1428. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '总债权金额'] = np.max(df['债权金额']);
  # NOTE(review): same expression as the guarded assignment above; if this line is outside that guard it also runs when recoveryMaxPayDf is empty -- confirm.
  1429. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '债权转移时的最大还款状态'] = np.max(recoveryMaxPayDf['债权转移时的还款状态']);
  # main(pdf_path): parse one credit-report PDF and return the assembled result string.
  #   1. Open the PDF with pdfplumber, extract every table of every page, and file each table
  #      in dfMap under a key chosen by comparing its first / second row with the *_header lists.
  #      A page's first table that utils.checkHeader rejects is appended to the last filed table.
  #   2. Walk dfMap and hand each entry to the matching parse* / dfParser.merge* routine.
  #   3. Build `result` by concatenating utils.toJson(...) fragments separated by "," and wrapped
  #      in "{" ... "}". Three placeholder tokens (briefInfoDf_loanTradeInfo,
  #      briefInfoDf_recoveryInfoSum, briefInfoDf_overdueInfoSum) are inserted early and replaced
  #      later, once the corresponding frames have been filled.
  # pdf_path: path of the PDF file. Returns: the string `result`.
  # keyList, dfMap, allHeaders, queryInfo, the *_header lists, the *AccountDfs lists and the
  # result DataFrames are not defined inside this function -- presumably module-level state; verify.
  # NOTE(review): this listing carries a line-number gutter and has lost its indentation, so the
  # nesting described in these comments is inferred -- confirm against the original file.
  1430. def main(pdf_path):
  1431. # Start parsing the PDF
  1432. fileName = os.path.basename(pdf_path)
  1433. print(fileName)
  1434. with pdfplumber.open(pdf_path) as pdf:
  1435. for p in range(0, len(pdf.pages)):
  1436. # print(p)
  1437. page = pdf.pages[p]
  1438. # first_page = pdf.pages[1]
  1439. # if p == 3:
  1440. # print(3)
  1441. tables = page.extract_tables();
  1442. for i in range(0, len(tables)):
  1443. # print(i)
  1444. table = tables[i]
  1445. df = pd.DataFrame(table);
  1446. # if p==12:
  1447. # logger.info(p)
  # Page-continuation handling: only the first table of a page (i == 0) is considered, and only
  # once keyList holds more than one entry.
  1448. if len(keyList) > 1 and i == 0: # Check whether the previous table was split across pages
  # presumably checkHeader reports whether df starts with one of the known headers -- verify in utils
  1449. if not utils.checkHeader(df, allHeaders):
  1450. key = keyList[-1];
  1451. dfObj = dfMap[key]
  1452. # dfObj["nextDf"]=df;
  1453. # Loan info, credit-card info, enforcement records
  1454. if key == "loanDfs" or key == "creditCardDfs" or key == "creditCardDfsZ" or key == "forceExecRcdDfs" or key == 'recoveryInfoDfs' or key == "housingFundRcdDfs": # List-type entries
  1455. lastDfObj = dfObj["dfs"][-1];
  # isByPage stores the 1-based page number of the continuation, as a string.
  1456. lastDfObj["isByPage"] = str(p + 1);
  # NOTE(review): both branches of this column-count test run the same pd.concat.
  1457. if len(dfObj["dfs"][-1]["df"].columns) == len(df.columns): # Same number of columns
  1458. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0,ignore_index=True); # Merge into the last table
  1459. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1460. else:
  1461. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1462. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True);
  1463. else: # Query-record detail and other entries stored as a single table
  1464. dfObj["isByPage"] = str(p + 1);
  1465. logger.info(fileName+"#"+key)
  # NOTE(review): here too both branches run the same pd.concat.
  1466. if len(dfObj["df"].columns) == len(df.columns):
  1467. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1468. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1469. else:
  1470. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1471. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1472. # dfObj["nextDf"] = df;
  1473. # Merge df if the column counts are equal
  # Skip header matching for a continuation table.
  1474. continue;
  # Header rows with empty / None cells removed; headerList1 stays [] for a one-row table.
  1475. headerList0 = df.loc[0, :].tolist() # Row 0 as header
  1476. headerList0 = list(filter(None, headerList0))
  1477. headerList1 = []
  1478. if df.index.size>1:
  1479. headerList1 = df.loc[1, :].tolist() # Row 1 as header
  1480. headerList1 = list(filter(None, headerList1))
  # Header dispatch: the first matching branch sets dfKey, stores df in dfMap and appends dfKey to keyList.
  1481. if headerList1 == queryInfoDf_header: # Queried-subject info; matched on the second row
  1482. queryInfoDf = df;
  1483. dfKey = "queryInfoDf"
  1484. dfMap[dfKey]["df"] = df;
  1485. keyList.append(dfKey);
  1486. elif headerList0 == identity_header: # Identity info
  1487. identityDf = df[:2] # Take the first 2 rows
  1488. addressDf = df.loc[2:4,:] # Rows labelled 2 through 4 (.loc slicing is inclusive), all columns
  1489. addressDf = addressDf.reset_index(drop=True)
  1490. mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5)
  1491. identityDf = pd.concat([identityDf, addressDf], axis=1, ignore_index=True) # Concatenate side by side
  1492. dfKey = "identityDf"
  1493. dfMap[dfKey]["df"] = identityDf;
  1494. keyList.append(dfKey);
  1495. # Attach the phone-number df
  1496. dfMap[dfKey]["mobileDf"] = mobileDf
  1497. elif headerList0 == mateDf_header: # Spouse info
  1498. mateDf = df;
  1499. dfKey = "mateDf"
  1500. dfMap[dfKey]["df"] = df;
  1501. keyList.append(dfKey);
  1502. elif headerList0 == liveInfoDf_header: # Residence info
  # NOTE(review): the local assigned here is named mateDf although this is the residence branch; it is not read again in this function.
  1503. mateDf = df;
  1504. dfKey = "liveInfoDf"
  1505. dfMap[dfKey]["df"] = df;
  1506. keyList.append(dfKey);
  1507. elif headerList0 == occupationInfo_header: # Occupation info; may span pages
  1508. occupationDf = df;
  1509. dfKey = "occupationDf"
  1510. dfMap[dfKey]["df"] = df;
  1511. keyList.append(dfKey);
  1512. # elif headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1: # 查询信息概要 第二行为数据
  1513. # queryInfoBriefDf = df;
  1514. # dfKey = "queryInfoBriefDf"
  1515. # dfMap[dfKey]["df"] = df;
  1516. # keyList.append(dfKey);
  1517. elif headerList0 == loanTradeInfo_header: # Credit transaction info
  1518. loanTradeInfoDf = df;
  1519. dfKey = "loanTradeInfoDf";
  1520. dfMap[dfKey]["df"] = df;
  1521. keyList.append(dfKey);
  1522. elif headerList1 == recoveryInfoSumDf_header: # Recovery info summary
  1523. recoveryInfoSumDf = df;
  1524. dfKey = "recoveryInfoSumDf";
  1525. dfMap[dfKey]["df"] = df;
  1526. keyList.append(dfKey);
  1527. elif headerList1 == badDebtsInfoSumDf_header: # Bad-debt info
  1528. badDebtsInfoSumDf = df;
  1529. dfKey = "badDebtsInfoSumDf";
  1530. dfMap[dfKey]["df"] = df;
  1531. keyList.append(dfKey);
  1532. elif headerList1 == overdueInfoSumDf_header: # Overdue / overdraft info summary
  1533. overdueInfoSumDf = df;
  1534. dfKey = "overdueInfoSumDf";
  1535. dfMap[dfKey]["df"] = df;
  1536. keyList.append(dfKey);
  1537. elif headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1: # Non-revolving loan account summary
  1538. loanAccountInfoSumDf = df;
  1539. dfKey = "loanAccountInfoSumDf";
  1540. dfMap[dfKey]["df"] = df;
  1541. keyList.append(dfKey);
  1542. elif headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1: # Credit-card info summary
  1543. creditCardInfoSumDf = df;
  1544. dfKey = "creditCardInfoSumDf";
  1545. dfMap[dfKey]["df"] = df;
  1546. keyList.append(dfKey);
  1547. elif headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1: # Quasi-credit-card info summary; no data at present
  1548. dfKey = "creditCardInfoSumDfZ";
  1549. dfMap[dfKey]["df"] = df;
  1550. keyList.append(dfKey);
  1551. elif headerList0 == repaymentSumDf_header0:# Related repayment-liability summary
  1552. dfKey = "repaymentSumDf";
  1553. dfMap[dfKey]["df"] = df;
  1554. keyList.append(dfKey);
  1555. elif headerList0 == publicInfoBriefDf_header0: # Public info overview
  1556. dfKey = "publicInfoBriefDf";
  1557. dfMap[dfKey]["df"] = df;
  1558. keyList.append(dfKey);
  1559. elif headerList0 == queryRecordSumDf_header0:# Query-record summary
  1560. dfKey = "queryRecordSumDf";
  1561. dfMap[dfKey]["df"] = df;
  1562. keyList.append(dfKey);
  # The list-type keys below append a new {"df": df} entry instead of overwriting a single "df".
  1563. elif headerList0 == loan_header: # Loan accounts: revolving loans, non-revolving loans, sub-accounts under a revolving credit line
  1564. dfKey = "loanDfs";
  1565. dfMap[dfKey]["dfs"].append({"df": df});
  1566. keyList.append(dfKey);
  1567. elif headerList0 == creditCard_header: # Credit-card accounts
  1568. dfKey = "creditCardDfs";
  1569. dfMap[dfKey]["dfs"].append({"df": df});
  1570. keyList.append(dfKey);
  1571. elif headerList0 == creditCardZ_header: # Quasi-credit-card accounts; cannot yet be merged with credit cards
  1572. dfKey = "creditCardDfsZ";
  1573. dfMap[dfKey]["dfs"].append({"df": df});
  1574. keyList.append(dfKey);
  1575. elif headerList0 == queryRecordDetailDf_header: # Query-record detail
  1576. dfKey = "queryRecordDetailDf";
  1577. dfMap[dfKey]["df"] = df;
  1578. keyList.append(dfKey);
  1579. elif headerList0 == housingFundRcdDfs_header: # Housing-fund records (the original comment repeated "query-record detail")
  1580. dfKey = "housingFundRcdDfs";
  1581. dfMap[dfKey]["dfs"].append({"df": df});
  1582. keyList.append(dfKey);
  1583. elif headerList0 == forceExecRcdDfs_header: # Enforcement records
  1584. dfKey = "forceExecRcdDfs";
  1585. dfMap[dfKey]["dfs"].append({"df": df});
  1586. keyList.append(dfKey);
  1587. elif headerList0 == recoveryInfoDfs_header: # Recovery info
  1588. dfKey = "recoveryInfoDfs";
  1589. dfMap[dfKey]["dfs"].append({"df": df});
  1590. keyList.append(dfKey);
  1591. # Record the 1-based page number on the matched entry
  # NOTE(review): dfKey is only assigned inside the branches above and there is no else branch; a table matching none of them would reuse the previous dfKey (or find it unbound on the first table) -- confirm.
  1592. dfMap[dfKey]["page"] = p + 1;
  1593. logger.info(fileName+"#"+"组装pdf数据完成")
  1594. logger.info(fileName+"#"+"解析基础pdf数据开始")
  1595. # print(dfMap)
  1596. for key in dfMap:
  1597. print(key)
  1598. # Parse the collected tables and build the indicators
  1599. for key in dfMap:
  1600. tempDfObjx = dfMap[key];
  # Entries without a "page" value are not processed; "page" is only set when a table was filed above.
  1601. if tempDfObjx.__contains__("page"):
  1602. # print(tempDfObjx)
  1603. logger.info(fileName+"#"+key + "-page-" + str(tempDfObjx["page"]))
  # List-type entries carry "dfs"; each element is passed to a dfParser.merge* routine and the result is collected.
  1604. if tempDfObjx.__contains__("dfs"):
  1605. print("二")
  1606. # print(tempDfObjx)
  1607. print(len(tempDfObjx["dfs"]))
  1608. print(key)
  1609. if key == "loanDfs": # Loan accounts
  1610. for idx in range(0, len(tempDfObjx["dfs"])):
  1611. tempDfObj = tempDfObjx["dfs"][idx];
  1612. # print(tempDfObj)
  1613. print(idx)
  1614. # print(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  1615. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime'])) #yuan
  # NOTE(review): for both card keys below the merge call is commented out, so these loops only print idx and append nothing to creditCardAccountDfs / creditCardAccountDfsZ here.
  1616. elif key == "creditCardDfs": # Credit-card accounts
  1617. for idx in range(0, len(tempDfObjx["dfs"])):
  1618. print(idx)
  1619. # tempDfObj = tempDfObjx["dfs"][idx];
  1620. # tempCreditCardDf = dfParser.mergeCreditCardDf(tempDfObj, idx,queryInfo['reportTime']);
  1621. # # print(tempCreditCardDf)
  1622. # if tempCreditCardDf!=None:
  1623. # creditCardAccountDfs.append(tempCreditCardDf)
  1624. elif key == "creditCardDfsZ": # Quasi-credit-card accounts
  1625. for idx in range(0, len(tempDfObjx["dfs"])):
  1626. print(idx)
  1627. # tempDfObj = tempDfObjx["dfs"][idx];
  1628. # tempCreditCardDfZ = dfParser.mergeCreditCardDfZ(tempDfObj, idx,queryInfo['reportTime'])
  1629. # if tempCreditCardDfZ!=None:
  1630. # creditCardAccountDfsZ.append(tempCreditCardDfZ)
  1631. elif key == "recoveryInfoDfs": # Recovery-info accounts (the original comment repeated "credit-card merge")
  1632. for idx in range(0, len(tempDfObjx["dfs"])):
  1633. print(idx)
  1634. tempDfObj = tempDfObjx["dfs"][idx];
  1635. recoveryInfoAccountDfs.append(dfParser.mergeRecoveryInfoDf(tempDfObj, idx, queryInfo['reportTime']))
  1636. elif key == "housingFundRcdDfs": # Housing-fund records (the original comment repeated "credit-card merge")
  1637. for idx in range(0, len(tempDfObjx["dfs"])):
  1638. print(idx)
  1639. tempDfObj = tempDfObjx["dfs"][idx];
  1640. housingFundRcdAccountDfs.append(dfParser.mergeHousingFundRcdDf(tempDfObj, idx, queryInfo['reportTime']))
  1641. else: # Other list-type keys: only log the continuation page, if any
  1642. for tempDfObj in (tempDfObjx["dfs"]):
  1643. if tempDfObj.__contains__("isByPage"):
  1644. logger.info(fileName+"#"+key + "============其他被分页页数============" + str(tempDfObj["isByPage"]))
  1645. # logger.info(fileName+"#"+tempDfObj["df"].values)
  1646. else: # Single-table entries
  1647. # print(tempDfObjx)
  1648. tempDfObj = tempDfObjx;
  1649. if tempDfObj.__contains__("isByPage"):
  1650. logger.info(fileName+"#"+key + "============被分页页数================" + str(tempDfObj["isByPage"]))
  1651. # logger.info(fileName+"#"+tempDfObj["df"].values)
  # Dispatch each single-table key to its parser; the numbered print() calls bracket each parser call as progress markers.
  1652. if key == "queryInfoDf": # Parse the queried-subject info
  1653. print(1)
  1654. parseQueryInfo(tempDfObj);
  1655. # print("\033[1;31m +查询信息+ \033[0m")
  1656. # print(queryInfo)
  1657. print(2)
  1658. elif key == "identityDf": # Identity info
  1659. print(3)
  1660. parseIdentity(tempDfObj)
  1661. # print("\033[1;31m +身份信息+ \033[0m")
  1662. # print(identity)
  1663. print(4)
  1664. elif key == "mateDf": # Spouse info
  1665. print(5)
  1666. parseMate(tempDfObj)
  1667. # print("\033[1;31m +配偶信息+ \033[0m")
  1668. # print(mate)
  1669. # print(mateInfoDf)
  1670. print(6)
  1671. elif key == "liveInfoDf": # Residence info
  1672. print(7)
  1673. parseLiveInfo(tempDfObj)
  1674. # print(liveInfoDf)
  1675. print(7)
  1676. # print("\033[1;31m +居住信息+ \033[0m")
  1677. elif key == "occupationDf": # Occupation info (the original comment repeated "residence info")
  1678. print(8)
  1679. parseOccupationInfoDf(tempDfObj)
  1680. # print(occupationDf)
  1681. print(9)
  1682. elif key == "loanTradeInfoDf": # Credit-transaction info notice
  1683. print(10)
  1684. parseLoanTradeInfo(tempDfObj);
  1685. # print("\033[1;31m +信贷交易信息提示+ \033[0m")
  1686. # print(loanTradeInfo)
  1687. print(11)
  1688. elif key == "badDebtsInfoSumDf": # Bad-debt info summary (the author noted this step got stuck)
  1689. print(12)
  1690. # print(tempDfObj)
  1691. parseBadDebtsInfoSumDf(tempDfObj)
  1692. # print("\033[1;31m +呆账信息汇总+ \033[0m")
  1693. # print(overdueBrief)
  1694. # print(briefInfoDf_badDebtsInfoSum) # yuan
  1695. print(13)
  1696. elif key == "recoveryInfoSumDf": # Recovery info summary - asset disposal and advances
  1697. print(14)
  1698. parseRecoveryInfoSum(tempDfObj)
  1699. # print("\033[1;31m +资产处置和垫款+ \033[0m")
  1700. # print(overdueBrief)
  1701. print(15)
  1702. elif key == "overdueInfoSumDf": # Overdue (overdraft) info summary
  1703. print(16)
  1704. # print(tempDfObj) # yuan
  1705. parseOverdueInfoSum(tempDfObj)
  1706. # print("\033[1;31m +逾期(透支)信息汇总+ \033[0m")
  1707. # print(briefInfoDf_overdueInfoSum)
  1708. # print(overdueInfo)
  1709. print(17)
  1710. elif key == "loanAccountInfoSumDf": # Non-revolving loan account summary TODO
  1711. print(18)
  1712. parseLoanAccountInfoSum(tempDfObj)
  1713. # print(loanAccountInfoSumDf)
  1714. print(19)
  # NOTE(review): no header branch in the extraction loop above files a table under the next two keys; they can only carry "page" if set elsewhere -- confirm.
  1715. elif key == "cycleCreditAccountInfoSumDf":# Revolving credit line
  1716. print(20)
  1717. # print(cycleCreditAccountInfoSumDf)
  1718. parseCycleCreditAccountInfoSum(tempDfObj)
  1719. print(21)
  1720. elif key == "cycleLoanAccountInfoSumDf":# Revolving loan
  1721. print(23)
  1722. parseCyleLoanAccountInfoSum(tempDfObj)
  1723. # prin24)
  1724. elif key == "creditCardInfoSumDf":# Credit card
  1725. print(25)
  1726. parseCreditCardInfoSum(tempDfObj)
  1727. # print(creditCardInfoSumDf)
  1728. print(26)
  1729. elif key == "creditCardInfoSumDfZ": # Quasi-credit card
  1730. # print(creditCardInfoSumDfZ)
  1731. print(27)
  1732. parseCreditCardInfoSumZ(tempDfObj)
  1733. print(28)
  1734. elif key == "repaymentSumDf": # Related repayment liability
  1735. print(29)
  1736. parseRepaymentSum(tempDfObj)
  1737. print(30)
  1738. # print("还款责任集合")
  1739. elif key == "publicInfoBriefDf":
  1740. print(31)
  1741. # print(tempDfObj)
  1742. parsePublicInfoBrief(tempDfObj);
  1743. print(32)
  1744. elif key == "queryRecordSumDf":
  1745. print(33)
  1746. parseQueryRecordSum(tempDfObj);
  1747. print(34)
  1748. elif key == "queryRecordDetailDf": # Query-record detail
  1749. print(35)
  1750. parseQueryInfoDetail(tempDfObj)#
  1751. print(36)
  1752. logger.info(fileName+"#"+"解析基础pdf数据完成")
  1753. print("解析完成")
  # Result assembly starts here: fragments are appended in a fixed order, each followed by ",".
  1754. result = "{"
  1755. # Basic info
  1756. # result+=("\033[1;34m +身份信息+ \033[0m")+"\n"
  1757. result+=utils.toJson(identityInfoDf)+","
  1758. result += utils.toJson(mateInfoDf) + ","
  1759. result += utils.toJson(liveInfoDf) + ","
  1760. result += utils.toJson(occupationInfoDf) + ","
  1761. # result+=("\033[1;34m +概要信息+ \033[0m")+","
  1762. # result+=("\033[1;34m +信贷交易信息提示+ \033[0m")+","
  1763. # result+=utils.toJson(briefInfoDf_loanTradeInfo)+","
  1764. result += "briefInfoDf_loanTradeInfo" + "," # Placeholder, replaced near the end of this function
  1765. # result+=("\033[1;34m +被追偿信息汇总及呆账信息汇总+ \033[0m")+","
  1766. result+="briefInfoDf_recoveryInfoSum"+"," # Placeholder, replaced after parseRecoveryInfoMergeDf runs
  1767. result += utils.toJson(briefInfoDf_badDebtsInfoSum) + ","
  1768. # result+=("\033[1;34m +逾期(透支)信息汇总+ \033[0m")+","
  1769. # Placeholder for now, replaced after parsePayRcdStatus runs
  1770. result+="briefInfoDf_overdueInfoSum"+","
  1771. # result+=("\033[1;34m +信贷交易授信及负债信息概要+ \033[0m")+","
  1772. result+=utils.toJson(briefInfoDf_loanTradeCreditInfo)+","
  1773. # Public info
  1774. result += utils.toJson(publicInfoBriefDf) + ","
  1775. # Query-record summary
  1776. result += utils.toJson(queryRecordSumDf) + ","
  1777. # Build the loan df separately
  1778. # logger.info(fileName+"#"+"\033[1;34m +贷款信息Dataframe+ \033[0m")
  1779. # logger.info(fileName+"#"+dfParser.dfHeaderLoan)
  1780. logger.info(fileName+"#"+pdf_path+"解析贷款数据开始")
  # Start from empty frames with the dfParser column layouts, then append every merged loan account.
  1781. loanMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoan)
  1782. loanPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoanPayRecord)
  1783. loanSpecialTradeMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoanSpecialTrade)# Special transactions
  1784. # Collect the data
  1785. for loanDfObj in loanAccountDfs:
  1786. loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True);
  1787. loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0,ignore_index=True);
  1788. loanSpecialTradeMergeDf = pd.concat([loanSpecialTradeMergeDf, loanDfObj["specialTradeDf"]], axis=0, ignore_index=True);
  1789. # logger.info(fileName+"#"+loanMergeDf.values)
  1790. # logger.info(fileName+"#"+"\033[1;34m +贷款信息还款记录Dataframe+ \033[0m")
  1791. # logger.info(fileName+"#"+dfParser.dfHeaderLoanPayRecord)
  1792. # logger.info(fileName+"#"+loanPayRecordMergeDf.values)
  1793. #
  1794. #============================== Credit transaction detail ===============================
  1795. # Recovery info
  1796. # Merged recovery-info df
  1797. recoveryInfoMergeDf = pd.DataFrame(columns=dfParser.dfHeaderRecoveryInfo)
  1798. for recoveryInfoDfObj in recoveryInfoAccountDfs:
  1799. recoveryInfoMergeDf = pd.concat([recoveryInfoMergeDf, recoveryInfoDfObj["recoveryInfoDf"]], axis=0,
  1800. ignore_index=True);
  1801. parseRecoveryInfoMergeDf(recoveryInfoMergeDf);
  1802. # Recovery info
  # str.replace substitutes every occurrence of the token in the text built so far.
  1803. result = result.replace("briefInfoDf_recoveryInfoSum", utils.toJson(briefInfoDf_recoveryInfoSum))# Replace the summary placeholder with the computed indicators
  1804. result += utils.toJson(creditTradeDetailDf_recoveryInfo) + "," # Placeholders are used because some summary indicators are computed from the detail data
  1805. # Special transactions
  1806. parseSpecialTrade(loanSpecialTradeMergeDf)
  1807. result += utils.toJson(creditTradeDetailHeader_specialTrade) + ","
  1808. # Credit transaction detail - parse non-revolving loan accounts
  1809. parseLoanAccountInfo(loanMergeDf);
  1810. result += utils.toJson(creditTradeDetailDf_loanAccountInfo) + ","
  1811. # Sub-accounts under a revolving credit line
  1812. parseCycleCreditAccountInfo(loanMergeDf);
  1813. result += utils.toJson(creditTradeDetailDf_cycleCreditAccountInfo) + ","
  1814. # Revolving loans
  1815. parseCycleLoanAccountInfo(loanMergeDf);
  1816. result += utils.toJson(creditTradeDetailDf_cycleLoanAccountInfo) + ","
  1817. # Parse loan-account indicators
  1818. parseLoanMergeDf(loanMergeDf);
  1819. # Parse repayment-record indicators
  1820. parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf);
  1821. # logger.info(fileName+"#"+loanAccountInfo)
  1822. # logger.info(fileName+"#"+consts.loanAccountInfoHeader)
  1823. # logger.info(fileName+"#"+loanAccountInfoDf.values)
  1824. # result+=("\033[1;34m +贷款账户信息+ \033[0m")+","
  1825. result+=utils.toJson(loanAccountInfoDf)+","
  1826. logger.info(fileName+"#"+"解析贷款数据完成")
  1827. logger.info(fileName+"#"+"解析贷记卡数据开始")
  1828. # Merged credit-card df
  1829. creditCardMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCard)
  1830. creditCardPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecord)
  1831. # logger.info(fileName+"#"+"\033[1;34m +贷记卡信息Dataframe+ \033[0m")
  1832. # logger.info(fileName+"#"+dfParser.dfHeaderCreditCard)
  1833. # Collect the data
  1834. for creditCardDfObj in creditCardAccountDfs:
  1835. creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0, ignore_index=True);
  1836. creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]], axis=0,ignore_index=True);
  1837. # logger.info(fileName+"#"+creditCardMergeDf.values)
  1838. # Parse credit-card account indicators
  1839. parseCreditCardMergeDf(creditCardMergeDf);
  1840. parseCreditCardMergeAndPayRecordDf(creditCardMergeDf,creditCardPayRecordMergeDf)
  1841. # Merged quasi-credit-card df
  1842. creditCardMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardZ)
  1843. creditCardPayRecordMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecordZ)
  1844. for creditCardDfObj in creditCardAccountDfsZ:
  1845. creditCardMergeDfZ = pd.concat([creditCardMergeDfZ, creditCardDfObj["creditCardDfZ"]], axis=0, ignore_index=True);
  1846. creditCardPayRecordMergeDfZ = pd.concat([creditCardPayRecordMergeDfZ, creditCardDfObj["creditCardPayRecordDfZ"]], axis=0,ignore_index=True);
  1847. # Parse quasi-credit-card indicators
  1848. parseCreditCardMergeDfZ(creditCardMergeDfZ,creditCardPayRecordMergeDfZ);
  1849. logger.info(fileName+"#"+"解析贷记卡数据完成")
  1850. # Usage-rate indicators are derived below
  1851. # result+=("\033[1;34m +贷记卡账户信息+ \033[0m")+","
  1852. result+=utils.toJson(creditCardAccountInfoDf)+","
  1853. result += utils.toJson(creditCardAccountInfoDfZ) + ","
  1854. # Usage rate
  1855. parseUseRate()
  1856. result += utils.toJson(useRateDf) + ","
  1857. # Number of accounts opened
  1858. parseOpenAccount(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1859. result += utils.toJson(openAccountDf) + ","
  1860. # 24-period repayment status
  1861. parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1862. result += utils.toJson(payRcdStatusDf) + ","
  1863. # The overdue-summary indicators are only complete after the repayment-status step, so the placeholder is replaced here TODO
  1864. result = result.replace("briefInfoDf_overdueInfoSum",utils.toJson(briefInfoDf_overdueInfoSum))
  1865. # 0525: the credit-info overview indicators are computed after the open-account step, so the placeholder is replaced here
  1866. result = result.replace("briefInfoDf_loanTradeInfo", utils.toJson(briefInfoDf_loanTradeInfo))
  1867. # Housing fund
  1868. # Merged housing-fund df (the original comment repeated "merged recovery-info df")
  1869. housingFundRcdMergeDf = pd.DataFrame(columns=dfParser.dfHeaderHousingFundRcd)
  1870. for housingFundRcdDfObj in housingFundRcdAccountDfs:
  1871. housingFundRcdMergeDf = pd.concat([housingFundRcdMergeDf, housingFundRcdDfObj["housingFundRcdDf"]], axis=0,ignore_index=True);
  1872. parseHousingFundRcd(housingFundRcdMergeDf);
  1873. result += utils.toJson(housingFundRcdDf) + ","
  1874. # result+=("\033[1;34m +查询记录明细+ \033[0m")+","
  # Last fragment: no trailing comma before the closing brace.
  1875. result+=utils.toJson(queryRecordDetailDf)+""
  1876. result +="}"
  1877. # print(result)
  1878. return result;
  1879. #调用jar包
  def invokePboc(basePath, pdf_path):
      """Score the parsed report with the PBOC jar and upload the decision.

      Looks up the business numbers for the ID card stored in the module-level
      ``queryInfo``, hands the parsed-text path (``pdf_path`` with "pdf"
      replaced by "txt") to ``PBOC.calc`` and decodes the reply as JSON. A reply
      without ``errcode`` is forwarded to ``uploadAudit``; otherwise its
      ``errmsg`` is logged. Errors are logged and never raised to the caller.

      Args:
          basePath: Not used by this function; kept for caller compatibility.
          pdf_path: Path of the PDF report whose parsed text is scored.
      """
      try:
          card_id = queryInfo["queryInfoCardId"]
          # Both numbers are looked up by ID-card number.
          businessNum = dbController.getBussinessNum(card_id)
          coopBussinessNum = dbController.getCoopBussinessNum(card_id)
          pboc = PBOC()
          # NOTE(review): replace() rewrites every "pdf" in the path, not only
          # the extension; left as-is because the txt file is written elsewhere
          # using the same derivation.
          jarTxt = pboc.calc(pdf_path.replace("pdf", 'txt'), coopBussinessNum)
          result = json.loads(jarTxt)
          logger.info(result)
          if result.get("errcode") is None:
              uploadAudit(result, businessNum)
          else:
              logger.error(result["errmsg"])
      except Exception:
          # Log the complete traceback; the previous one-frame extract only
          # showed this function, not the place the error was raised.
          logger.error(traceback.format_exc())
  1903. #调用xxwjar包
  def invokeXxw(basePath, pdf_path):
      """Run the XXW jar on the parsed report, upload its output and archive it.

      Calls ``PBOC.calcXxw`` with the cooperation business number, the customer
      number and the parsed-text path (``pdf_path`` with "pdf" replaced by
      "txt"). The jar output is written to ``basePath + <card id> + ".txt"``,
      uploaded through ``uploadReportResultXxw`` and finally moved into
      ``basePath + "execed_txt/"``. Errors are logged and never raised.

      Args:
          basePath: Directory prefix (string-concatenated, so it is expected
              to end with a path separator).
          pdf_path: Path of the PDF report whose parsed text is processed.
      """
      try:
          card_id = queryInfo["queryInfoCardId"]
          # NOTE(review): this value is not used below; the lookup is kept so
          # the original call sequence is unchanged.
          businessNum = dbController.getBussinessNum(card_id)
          coopBussinessNum = dbController.getCoopBussinessNum(card_id)
          customerNum = dbController.getCustomerNum(card_id)
          pboc = PBOC()
          jarTxt = pboc.calcXxw(coopBussinessNum, customerNum, pdf_path.replace("pdf", 'txt'))
          logger.info(jarTxt)
          # Parsed only to validate/log the reply; the raw text is what gets saved.
          result = json.loads(jarTxt)
          logger.info(result)

          jsonPath = basePath + card_id + ".txt"
          logger.info(jsonPath)
          # NOTE(review): written with the platform default encoding, while the
          # parsed report elsewhere in this file is written as utf-8 - confirm
          # which encoding the upload endpoint expects.
          with open(jsonPath, 'w') as fp:
              fp.write(jarTxt)
          uploadReportResultXxw(jsonPath)

          # Make sure the archive directory exists before moving; previously the
          # move failed whenever "execed_txt/" had not been created yet.
          archiveDir = basePath + "execed_txt/"
          os.makedirs(archiveDir, exist_ok=True)
          descJsonPath = archiveDir + os.path.basename(jsonPath)
          shutil.move(jsonPath, descJsonPath)
      except Exception:
          # Full traceback instead of a single frame, so the failing call is visible.
          logger.error(traceback.format_exc())
  1943. #上传审批结果
  def uploadAudit(result, businessNum):
      """Post the approval decision for ``businessNum`` to the approval API.

      ``result["approveResult"] == "1"`` is sent as approval type "4" with the
      opinion "征信通过"; any other value is sent as type "3" with "征信拒绝".
      The JSON payload is quote-escaped, encrypted with the configured AES key,
      posted to the configured URL, and the decrypted response is logged.

      Args:
          result: Decoded jar reply; must contain the key ``approveResult``.
          businessNum: Business number placed in the request body and log line.
      """
      approved = result["approveResult"] == "1"
      approvalOpinion = "征信通过" if approved else "征信拒绝"
      approvalType = "4" if approved else "3"

      taskKey = config.get("baseconf", "taskKey")
      endpoint = config.get("baseconf", "appoveApiUrl")
      key = config.get("baseconf", "AESKey")

      header = {
          "ticket": "2938123198320412343",
          # Current epoch time in milliseconds plus 60 seconds.
          "timestamp": int(round(time.time() * 1000 + 60 * 1000)),
          "nonce": config.get("baseconf", "nonce"),
      }
      body = {
          "approvalType": approvalType,
          "businessNum": businessNum,
          "taskKey": taskKey,
          "approvalOpinion": approvalOpinion,
      }
      payload = {"header": header, "body": body}

      access_token = dbController.getToken()
      endpoint = endpoint + "?access_token=" + access_token
      headers = {"Content-Type": "application/json"}

      # The double quotes must be backslash-escaped before encryption
      # (original author's note: the call does not work otherwise).
      escaped = json.dumps(payload).replace('"', "\\\"")
      logger.info(escaped)

      encryptor = PBOC()
      cipherText = encryptor.encrypt(escaped, key)
      # Drop the last two characters of the encrypted text.
      # NOTE(review): presumably trailing characters added by encrypt() - confirm.
      cipherText = cipherText[0:len(cipherText) - 2]
      logger.info(cipherText)

      response = requests.post(endpoint, data=cipherText, headers=headers)
      text = response.text
      decryptor = PBOC()
      resultText = decryptor.decrypt(text, config.get("baseconf", "AESKey"))
      logger.info(businessNum + "#" + "uploadAudit upload_result:" + resultText)
  def uploadReportResult(basePath, pdf_path):
      """Upload the parsed report file and archive the source PDF.

      Posts the file at the module-level ``outPath`` to the configured upload
      API as docType "23" for the business number of the current
      ``queryInfo`` card id, logs the decrypted response, then moves
      ``pdf_path`` into ``basePath + "execed_new/"``. Errors are logged and
      never raised.

      Args:
          basePath: Directory prefix (string-concatenated, so it is expected
              to end with a path separator).
          pdf_path: Path of the PDF report; used for log prefixes and the move.
      """
      try:
          fileName = os.path.basename(pdf_path)
          logger.info(fileName+"#"+fileName+"#"+"准备上传文件")
          uploadApiUrl = config.get("baseconf", "uploadApiUrl")
          uploadApiUrl = uploadApiUrl + "?access_token=" + dbController.getToken()
          # Context manager guarantees the handle is closed even when the
          # request fails; the old code left it open on the error path.
          with open(outPath, 'rb') as reportFile:
              # Business number is looked up by ID-card number.
              businessNum = dbController.getBussinessNum(queryInfo["queryInfoCardId"])
              logger.info(fileName+"#"+fileName+"#"+"businessNum:"+businessNum)
              logger.info(fileName+"#"+"queryInfoCardId:" + queryInfo["queryInfoCardId"])
              data = {'docType': "23", 'businessNum': businessNum}
              response = requests.post(uploadApiUrl, files={'file': reportFile}, data=data)
          text = response.text
          pboc = PBOC()
          resultText = pboc.decrypt(text, config.get("baseconf", "AESKey"))
          logger.info(fileName+"#"+"upload_result:" + resultText)

          archiveDir = basePath + "execed_new/"
          descPdfPath = archiveDir + os.path.basename(pdf_path)
          # exist_ok avoids the check-then-create race of exists() + mkdir().
          os.makedirs(archiveDir, exist_ok=True)
          logger.info("移动文件 from " + pdf_path+" to "+descPdfPath)
          shutil.move(pdf_path, descPdfPath)
      except Exception:
          # Full traceback instead of a single frame, so the failing call is visible.
          logger.error(traceback.format_exc())
  def uploadReportResultXxw(json_path):
      """Upload the file at ``json_path`` to the configured upload API.

      The file is posted as docType "23" for the business number of the
      current ``queryInfo`` card id. The raw response and its decrypted form
      are logged. Errors are logged and never raised.

      Args:
          json_path: Path of the file to upload.
      """
      try:
          fileName = os.path.basename(json_path)
          logger.info("#"+json_path+"#"+"准备上传文件")
          uploadApiUrl = config.get("baseconf", "uploadApiUrl")
          uploadApiUrl = uploadApiUrl + "?access_token=" + dbController.getToken()
          # Context manager guarantees the handle is closed even when the
          # request fails; the old code left it open on the error path, which
          # can block the caller from moving the file afterwards.
          with open(json_path, 'rb') as uploadFile:
              # Business number is looked up by ID-card number.
              businessNum = dbController.getBussinessNum(queryInfo["queryInfoCardId"])
              logger.info(fileName+"#"+fileName+"#"+"businessNum:"+businessNum)
              logger.info(fileName+"#"+"queryInfoCardId:" + queryInfo["queryInfoCardId"])
              data = {'docType': "23", 'businessNum': businessNum}
              response = requests.post(uploadApiUrl, files={'file': uploadFile}, data=data)
          text = response.text
          logger.info("上传结果:"+text)
          pboc = PBOC()
          resultText = pboc.decrypt(text, config.get("baseconf", "AESKey"))
          logger.info(fileName+"#"+"uploadReportResultXxw:" + resultText)
      except Exception:
          # Full traceback instead of a single frame, so the failing call is visible.
          logger.error(traceback.format_exc())
  def updateParseInd(file_name):
      """Mark a report as being parsed and tell whether it already was.

      The certificate id is the second part of ``file_name`` (last four
      characters dropped) when that stem splits into exactly two parts on "_".
      If the stored parse indicator for that id is already "1" the function
      returns True; otherwise it sets the indicator to "1".

      Args:
          file_name: Report file name, e.g. ``<name>_<cert id>.pdf``.

      Returns:
          True only when the indicator was already "1". False in every other
          case, including an empty or differently-shaped name and any error
          (which is logged).
      """
      parseInd = "1"
      try:
          if file_name != "":
              # Strip the 4-character extension, then expect "<name>_<cert id>".
              arCert = file_name[0:-4].split("_")
              if len(arCert) == 2:
                  cert_id = arCert[1]
                  if dbController.getParseInd(cert_id) == "1":
                      return True
                  dbController.updateParseInd(cert_id, parseInd)
      except Exception:
          logger.error("update parse ind error")
          # Keep the cause in the log; the fixed message alone hid DB failures.
          logger.error(traceback.format_exc())
      return False
  2051. # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig')
  def moveFile(basePath, pdf_path):
      """Archive a processed PDF and its parsed txt file.

      The PDF goes to ``basePath + "execed_new/"`` and the txt file
      (``pdf_path`` with "pdf" replaced by "txt") goes to
      ``basePath + "execed_txt/"``. Target directories are created when
      missing. A source file that no longer exists is skipped with a log line
      instead of raising, so the other file is still archived.

      Args:
          basePath: Directory prefix (string-concatenated, so it is expected
              to end with a path separator).
          pdf_path: Path of the processed PDF report.
      """
      pdfName = os.path.basename(pdf_path)
      moves = (
          (pdf_path, basePath + "execed_new/", pdfName),
          (pdf_path.replace("pdf", 'txt'), basePath + "execed_txt/", pdfName.replace("pdf", 'txt')),
      )
      for srcPath, destDir, destName in moves:
          # exist_ok avoids the check-then-create race of exists() + mkdir().
          os.makedirs(destDir, exist_ok=True)
          if not os.path.exists(srcPath):
              # uploadReportResult already moves the PDF into execed_new/; do
              # not let that abort archiving of the remaining file.
              logger.info("skip move, source not found: " + srcPath)
              continue
          destPath = destDir + destName
          logger.info("移动文件 from " + srcPath + " to " + destPath)
          shutil.move(srcPath, destPath)
  2063. if __name__ == '__main__':
  2064. file_name = ""
  2065. # basePath = "D:/mydocument/myproject/git/busscredit/20200414_report/";
  2066. basePath = "D:/jin_rong/test_data/"
  2067. # basePath = "Z:/cr/parse/"
  2068. # file_name = "周颖500108199002111229.pdf"#准贷记卡已销户 呆账
  2069. # file_name = "王思13052819911012122X.pdf"#公积金
  2070. # file_name = "杨夏龙440902198410014270.pdf"#转出
  2071. # file_name = "翟彦超230125199004174216.pdf"#准贷记卡 呆账
  2072. # file_name = "蔡月辉330326198502116146.pdf" # 配偶
  2073. # file_name = "周芳芳342501198706111782.pdf" #被追偿信息
  2074. # file_name = "付春雁533001198507220344.pdf" # 公积金记录
  2075. # pdf_path = basePath + "陈洁350122199005027726.pdf" # 相关还款责任
  2076. # file_name = "叶翔_330126197005200077.pdf" # 准贷记卡分页
  2077. file_name = "1-1 个人信用报告展示样本(授信机构版)202004.pdf" #
  2078. # file_name = "姚钧_120101198903033539.pdf" #
  2079. pdf_path = basePath + file_name
  2080. if len(sys.argv)>1:
  2081. basePath = sys.argv[1]
  2082. pdf_path = basePath + sys.argv[2]
  2083. file_name = sys.argv[2]
  2084. # print(sys.argv)
  2085. isBat = False#批量的有问题
  2086. isPlt = config.get("baseconf", "isPlt");
  2087. if isBat:#批量生成数据不对
  2088. # print(isBat)
  2089. for file in os.listdir(basePath):
  2090. if file.endswith("pdf"):
  2091. start = timeit.default_timer();
  2092. pdf_path = basePath+file;
  2093. outPath = pdf_path.replace("pdf",'txt')
  2094. if os.path.exists(outPath):
  2095. continue;
  2096. logger.info(file + "解析开始...")
  2097. try:
  2098. result = main(pdf_path)
  2099. except:
  2100. info = sys.exc_info()
  2101. logger.error(info[0])
  2102. logger.error( info[1])
  2103. # logging.log(logging.ERROR, info[2])
  2104. logger.error(traceback.extract_tb(info[2], 1))
  2105. # print(result)
  2106. #输出到文件
  2107. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  2108. print(result.replace("\033[1;34m","").replace("\033[0m",""))
  2109. logger.info(file+"解析完成")
  2110. gc.collect()
  2111. s = timeit.default_timer() - start;
  2112. logger.info(str(s) + " 秒")
  2113. else:
  2114. if pdf_path.endswith("pdf"):
  2115. # print("啊咯哈")
  2116. start = timeit.default_timer();
  2117. outPath = pdf_path.replace("pdf", 'txt')
  2118. result = ""
  2119. if isPlt == "1":#生产模式
  2120. if not os.path.exists(outPath):#不存在才生成
  2121. try:
  2122. isExec = updateParseInd(file_name)
  2123. if not isExec:#没有在执行
  2124. # print(isExec)
  2125. logger.info(file_name + "解析开始...")
  2126. print("解析开始")
  2127. result = main(pdf_path)
  2128. print( "解析的数据")
  2129. # sys.stdout = open(outPath, mode='w', encoding='utf-8')
  2130. # print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  2131. with open(outPath, 'w', encoding='utf-8') as fp:
  2132. fp.write(result)
  2133. logger.info(file_name + "解析完成")
  2134. s = timeit.default_timer() - start;
  2135. logger.info(file_name+"#"+str(s) + " 秒")
  2136. #调用jar计算审批结果
  2137. cert_id = queryInfo["queryInfoCardId"]
  2138. productNum = dbController.getProductNum(cert_id)
  2139. if productNum != "":
  2140. if productNum == productNumJz:
  2141. # uploadReportResult(basePath, pdf_path);
  2142. invokePboc(basePath, pdf_path);
  2143. elif productNum == productNumXxw:
  2144. invokeXxw(basePath, pdf_path);
  2145. elif productNum == productNumFb:
  2146. uploadReportResult(basePath, pdf_path);
  2147. elif productNum == productNumKcd:
  2148. uploadReportResult(basePath, pdf_path);
  2149. #移动pdf和txt文件,新希望移动json
  2150. moveFile(basePath, pdf_path)
  2151. # elif productNumXy.find(productNum) >= 0:
  2152. # xyHttp.call_credit(result)
  2153. # else:
  2154. # try:
  2155. # businessNum = dbController.getBussinessNum(queryInfo["queryInfoCardId"]);
  2156. # localJarResult = xyHttp.callLocal(result)
  2157. # if localJarResult.get("errcode") == None:
  2158. # uploadAudit(localJarResult, businessNum)
  2159. # else:
  2160. # logger.error(localJarResult["errmsg"])
  2161. # except:
  2162. # info = sys.exc_info()
  2163. # logger.error(info[0])
  2164. # logger.error(info[1])
  2165. # # logging.log(logging.ERROR, info[2])
  2166. # logger.error(traceback.extract_tb(info[2], 1))
  2167. except:
  2168. info = sys.exc_info()
  2169. logger.error(file_name+"#"+"解析失败")
  2170. logger.error(info[0])
  2171. logger.error(info[1])
  2172. logger.error(traceback.extract_tb(info[2]))
  2173. else:#如果已经执行过了,移动文件
  2174. logger.info("移动文件"+pdf_path)
  2175. # descPdfPath = basePath + "execed/" + os.path.basename(pdf_path)
  2176. # if not os.path.exists(basePath + "execed/"):
  2177. # os.mkdir(basePath + "execed/")
  2178. # shutil.move(pdf_path, descPdfPath)
  2179. else:
  2180. isExec = updateParseInd(file_name)
  2181. if not isExec: # 没有在执行
  2182. result = main(pdf_path)
  2183. # sys.stdout = open(outPath, mode='w', encoding='utf-8')
  2184. # print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  2185. with open(outPath, 'w', encoding='utf-8') as fp:
  2186. fp.write(result)
  2187. logger.info(file_name + "解析完成")
  2188. s = timeit.default_timer() - start;
  2189. logger.info(file_name+"#"+str(s) + " 秒")
  2190. # uploadReportResult(basePath,pdf_path);
  2191. # 调用jar计算审批结果
  2192. cert_id = queryInfo["queryInfoCardId"]
  2193. productNum = dbController.getProductNum(cert_id)
  2194. if productNum != "":
  2195. if productNum == productNumJz:
  2196. invokePboc(basePath, pdf_path);
  2197. elif productNum == productNumXxw:
  2198. invokeXxw(basePath, pdf_path);
  2199. # elif productNumXy.find(productNum) >= 0:
  2200. # xyHttp.call_credit(result)
  2201. # else:
  2202. # try:
  2203. # businessNum = dbController.getBussinessNum(queryInfo["queryInfoCardId"]);
  2204. # localJarResult = xyHttp.callLocal(result)
  2205. # if localJarResult.get("errcode") == None:
  2206. # uploadAudit(localJarResult, businessNum)
  2207. # else:
  2208. # logger.error(localJarResult["errmsg"])
  2209. # except:
  2210. # info = sys.exc_info()
  2211. # logger.error(info[0])
  2212. # logger.error(info[1])
  2213. # # logging.log(logging.ERROR, info[2])
  2214. # logger.error(traceback.extract_tb(info[2], 1))