# Source listing: parseCreditPdf.py.bak (145 KB)
  # coding=utf-8
  import pdfplumber
  import pandas as pd
  import numpy as np
  import sys
  import os
  import traceback
  from prp import PrpCrypt
  # Indicator parsers.
  # NOTE: the ``payRcdIndexParser`` alias below rebinds the module-level name
  # ``prp``; ``PrpCrypt`` stays usable because it was imported by name above.
  import loanIndexParser as lip
  import payRcdIndexParser as prp
  import creditCardIndexParser as cip
  import queryInfoIndexParser as qip
  import requests
  import utils
  import time
  import consts
  import math
  import dfParser
  import gc
  from dbController import DbController
  from ini_op import Config

  # config.ini is looked up next to this source file.
  base_dir = os.path.dirname(os.path.abspath(__file__))
  config = Config(base_dir + "/config.ini")
  # Database handle, created at import time.
  dbController = DbController()
  # Silence pandas' SettingWithCopy warning for the whole process.
  pd.set_option('mode.chained_assignment', None)
  import log
  logger = log.logger
  # ---------------------------------------------------------------------------
  # Registry of raw report tables.
  #
  # ``dfMap`` maps a section key to a holder dict: single-table sections use
  # {"df": DataFrame, "nextDf": None}; repeating sections use
  # {"dfs": list, "nextDf": list}.  ``allHeaders`` collects every known header
  # row; each entry must be a plain list of cell strings.
  # ---------------------------------------------------------------------------
  dfMap = {}
  allHeaders = []  # every known table header row

  # Query information
  queryInfoDf = pd.DataFrame()
  queryInfoDf_header = ["被查询者姓名", "被查询者证件类型", "被查询者证件号码", "查询机构", "查询原因"]
  dfMap["queryInfoDf"] = {"df": queryInfoDf, "nextDf": None}
  allHeaders.append(queryInfoDf_header)

  # Identity information
  identityDf = pd.DataFrame()
  identity_header = ['性别', '出生日期', '婚姻状况', '学历', '学位', '就业状况', '国籍', '电子邮箱']
  addressDf = pd.DataFrame()  # mailing address
  dfMap["identityDf"] = {"df": identityDf, "nextDf": None, "mobiles": None}
  allHeaders.append(identity_header)

  # Spouse information
  mateDf = pd.DataFrame()
  mateDf_header = ['姓名', '证件类型', '证件号码', '工作单位', '联系电话']
  dfMap["mateDf"] = {"df": mateDf, "nextDf": None}
  allHeaders.append(mateDf_header)

  # Residence information (original note: not used yet, so not parsed for now)
  liveInfoDf = pd.DataFrame()
  liveInfoDf_header = ['编号', '居住地址', '住宅电话', '居住状况', '信息更新日期']
  dfMap["liveInfoDf"] = {"df": liveInfoDf, "nextDf": None}
  allHeaders.append(liveInfoDf_header)

  # Occupation information.  Only the first header is registered in
  # ``allHeaders``; ``occupationInfo_header1`` marks the second half of the
  # same table and is matched separately.
  occupationDf = pd.DataFrame()
  occupationInfo_header = ['编号', '工作单位', '单位性质', '单位地址', '单位电话']
  occupationInfo_header1 = ['编号', '职业', '行业', '职务', '职称', '进入本单位年份', '信息更新日期']
  dfMap["occupationDf"] = {"df": occupationDf, "nextDf": None}
  allHeaders.append(occupationInfo_header)

  # Previous query record
  preQueryRcd_header0 = ['上一次查询记录']
  allHeaders.append(preQueryRcd_header0)

  # Query record brief: the ``queryInfoBriefDf`` section is disabled and has
  # no registry entry.

  # Credit transaction hints
  loanTradeInfoDf = pd.DataFrame()
  loanTradeInfo_header = ['业务类型', '账户数', '首笔业务发放月份']
  dfMap["loanTradeInfoDf"] = {"df": loanTradeInfoDf, "nextDf": None}
  allHeaders.append(loanTradeInfo_header)

  # Credit default summary
  # Recovery summary (asset disposal and advance business)
  recoveryInfoSumDf = pd.DataFrame()
  recoveryInfoSumDf_header = ['业务种类', '账户数', '余额']
  dfMap["recoveryInfoSumDf"] = {"df": recoveryInfoSumDf, "nextDf": None}
  allHeaders.append(recoveryInfoSumDf_header)

  # Bad-debt summary
  badDebtsInfoSumDf = pd.DataFrame()
  badDebtsInfoSumDf_header = ['账户数', '余额']
  dfMap["badDebtsInfoSumDf"] = {"df": badDebtsInfoSumDf, "nextDf": None}
  allHeaders.append(badDebtsInfoSumDf_header)

  # Overdue / overdraft summary
  overdueInfoSumDf = pd.DataFrame()
  overdueInfoSumDf_header = ['账户类型', '账户数', '月份数', '单月最高逾期/透支总额', '最长逾期/透支月数']
  dfMap["overdueInfoSumDf"] = {"df": overdueInfoSumDf, "nextDf": None}
  allHeaders.append(overdueInfoSumDf_header)

  # Non-revolving loan account summary
  loanAccountInfoSumDf = pd.DataFrame()
  loanAccountInfoSumDf_header0 = ['非循环贷账户信息汇总']
  loanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  dfMap["loanAccountInfoSumDf"] = {"df": loanAccountInfoSumDf, "nextDf": None}
  allHeaders.append(loanAccountInfoSumDf_header0)
  allHeaders.append(loanAccountInfoSumDf_header1)

  # Sub-account under revolving credit line summary
  cycleCreditAccountInfoSumDf = pd.DataFrame()
  cycleCreditAccountInfoSumDf_header0 = ['循环额度下分账户信息汇总']
  # A stray trailing comma used to wrap this list in a one-element tuple,
  # unlike every other header; it is a plain list again.
  cycleCreditAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  dfMap["cycleCreditAccountInfoSumDf"] = {"df": cycleCreditAccountInfoSumDf, "nextDf": None}
  allHeaders.append(cycleCreditAccountInfoSumDf_header0)
  allHeaders.append(cycleCreditAccountInfoSumDf_header1)

  # Revolving loan account summary
  cycleLoanAccountInfoSumDf = pd.DataFrame()
  cycleLoanAccountInfoSumDf_header0 = ['循环贷账户信息汇总']
  cycleLoanAccountInfoSumDf_header1 = ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款']
  dfMap["cycleLoanAccountInfoSumDf"] = {"df": cycleLoanAccountInfoSumDf, "nextDf": None}
  allHeaders.append(cycleLoanAccountInfoSumDf_header0)
  allHeaders.append(cycleLoanAccountInfoSumDf_header1)

  # Credit card account summary
  creditCardInfoSumDf = pd.DataFrame()
  creditCardInfoSumDf_header0 = ['贷记卡账户信息汇总']
  creditCardInfoSumDf_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度']
  dfMap["creditCardInfoSumDf"] = {"df": creditCardInfoSumDf, "nextDf": None}
  allHeaders.append(creditCardInfoSumDf_header0)
  allHeaders.append(creditCardInfoSumDf_header1)

  # Quasi-credit card account summary
  creditCardInfoSumDfZ = pd.DataFrame()
  creditCardInfoSumDfZ_header0 = ['准贷记卡账户信息汇总']
  creditCardInfoSumDfZ_header1 = ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额', '单家机构最低\n授信额', '透支余额', '最近6个月平\n均透支余额']
  dfMap["creditCardInfoSumDfZ"] = {"df": creditCardInfoSumDfZ, "nextDf": None}
  allHeaders.append(creditCardInfoSumDfZ_header0)
  allHeaders.append(creditCardInfoSumDfZ_header1)

  # Public information brief
  publicInfoBriefDf = pd.DataFrame()
  publicInfoBriefDf_header0 = ['公共信息汇总']
  dfMap["publicInfoBriefDf"] = {"df": publicInfoBriefDf, "nextDf": None}
  allHeaders.append(publicInfoBriefDf_header0)

  # Query record summary
  queryRecordSumDf_header0 = ['最近1个月内的查询机构数', '最近1个月内的查询次数', '最近2年内的查询次数']
  queryRecordSumDf = pd.DataFrame()
  dfMap["queryRecordSumDf"] = {"df": queryRecordSumDf, "nextDf": None}
  allHeaders.append(queryRecordSumDf_header0)

  # Loan accounts: non-revolving, sub-accounts under a revolving line, and
  # revolving loans all share this header.
  loan_header = ['管理机构', '账户标识', '开立日期', '到期日期', '借款金额', '账户币种']
  loanDfs = []
  dfMap["loanDfs"] = {"dfs": loanDfs, "nextDf": []}
  allHeaders.append(loan_header)

  # Credit card accounts
  creditCard_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '业务种类', '担保方式']
  creditCardDfs = []
  dfMap["creditCardDfs"] = {"dfs": creditCardDfs, "nextDf": []}
  allHeaders.append(creditCard_header)

  # Quasi-credit card accounts
  creditCardZ_header = ['发卡机构', '账户标识', '开立日期', '账户授信额度', '共享授信额度', '币种', '担保方式']
  creditCardDfsZ = []
  dfMap["creditCardDfsZ"] = {"dfs": creditCardDfsZ, "nextDf": []}
  allHeaders.append(creditCardZ_header)

  # Credit transaction details
  # Recovery details (original note: not used yet)
  recoveryInfoDfs_header = ['管理机构', '业务种类', '债权接收日期', '债权金额', '债权转移时的还款状态']
  recoveryInfoDfs = []
  dfMap["recoveryInfoDfs"] = {"dfs": recoveryInfoDfs, "nextDf": []}
  allHeaders.append(recoveryInfoDfs_header)

  # Public information details
  # Enforcement records
  forceExecRcdDfs_header = ['编号', '执行法院', '执行案由', '立案日期', '结案方式']
  forceExecRcdDfs = []
  dfMap["forceExecRcdDfs"] = {"dfs": forceExecRcdDfs, "nextDf": []}
  allHeaders.append(forceExecRcdDfs_header)

  # Query records
  queryRecordDetailDf_header = ['编号', '查询日期', '查询机构', '查询原因']
  dfMap["queryRecordDetailDf"] = {"df": pd.DataFrame(), "nextDf": []}
  allHeaders.append(queryRecordDetailDf_header)

  # Housing fund contribution records
  housingFundRcdDfs_header = ['参缴地', '参缴日期', '初缴月份', '缴至月份', '缴费状态', '月缴存额', '个人缴存比例', '单位缴存比例']
  housingFundRcdDfs = []
  dfMap["housingFundRcdDfs"] = {"dfs": housingFundRcdDfs, "nextDf": []}
  allHeaders.append(housingFundRcdDfs_header)

  # Related repayment-liability summary
  repaymentSumDf_header0 = ['相关还款责任信息汇总']
  dfMap["repaymentSumDf"] = {"df": pd.DataFrame(), "nextDf": None}
  allHeaders.append(repaymentSumDf_header0)
  # Page-break handling idea (design note from the original author):
  # keep each DataFrame in a holder object together with the "next" frame;
  # when a table turns out to be incomplete it was split across pages, so the
  # next frame is merged into the current one.  This targets tables whose
  # columns can be merged.
  keyList = []  # keys of all stored DataFrames

  # Header detection idea: compare the first row of every page with each
  # known header to decide whether the page starts with a header row.
  import timeit

  # ---- indicator scalars -----------------------------------------------------
  reportTime = ""       # report time
  queryInfoName = ""    # name of the queried person
  queryInfoCardId = ""  # ID number of the queried person

  # Basic information of the queried person (report time, name, ID number).
  queryInfo = {"reportTime": "", "queryInfoCardId": ""}
  # Identity information
  identity = {}
  # Spouse information
  mate = {}
  # Credit transaction hints
  loanTradeInfo = {
      'perHouseLoanAccount': 0,
      'perBusHouseLoanAccount': 0,
      'otherLoanAccount': 0,
      'loanMonthMin': 0,
      'creditCardMonthMin': 0,
      'creditAccount': 0,
      'creditAccountZ': 0,
  }
  # Overdue and default brief
  overdueBrief = {}
  # Overdue / overdraft summary:
  #   loanOverdueAccount           number of overdue loan accounts
  #   loanOverdueMonth             number of overdue loan months
  #   loanCurMonthOverdueMaxTotal  highest single-month overdue total (loans)
  #   loanMaxOverdueMonth          longest overdue streak in months (loans)
  overdueInfo = {
      "loanOverdueAccount": "",
      "loanOverdueMonth": "",
      "loanCurMonthOverdueMaxTotal": "",
      "loanMaxOverdueMonth": "",
      "creditCardOverdueAccount": "",
      "creditCardOverdueMonth": "",
      "creditCardCurMonthOverdueMaxTotal": "",
      "creditCardMaxOverdueMonth": "",
  }
  # Outstanding loan summary; mirrors the table columns
  # ['管理机构数', '账户数', '授信总额', '余额', '最近6个月平均应还款'].
  loanAccountInfoSum = {"mgrOrgCount": 0, "account": 0, "creditTotalAmt": 0, "balance": 0, "last6AvgPayAmt": 0}
  # Open credit card summary; mirrors the table columns
  # ['发卡机构数', '账户数', '授信总额', '单家机构最高\n授信额',
  #  '单家机构最低\n授信额', '已用额度', '最近6个月平\n均使用额度'].
  creditCardInfoSum = {
      "awardOrgCount": 0,
      "account": 0,
      "creditTotalAmt": 0,
      "perMaxCreditTotalAmt": 0,
      "perMinCreditTotalAmt": 0,
      "useAmt": 0,
      "last6AvgUseAmt": 0,
  }
  # Credit-approval query record details
  queryRecordDetail = {
      "last1MonthQueryTimes": 0,
      "last3MothLoanApproveTimes": 0,
      "last3MonthQueryTimes": 0,
      "lastTimeLoanApproveMonth": 0,
  }
  # Amount of the most recently settled loan
  loanAccountInfo = {"lastSettleLoanAmt": 0}

  loanAccountDfs = []            # loan accounts, merged horizontally
  creditCardAccountDfs = []      # merged credit card accounts
  creditCardAccountDfsZ = []     # merged quasi-credit card accounts
  recoveryInfoAccountDfs = []    # merged recovery accounts
  housingFundRcdAccountDfs = []  # merged housing fund accounts
  # ============================ indicator frames: start =======================
  # Each indicator group is a one-row DataFrame: the columns come from
  # ``consts`` and the single row is labelled with the section name.

  # Identity information
  identityInfoIndex = '身份信息'
  identityInfoDf = pd.DataFrame(columns=consts.identityInfoHeader, index=[identityInfoIndex])
  # Spouse information
  mateInfoIndex = '配偶信息'
  mateInfoDf = pd.DataFrame(columns=consts.mateInfoHeader, index=[mateInfoIndex])
  # Residence information
  # NOTE(review): this rebinds the module-level name ``liveInfoDf`` that was
  # created earlier for the raw table; ``dfMap["liveInfoDf"]["df"]`` still
  # refers to the earlier object.
  liveInfoIndex = '居住信息'
  liveInfoDf = pd.DataFrame(columns=consts.liveInfoHeader, index=[liveInfoIndex])
  # Occupation information
  occupationInfoIndex = '职业信息'
  occupationInfoDf = pd.DataFrame(columns=consts.occupationInfoHeader, index=[occupationInfoIndex])
  # Credit transaction hints
  loanTradeInfoIndex = '信贷交易信息提示'
  briefInfoDf_loanTradeInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeInfo, index=[loanTradeInfoIndex])
  # Recovery summary
  recoveryInfoSumIndex = '信贷交易违约信息概要'
  briefInfoDf_recoveryInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_recoveryInfo, index=[recoveryInfoSumIndex])
  # Bad-debt summary
  badDebtsInfoIndex = '呆账信息汇总'
  briefInfoDf_badDebtsInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_badDebtsInfoSum, index=[badDebtsInfoIndex])
  # Overdue (overdraft) summary
  overdueInfoSumIndex = '逾期(透支)信息汇总'
  briefInfoDf_overdueInfoSum = pd.DataFrame(columns=consts.briefInfoHeader_overdueInfoSum, index=[overdueInfoSumIndex])
  # Credit line and liability brief; the only frame pre-filled with 0.0.
  loanTradeCreditInfoIndex = '信贷交易授信及负债信息概要'
  briefInfoDf_loanTradeCreditInfo = pd.DataFrame(columns=consts.briefInfoHeader_loanTradeCreditInfo, index=[loanTradeCreditInfoIndex]).fillna(0.0)
  # Public information brief
  # NOTE(review): rebinds ``publicInfoBriefDf`` (see the note on liveInfoDf).
  publicInfoBriefIndex = '公共信息概要'
  publicInfoBriefDf = pd.DataFrame(columns=consts.publicInfoBriefHeader, index=[publicInfoBriefIndex])
  # Query record summary
  # NOTE(review): rebinds ``queryRecordSumDf`` (see the note on liveInfoDf).
  queryRecordSumIndex = '查询记录汇总'
  queryRecordSumDf = pd.DataFrame(columns=consts.queryRecordSumHeader, index=[queryRecordSumIndex])
  # Credit transaction details: recovery information
  recoveryInfoIndex = '被追偿信息'
  creditTradeDetailDf_recoveryInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_recoveryInfo, index=[recoveryInfoIndex])
  # Credit transaction details: special transactions
  # NOTE(review): despite the ``Header`` in its name this is a DataFrame; the
  # name is kept because other code may reference it.
  specialTradeIndex = '特殊交易'
  creditTradeDetailHeader_specialTrade = pd.DataFrame(columns=consts.creditTradeDetailHeader_specialTrade, index=[specialTradeIndex])
  # Credit transaction details
  # Non-revolving loan accounts
  loanInfoIndex = '非循环贷账户'
  creditTradeDetailDf_loanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_loanAccountInfo, index=[loanInfoIndex])
  # Sub-accounts under a revolving credit line
  cycleCreditAccountInfoIndex = '循环额度下分账户'
  creditTradeDetailDf_cycleCreditAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleCreditAccountInfo, index=[cycleCreditAccountInfoIndex])
  # Revolving loan accounts
  cycleLoanAccountInfoIndex = '循环贷账户'
  creditTradeDetailDf_cycleLoanAccountInfo = pd.DataFrame(columns=consts.creditTradeDetailHeader_cycleLoanAccountInfo, index=[cycleLoanAccountInfoIndex])
  # Loan information
  loanAccountInfoIndex = '贷款信息'
  loanAccountInfoDf = pd.DataFrame(columns=consts.loanAccountInfoHeader, index=[loanAccountInfoIndex])
  # Credit card information
  creditCardAccountInfoIndex = '贷记卡账户'
  creditCardAccountInfoDf = pd.DataFrame(columns=consts.creditCardAccountInfoHeader, index=[creditCardAccountInfoIndex])
  # Quasi-credit card information
  creditCardAccountInfoIndexZ = '准贷记卡账户'
  creditCardAccountInfoDfZ = pd.DataFrame(columns=consts.creditCardAccountInfoHeaderZ, index=[creditCardAccountInfoIndexZ])
  # Usage rate
  useRateIndex = '使用率'
  useRateDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_useRate, index=[useRateIndex])
  # Number of opened accounts
  openAccountIndex = '开户数'
  openAccountDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_openAccount, index=[openAccountIndex])
  # 24-period repayment status
  payRcdStatusIndex = '24期还款状态'
  payRcdStatusDf = pd.DataFrame(columns=consts.creditTradeDetailHeader_payRcdStatus, index=[payRcdStatusIndex])
  # Query record detail indicators
  queryRecordDetailIndex = '信贷审批查询记录明细'
  queryRecordDetailDf = pd.DataFrame(columns=consts.queryRecordDetailHeader, index=[queryRecordDetailIndex])
  # Housing fund
  housingFundRcdIndex = '住房公积金参缴记录'
  housingFundRcdDf = pd.DataFrame(columns=consts.housingFundRcdHeader, index=[housingFundRcdIndex])
  # ============================ indicator frames: end =========================
  def parseQueryInfo(dfObj):
      """Extract the report header fields into the module-level ``queryInfo``.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the query-information table.

      Side effects:
          Sets ``queryInfo["reportTime"]``, ``queryInfo["queryInfoName"]`` and
          ``queryInfo["queryInfoCardId"]``.
      """
      df = dfObj["df"]

      # Row 0, column 3 holds "<label>:<time>"; keep the piece right after the
      # first ":" and turn the dotted date into a dashed one.
      # NOTE(review): a time-of-day containing further ":" is cut off by this
      # split - confirm against a real report before changing it.
      rawTime = df.loc[0, :][3]
      queryInfo["reportTime"] = rawTime.split(":")[1].replace(".", "-")

      person = df.loc[2, :]
      queryInfo["queryInfoName"] = person[0]  # name of the queried person
      # ID number; line breaks inside the cell are removed.
      queryInfo["queryInfoCardId"] = person[2].replace("\n", "")
  def parseIdentity(dfObj):
      """Fill the identity indicator row from the identity table.

      Args:
          dfObj: Mapping with the identity table under ``"df"`` and the
              phone-number table under ``"mobileDf"``.
              NOTE(review): the registry entry created at module level uses
              the key ``"mobiles"``; presumably ``"mobileDf"`` is attached
              elsewhere before this runs - verify against the caller.

      Side effects:
          Writes into ``identityInfoDf`` at row ``identityInfoIndex``.  Phone
          counts are relative to ``queryInfo["reportTime"]``, so
          ``parseQueryInfo`` must have run first.
      """
      # Row 1 with empty cells squeezed out, so positions are contiguous.
      cells = dfObj["df"].loc[1, :].dropna().reset_index(drop=True)
      target = identityInfoIndex

      identityInfoDf.loc[target, '性别'] = cells[0]
      # Only the first seven characters of the formatted birth date are kept.
      identityInfoDf.loc[target, '出生日期'] = dfParser.formatDate(cells[1])[0:7]
      identityInfoDf.loc[target, '国籍'] = cells[6]
      identityInfoDf.loc[target, '户籍地址'] = cells[9].replace("\n", "")
      identityInfoDf.loc[target, '婚姻状况'] = cells[2]
      identityInfoDf.loc[target, '学位'] = cells[4]
      identityInfoDf.loc[target, '通讯地址'] = cells[8].replace("\n", "")
      identityInfoDf.loc[target, '就业状况'] = cells[5]
      # NOTE(review): positions 3 and 7 are not copied.

      mobileDf = dfObj["mobileDf"]
      identityInfoDf.loc[target, '历史手机号码数'] = mobileDf.index.size
      reportTime = queryInfo["reportTime"]
      for months in (3, 6, 12, 24):
          column = '近{}个月手机号码数'.format(months)
          identityInfoDf.loc[target, column] = getLastMonthMobileCount(mobileDf, months, reportTime)
  def getLastMonthMobileCount(df, month, reportTime):
      """Count phone-number rows dated within the last *month* months.

      Args:
          df: Phone-number table; column ``5`` is compared as a string against
              an ISO ``YYYY-MM-DD`` cutoff.
          month: Size of the look-back window in months; each month is
              approximated as 30 days.
          reportTime: Report date, parseable by ``np.datetime64``.

      Returns:
          int: Number of rows whose column ``5`` is on or after the cutoff.
      """
      cutoff = np.datetime64(reportTime, "D") - np.timedelta64(30 * month, 'D')
      return int((df[5] >= str(cutoff)).sum())
  def parseMate(dfObj):
      """Copy spouse name, ID number, employer and phone from the spouse table.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the spouse table.  An empty
              table is ignored.

      Side effects:
          Fills the module-level ``mate`` dict and the ``mateInfoDf`` row
          ``mateInfoIndex``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      spouse = df.loc[1, :]
      employer = spouse[3].replace("\n", "")

      mate["mateName"] = spouse[0]       # spouse name
      mate["mateCardId"] = spouse[2]     # spouse ID number
      mate["mateWorkCompany"] = employer # spouse employer
      # The dict keeps the raw phone cell; only the indicator frame below
      # strips line breaks from it.
      mate["mateContactTel"] = spouse[4]

      mateInfoDf.loc[mateInfoIndex, '姓名'] = spouse[0]
      mateInfoDf.loc[mateInfoIndex, '证件号码'] = spouse[2]
      mateInfoDf.loc[mateInfoIndex, '工作单位'] = employer
      mateInfoDf.loc[mateInfoIndex, '联系电话'] = spouse[4].replace("\n", "")
  def parseLiveInfo(dfObj):
      """Fill the residence indicator row from the residence table.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the residence table.  An empty
              table is ignored.

      Side effects:
          Writes into ``liveInfoDf`` at row ``liveInfoIndex``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      latest = df.loc[1, :]
      target = liveInfoIndex
      liveInfoDf.loc[target, '居住地址'] = latest[1]
      liveInfoDf.loc[target, '住宅电话'] = latest[2]
      # One row is subtracted - presumably the header row kept in the table.
      liveInfoDf.loc[target, '历史居住地址个数'] = df.index.size - 1

      # Same calendar day three years ago, built by rewriting the year digits.
      # NOTE(review): this uses today's date, whereas parseOccupationInfoDf
      # measures its three-year window from the report date - confirm which
      # one is intended.
      today = time.strftime("%Y-%m-%d")
      threeYearsAgo = str(int(today[0:4]) - 3) + today[4:10]
      # Column 4 is compared as a string; one row is subtracted here as well.
      recent = df[df[4] >= threeYearsAgo]
      liveInfoDf.loc[target, '最近3年内居住地址个数'] = recent.index.size - 1

      # "Yes" if any row reports the residence status '自置'.
      ownsHome = df[df[3] == '自置'].index.size > 0
      liveInfoDf.loc[target, '当前居住状况-是否具有自有住房'] = '是' if ownsHome else '否'
      liveInfoDf.loc[target, '居住状况'] = latest[3]
      liveInfoDf.loc[target, '信息更新日期'] = latest[4]
  def _compactRows(frame):
      """Return *frame* with the empty cells of every row squeezed to the left.

      Each row is compacted on its own (NaN dropped, positions renumbered) and
      the rows are stacked again under a fresh 0..n-1 index.  An input without
      rows yields an empty DataFrame.
      """
      rows = [
          pd.DataFrame(frame.iloc[pos].dropna().reset_index(drop=True)).T
          for pos in range(frame.index.size)
      ]
      if not rows:
          return pd.DataFrame()
      # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
      return pd.concat(rows, ignore_index=True)


  def parseOccupationInfoDf(dfObj):
      """Fill the occupation indicator row from the occupation table.

      The raw table holds two stacked sub-tables: employer rows first, then,
      after a row equal to ``occupationInfo_header1``, the matching occupation
      rows.  The two halves are compacted and joined side by side, so one
      joined row describes one job.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the occupation table.  An
              empty table is ignored.

      Side effects:
          Writes into ``occupationInfoDf`` at row ``occupationInfoIndex``.
          Reads ``queryInfo['reportTime']``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      # Locate the row where the occupation half starts (0 if never found).
      splitAt = 0
      for i in range(0, df.index.size):
          if df.loc[i, :].dropna().tolist() == occupationInfo_header1:
              splitAt = i
              break

      employerRows = df[1:splitAt].reset_index(drop=True)
      occupationRows = df[splitAt + 1:df.index.size].reset_index(drop=True)
      # Merged cells leave empty columns behind; squeeze them out first.
      occDf = pd.concat(
          [_compactRows(employerRows), _compactRows(occupationRows)],
          axis=1, ignore_index=True)

      target = occupationInfoIndex
      reportTime = queryInfo['reportTime']

      # Row 0 is treated as the latest job; labels here are the joined
      # frame's column labels (NaN dropped, but not renumbered).
      row = occDf.loc[0, :].dropna()
      occupationInfoDf.loc[target, '工作单位'] = row[1]

      last3yearDate = utils.getLastMonthDate(reportTime, 12 * 3)
      dateIndex = occDf.columns.size - 1  # the last column holds the date
      occDf = utils.replaceDateColIdx(occDf, dateIndex)
      last3yearOccDf = occDf[occDf[dateIndex] >= last3yearDate]
      occupationInfoDf.loc[target, '最近3年内工作单位数'] = last3yearOccDf.index.size
      occupationInfoDf.loc[target, '单位电话'] = row[4]

      try:
          minDateIndex = np.argmin(occDf[dateIndex])
          maxDateIndex = np.argmax(occDf[dateIndex])
          rowYearMin = occDf.loc[minDateIndex, :].dropna()
          rowYearMax = occDf.loc[maxDateIndex, :].dropna()
          # Column 10 is the year of joining the employer; "--" means unknown.
          if rowYearMin[10] != "--":
              occupationInfoDf.loc[target, '最早进入本单位年份距报告日期时长'] = int(str(np.datetime64(reportTime, "Y"))) - int(rowYearMin[10])
          if rowYearMax[10] != "--":
              occupationInfoDf.loc[target, '最新进入本单位年份距报告日期时长'] = int(str(np.datetime64(reportTime, "Y"))) - int(rowYearMax[10])
      except Exception:
          # Best effort: the tenure indicators stay empty when they cannot be
          # derived.  KeyboardInterrupt/SystemExit are no longer swallowed.
          logger.error("最早进入本单位年份距报告日期时长解析异常")

      # Latest job again, this time with positions renumbered after dropping
      # empty cells.
      row0 = occDf.loc[0, :].dropna().reset_index(drop=True)
      occupationInfoDf.loc[target, '单位性质'] = row0[2]
      occupationInfoDf.loc[target, '单位地址'] = row0[3]
      occupationInfoDf.loc[target, '职业'] = row0[6]
      occupationInfoDf.loc[target, '行业'] = row0[7]
      occupationInfoDf.loc[target, '职务'] = row0[8]
      occupationInfoDf.loc[target, '职称'] = row0[9]
      occupationInfoDf.loc[target, '进入本单位年份'] = row0[10]
      occupationInfoDf.loc[target, '信息更新日期'] = row0[11]
      occupationInfoDf.loc[target, '历史工作单位数'] = occupationRows.index.size
  449. # 日期相减离当前时间月份
  450. # 贷款账龄(月数)=当前日期(2020-04-01)-最小月份的1日(2019.2->2019-12-01)=4
  451. # def difMonth(dateStr):
  452. # return int(int(str(np.datetime64(time.strftime("%Y-%m-%d")) -
  453. # np.datetime64(dateStr.replace('.', '-'), "D")).split(" ")[0]) / 30);
  def parseLoanTradeInfo(dfObj):
      """Copy the per-business-type account counts from the credit hint table.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the credit-transaction hint
              table.  Rows at positions 1-3 are the loan types and rows at
              positions 4-5 the card types; column ``2`` is the account count.

      Side effects:
          Writes five counts into ``briefInfoDf_loanTradeInfo`` at row
          ``loanTradeInfoIndex``.

      NOTE(review): unlike the sibling summary parsers there is no guard for
      an empty table, so a missing section raises here - confirm that is
      intended.
      """
      df = dfObj["df"]
      # (row position, indicator column)
      countColumns = (
          (1, '个人住房贷款账户数'),
          (2, '个人商用房贷款(包括商住两用)账户数'),
          (3, '其他类贷款账户数'),
          (4, '贷记卡账户数'),
          (5, '准贷记卡账户数'),
      )
      loanRows = df[1:4].reset_index(drop=True)
      cardRows = df[4:6].reset_index(drop=True)
      for position, column in countColumns:
          if position < 4:
              cell = loanRows.loc[position - 1, :][2]
          else:
              cell = cardRows.loc[position - 4, :][2]
          briefInfoDf_loanTradeInfo.loc[loanTradeInfoIndex, column] = utils.toInt(cell)
  def parseBadDebtsInfoSumDf(dfObj):
      """Copy the bad-debt account count and balance into the indicator row.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the bad-debt summary table.
              An empty table is ignored.

      Side effects:
          Writes into ``briefInfoDf_badDebtsInfoSum`` at row
          ``badDebtsInfoIndex``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      values = df.loc[2, :]
      # The count is stored as read; the balance goes through utils.replaceAmt.
      briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '账户数'] = values[0]
      briefInfoDf_badDebtsInfoSum.loc[badDebtsInfoIndex, '余额'] = utils.replaceAmt(values[1])
  def parseRecoveryInfoSum(dfObj):
      """Copy the recovery summary (asset disposal, advances, total).

      Args:
          dfObj: Mapping whose ``"df"`` entry is the recovery summary table.
              Rows 2, 3 and 4 are read as asset disposal, advance business and
              the total line; column ``1`` is the account count and column
              ``2`` the balance.  An empty table is ignored.

      Side effects:
          Fills ``overdueBrief`` with the raw cells and
          ``briefInfoDf_recoveryInfoSum`` (row ``recoveryInfoSumIndex``) with
          the counts as read and the balances passed through
          ``utils.replaceAmt``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      disposalRow = df.loc[2, :]
      advanceRow = df.loc[3, :]
      totalRow = df.loc[4, :]
      target = recoveryInfoSumIndex

      overdueBrief["disposalInfoSumAccount"] = disposalRow[1]  # asset disposal: accounts
      briefInfoDf_recoveryInfoSum.loc[target, '资产处置业务账户数'] = disposalRow[1]
      overdueBrief["disposalInfoSumAmt"] = disposalRow[2]      # asset disposal: balance
      briefInfoDf_recoveryInfoSum.loc[target, '资产处置业务余额'] = utils.replaceAmt(disposalRow[2])

      overdueBrief["advanceInfoSumAccount"] = advanceRow[1]    # advances: accounts
      briefInfoDf_recoveryInfoSum.loc[target, '垫款业务账户数'] = advanceRow[1]
      overdueBrief["advanceInfoSumAmt"] = advanceRow[2]        # advances: balance
      briefInfoDf_recoveryInfoSum.loc[target, '垫款业务余额'] = utils.replaceAmt(advanceRow[2])

      briefInfoDf_recoveryInfoSum.loc[target, '合计总账户数'] = totalRow[1]
      briefInfoDf_recoveryInfoSum.loc[target, '合计总余额'] = utils.replaceAmt(totalRow[2])
  def parseOverdueInfoSum(dfObj):
      """Fill the overdue / overdraft summary indicators.

      Args:
          dfObj: Mapping whose ``"df"`` entry is the overdue summary table.
              Rows 2-6 are read as the five account types; columns 1-4 are
              account count, month count, highest single-month amount and
              longest streak in months.  An empty table is ignored.

      Side effects:
          Writes into ``briefInfoDf_overdueInfoSum`` at row
          ``overdueInfoSumIndex``: four indicators per account type plus two
          aggregates over the account types whose count is not ``'--'``.
      """
      df = dfObj["df"]
      if df.empty:
          return

      # (row label, (account-count, month-count, amount, longest-streak column))
      fieldsByRow = (
          (2, ('非循环贷帐户账户数', '非循环贷帐户月份数',
               '非循环贷帐户单月最高逾期总额', '非循环贷帐户最长逾期月数')),
          (3, ('循环额度下分账户账户数', '循环额度下分账户月份数',
               '循环额度下分账户单月最高逾期总额', '循环额度下分账户最长逾期月数')),
          (4, ('循环贷账户账户数', '循环贷账户月份数',
               '循环贷账户单月最高逾期总额', '循环贷账户最长逾期月数')),
          (5, ('贷记卡账户账户数', '贷记卡账户月份数',
               '贷记卡账户单月逾期总额', '贷记卡账户最长逾期月数')),
          (6, ('准贷记卡账户账户数', '准贷记卡账户月份数',
               '准贷记卡账户单月透支总额', '准贷记卡账户最长透支月数')),
      )
      target = overdueInfoSumIndex

      # Fetch every row before writing anything, so a short table fails
      # without leaving the indicator row half filled.
      rows = [df.loc[label, :] for label, _ in fieldsByRow]
      for row, (_, (accountCol, monthCol, amountCol, longestCol)) in zip(rows, fieldsByRow):
          # TODO (from the original author): these values still need processing.
          briefInfoDf_overdueInfoSum.loc[target, accountCol] = utils.toInt(row[1])
          briefInfoDf_overdueInfoSum.loc[target, monthCol] = utils.toInt(row[2])
          briefInfoDf_overdueInfoSum.loc[target, amountCol] = utils.replaceAmt(row[3])
          briefInfoDf_overdueInfoSum.loc[target, longestCol] = utils.toInt(row[4])

      # Take the five data rows first and only then drop the '--' ones.  The
      # old order (filter, then positional [2:7]) could pull rows that follow
      # the data rows into the window whenever a data row was filtered out.
      dataRows = df[2:7]
      overdueInfoAccountDf = dataRows[dataRows[1] != '--']
      briefInfoDf_overdueInfoSum.loc[target, '该用户所有逾期账户最长逾期/透支月数最大值'] = overdueInfoAccountDf[4].astype('int').max()
      briefInfoDf_overdueInfoSum.loc[target, '该用户所有逾期账户数加总'] = overdueInfoAccountDf[1].astype('int').sum()  # TODO
      # TODO: '该用户过去5年出现逾期的所有账户数目' is not computed yet.
  530. # 未结清贷款法人机构数 从“未结清贷款信息汇总”中直接提取LoanLegalOrgNum
  531. # 未结清贷款机构数 从“未结清贷款信息汇总”中直接提取LoanOrgNum
  532. # 未结清贷款笔数 从“未结清贷款信息汇总”中直接提取CountNum
  533. # 未结清贷款合同总额 从“未结清贷款信息汇总”中直接提取ContractProfits
  534. # 未结清贷款合同余额 从“未结清贷款信息汇总”中直接提取Balance
  535. # 未结清贷款近6月平均应还款 从“未结清贷款信息汇总”中直接提取Last6MothsAvgRepayAmount
  536. # 个人贷款未结清笔数 "从“未结清贷款信息汇总”计算客户符合以下条件的贷款笔数
  537. # 1.贷款类型不为('%个人助学贷款%' ,'%农户贷款%')
  538. # 2.贷款额度>100元
  539. # 3.贷款状态不为“结清”"
  540. # 非循环贷账户信息汇总
  541. def doFilterCalc(dfx):
  542. dfx = dfx.replace('--', 0)
  543. return dfx;
  544. # 科学计数法转换
  545. def replaceAmt(dfx):
  546. return dfx.str.replace(',', '')
  547. # 非循环贷账户信息汇总
  548. def parseLoanAccountInfoSum(dfObj):
  549. df = dfObj["df"];
  550. if not df.empty:
  551. loanAccountInfoSumDf = df[2:3];
  552. loanAccountInfoSumDf = doFilterCalc(loanAccountInfoSumDf); # 替换--为0
  553. loanAccountInfoSumDf = loanAccountInfoSumDf.reset_index(drop=True)
  554. row0 = loanAccountInfoSumDf.loc[0,:]
  555. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户管理机构数'] = int(row0[0])
  556. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'] = int(row0[1])
  557. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  558. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  559. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  560. # 循环额度下分账户
  561. def parseCycleCreditAccountInfoSum(dfObj):
  562. df = dfObj["df"];
  563. if not df.empty:
  564. cycleCreditAccountInfoSumDf = df[2:3];
  565. cycleCreditAccountInfoSumDf = doFilterCalc(cycleCreditAccountInfoSumDf); # 替换--为0
  566. cycleCreditAccountInfoSumDf = cycleCreditAccountInfoSumDf.reset_index(drop=True)
  567. row0 = cycleCreditAccountInfoSumDf.loc[0,:]
  568. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户管理机构数'] = int(row0[0])
  569. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'] = int(row0[1])
  570. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额'] = int(utils.replaceAmt(row0[2]))
  571. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额'] = int(utils.replaceAmt(row0[3]))
  572. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  573. # 循环贷账户信息
  574. def parseCyleLoanAccountInfoSum(dfObj):
  575. df = dfObj["df"];
  576. if not df.empty:
  577. cycleLoanAccountInfoSumDf = df[2:3];
  578. cycleLoanAccountInfoSumDf = doFilterCalc(cycleLoanAccountInfoSumDf); # 替换--为0
  579. cycleLoanAccountInfoSumDf = cycleLoanAccountInfoSumDf.reset_index(drop=True)
  580. row0 = cycleLoanAccountInfoSumDf.loc[0,:]
  581. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户管理机构数'] = int(row0[0])
  582. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户账户数'] = int(row0[1])
  583. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额'] = int(utils.replaceAmt(row0[2]))
  584. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额'] = int(utils.replaceAmt(row0[3]))
  585. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户6月平均应还款'] = int(utils.replaceAmt(row0[4]))
  586. # 解析贷记卡信息汇总,包含准贷记卡
  587. def parseCreditCardInfoSum(dfObj):
  588. df = dfObj["df"];
  589. if not df.empty:
  590. creditCardInfoSumDf = df[2:3];
  591. creditCardInfoSumDf = doFilterCalc(creditCardInfoSumDf); # 替换--为0
  592. creditCardInfoSumDf = creditCardInfoSumDf.reset_index(drop=True)
  593. row0 = creditCardInfoSumDf.loc[0, :]
  594. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡发卡机构数'] = int(row0[0])
  595. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡账户数'] = int(row0[1])
  596. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  597. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  598. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  599. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  600. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
  601. # 解析贷记卡信息汇总,包含准贷记卡
  602. def parseCreditCardInfoSumZ(dfObj):
  603. df = dfObj["df"];
  604. if not df.empty:
  605. creditCardInfoSumDfZ = df[2:3];
  606. creditCardInfoSumDfZ = doFilterCalc(creditCardInfoSumDfZ);
  607. creditCardInfoSumDfZ = creditCardInfoSumDfZ.reset_index(drop=True)
  608. row0 = creditCardInfoSumDfZ.loc[0, :]
  609. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡发卡机构数'] = int(row0[0])
  610. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡账户数'] = int(row0[1])
  611. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额'] = int(utils.replaceAmt(row0[2]))
  612. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最高授信额'] = int(utils.replaceAmt(row0[3]))
  613. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡单家机构最低授信额'] = int(utils.replaceAmt(row0[4]))
  614. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度'] = int(utils.replaceAmt(row0[5]))
  615. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡最近6个月平均使用额度'] = int(utils.replaceAmt(row0[6]))
  616. #相关还款责任
  617. def parseRepaymentSum(dfObj):
  618. df = dfObj["df"];
  619. if not df.empty:
  620. row4 = df.loc[4,:].dropna().reset_index(drop=True)#第4行 为个人
  621. row8 = None
  622. if df.index.size ==9:
  623. row8 = df.loc[8,:].dropna().reset_index(drop=True)#第8行 为企业
  624. perAccountNum = 0;#个人账户数
  625. orgAccountNum = 0; # 企业账户数
  626. totalAccountNum = 0;#总账户数
  627. guaranteeAccountNum = 0;#相关还款责任总账户数-担保责任
  628. otherAccountNum =0;#相关还款责任总账户数-其他
  629. perGuaranteeAmt = 0#个人担保金额及其他
  630. orgGuaranteeAmt = 0#企业担保金额及其他
  631. totalGuaranteeAmt = 0;#总担保金额
  632. guaranteeAmt = 0;#相关还款责任总担保金额
  633. otherPaymentAmt = 0;#其他还款责任金额
  634. perGuaranteeBalance = 0 # 个人担保余额及其他
  635. orgGuaranteeBalance = 0 # 企业担保余额及其他
  636. totalGuaranteeBalance = 0;#总担保余额
  637. guaranteeBalance = 0;#相关还款责任总担保余额
  638. otherPaymentBalance = 0; # 其他还款责任余额
  639. #计算总账户数
  640. if row4[0] !="--":
  641. perAccountNum=perAccountNum+utils.toInt(row4[0])
  642. guaranteeAccountNum = guaranteeAccountNum + utils.toInt(row4[0])#个人担保责任账户数
  643. if row4[3] !="--":
  644. perAccountNum = perAccountNum + utils.toInt(row4[3])#其他
  645. otherAccountNum = otherAccountNum + utils.toInt(row4[3]) # 其他
  646. if row8 != None:
  647. if row8[0] != "--":
  648. orgAccountNum = orgAccountNum + utils.toInt(row8[0])
  649. guaranteeAccountNum = guaranteeAccountNum + utils.toInt(row8[0])#企业担保责任账户数
  650. if row8[3] != "--":
  651. orgAccountNum = orgAccountNum + utils.toInt(row8[3])#其他
  652. otherAccountNum = otherAccountNum + utils.toInt(row8[3]) # 其他
  653. totalAccountNum = perAccountNum+orgAccountNum
  654. #计算担保金额
  655. if row4[1] !="--":
  656. perGuaranteeAmt=perGuaranteeAmt+utils.replaceAmt(row4[1])#担保
  657. guaranteeAmt = guaranteeAmt + utils.replaceAmt(row4[1]) # 担保
  658. if row4[4] !="--":
  659. perGuaranteeAmt = perGuaranteeAmt + utils.replaceAmt(row4[4])#其他
  660. otherPaymentAmt = otherPaymentAmt + utils.replaceAmt(row4[4]) # 其他
  661. if row8 != None:
  662. if row8[1] != "--":
  663. orgGuaranteeAmt = orgGuaranteeAmt + utils.replaceAmt(row8[1])#担保
  664. guaranteeAmt = guaranteeAmt + utils.replaceAmt(row8[1]) # 担保
  665. if row8[4] != "--":
  666. orgGuaranteeAmt = orgGuaranteeAmt + utils.replaceAmt(row8[4])#其他
  667. otherPaymentAmt = otherPaymentAmt + utils.replaceAmt(row8[4]) # 其他
  668. totalGuaranteeAmt = perGuaranteeAmt + orgGuaranteeAmt
  669. # 计算余额
  670. if row4[2] !="--":
  671. perGuaranteeBalance=perGuaranteeBalance+utils.replaceAmt(row4[2])
  672. guaranteeBalance=guaranteeBalance+utils.replaceAmt(row4[2])#个人担保余额
  673. if row4[5] !="--":
  674. perGuaranteeBalance = perGuaranteeBalance + utils.replaceAmt(row4[5])#其他
  675. otherPaymentBalance = otherPaymentBalance + utils.replaceAmt(row4[5]) # 其他
  676. if row8 != None:
  677. if row8[2] != "--":
  678. orgGuaranteeBalance = orgGuaranteeBalance + utils.replaceAmt(row8[2])
  679. guaranteeBalance = guaranteeBalance + utils.replaceAmt(row8[2])#企业担保余额
  680. if row8[5] != "--":
  681. orgGuaranteeBalance = orgGuaranteeBalance + utils.replaceAmt(row8[5])
  682. otherPaymentBalance = otherPaymentBalance + utils.replaceAmt(row8[5]) # 其他
  683. totalGuaranteeBalance = perGuaranteeBalance + orgGuaranteeBalance
  684. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数(担保+其他+个人+企业)'] =totalAccountNum
  685. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额+总还款责任金额(个人+企业)'] =totalGuaranteeAmt
  686. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额+总其他余额(个人+企业)'] =totalGuaranteeBalance
  687. if totalGuaranteeAmt !=0:
  688. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额+总其他余额(个人+企业)/相关还款责任账户总担保金额+总其他金额(个人+企业)'] =\
  689. round(totalGuaranteeBalance / totalGuaranteeAmt, 2)
  690. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任担保总账户数-个人'] =perAccountNum
  691. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-个人'] =perGuaranteeAmt
  692. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-个人'] =perGuaranteeBalance
  693. if perGuaranteeBalance !=0:
  694. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-个人/相关还款责任总担保金额-个人'] = round(perGuaranteeBalance/perGuaranteeBalance,2)
  695. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-企业'] =orgAccountNum
  696. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-企业'] =orgGuaranteeAmt
  697. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-企业'] =orgGuaranteeBalance
  698. if orgGuaranteeAmt!=0:
  699. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-企业/相关还款责任总担保金额-企业'] = round(orgGuaranteeBalance/orgGuaranteeAmt,2)
  700. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-担保责任'] =guaranteeAccountNum
  701. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-担保责任'] =guaranteeAmt
  702. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额-担保责任'] =guaranteeBalance
  703. if guaranteeAmt!=0:
  704. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-担保责任/相关还款责任总担保金额-担保责任'] =round(guaranteeBalance/guaranteeAmt,2)
  705. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总账户数-其他'] =otherAccountNum
  706. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保金额-其他'] =otherPaymentAmt
  707. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任总担保余额-其他'] =otherPaymentBalance
  708. if otherPaymentAmt!=0:
  709. briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '相关还款责任账户总担保余额-其他/相关还款责任账户总担保金额-其他'] =round(otherPaymentBalance/otherPaymentAmt,2)
  710. #解析公共信息汇总
  711. def parsePublicInfoBrief(dfObj):
  712. df = dfObj["df"];
  713. if not df.empty:
  714. publicInfoBrief = df[1:6];
  715. publicInfoBrief = publicInfoBrief.reset_index(drop=True)
  716. row0 = publicInfoBrief.loc[0, :]
  717. row1 = publicInfoBrief.loc[1, :]
  718. row2 = publicInfoBrief.loc[2, :]
  719. row3 = publicInfoBrief.loc[3, :]
  720. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-记录数'] = int(row0[1])
  721. publicInfoBriefDf.loc[publicInfoBriefIndex, '欠税信息-涉及金额'] = int(utils.replaceAmt(row0[2]))
  722. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-记录数'] = int(row1[1])
  723. publicInfoBriefDf.loc[publicInfoBriefIndex, '民事判决信息-涉及金额'] = int(utils.replaceAmt(row1[2]))
  724. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-记录数'] = int(row2[1])
  725. publicInfoBriefDf.loc[publicInfoBriefIndex, '强制执行信息-涉及金额'] = int(utils.replaceAmt(row2[2]))
  726. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-记录数'] = int(row3[1])
  727. publicInfoBriefDf.loc[publicInfoBriefIndex, '行政处罚信息-涉及金额'] = int(utils.replaceAmt(row3[2]))
  728. #解析查询信息汇总
  729. def parseQueryRecordSum(dfObj):
  730. df = dfObj["df"];
  731. if not df.empty:
  732. queryRecordSumDfTmp = df[2:3];
  733. queryRecordSumDfTmp = queryRecordSumDfTmp.reset_index(drop=True)
  734. row0 = queryRecordSumDfTmp.loc[0, :]
  735. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-贷款审批'] =int(row0[0])
  736. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询机构数-信用卡审批'] =int(row0[1])
  737. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-贷款审批'] =int(row0[2])
  738. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-信用卡审批'] =int(row0[3])
  739. queryRecordSumDf.loc[queryRecordSumIndex, '近1月内的查询次数-本人查询'] =int(row0[4])
  740. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-贷后管理'] =int(row0[5])
  741. queryRecordSumDf.loc[queryRecordSumIndex, '近2年内的查询次数-担保资格审查'] =int(row0[6])
  742. # 解析查询记录明细
  743. def parseQueryInfoDetail(dfObj):
  744. df = dfObj["df"];
  745. reportTime = queryInfo["reportTime"];
  746. if not df.empty:
  747. df = utils.replaceDateCol(df)
  748. df = df[1:df.index.size] # 去掉表头
  749. queryRecordDetailDf.loc[queryRecordDetailIndex, '近1月查询次数'] =qip.getLastMonthQueryTimes(df, 1, "",reportTime)
  750. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数'] =qip.getLastMonthQueryTimes(df, 3, "",reportTime)
  751. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数'] =qip.getLastMonthQueryTimes(df, 6, "",reportTime)
  752. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数'] =qip.getLastMonthQueryTimes(df, 12, "",reportTime)
  753. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近1个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 1, "", reportTime)
  754. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近3个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 3, "", reportTime)
  755. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 6, "", reportTime)
  756. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 12, "", reportTime)
  757. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月查询机构数'] =qip.getLastMonthQueryOrgTimes(df, 24, "", reportTime)
  758. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 3, consts.loanApprove, reportTime)
  759. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 3, consts.creditCard, reportTime)
  760. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数贷款审批'] =qip.getLastMonthQueryTimes(df, 6, consts.loanApprove, reportTime)
  761. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询次数信用卡审批'] = qip.getLastMonthQueryTimes(df, 6, consts.creditCard, reportTime)
  762. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数贷款审批'] = qip.getLastMonthQueryTimes(df, 12, consts.loanApprove, reportTime)
  763. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询次数信用卡审批'] =qip.getLastMonthQueryTimes(df, 12, consts.creditCard, reportTime)
  764. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.loanApprove, reportTime)
  765. queryRecordDetailDf.loc[queryRecordDetailIndex, '近3月查询机构数信用卡审批'] =qip.getLastMonthQueryOrgTimes(df, 3, consts.creditCard, reportTime)
  766. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数贷款审批'] =qip.getLastMonthQueryOrgTimes(df, 6, consts.loanApprove, reportTime)
  767. queryRecordDetailDf.loc[queryRecordDetailIndex, '近6月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.creditCard,reportTime)
  768. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数贷款审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanApprove, reportTime)
  769. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12月查询机构数信用卡审批'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.creditCard,reportTime)
  770. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.insuranceAprove,reportTime)
  771. queryRecordDetailDf.loc[queryRecordDetailIndex, '近12个月担保资格审查查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.insuranceAprove,reportTime)
  772. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近6个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 6, consts.loanAfterMgr,reportTime)
  773. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近12个月贷后管理查询次数'] = qip.getLastMonthQueryOrgTimes(df, 12, consts.loanAfterMgr,reportTime)
  774. queryRecordDetailDf.loc[queryRecordDetailIndex, '最后一次查询距离现在的月数贷款审批'] = qip.getLastTimeQueryMonth(df, consts.loanApprove,reportTime)
  775. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷后管理查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanAfterMgr, reportTime)
  776. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月贷款审批审批次数'] = qip.getLastMonthQueryTimes(df, 24, consts.loanApprove, reportTime)
  777. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月信用卡审批查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.creditCard,reportTime)
  778. queryRecordDetailDf.loc[queryRecordDetailIndex, '最近24个月担保资格审查查询次数'] = qip.getLastMonthQueryTimes(df, 24, consts.insuranceAprove,reportTime)
  779. #解析住房公积金
  780. def parseHousingFundRcd(df):
  781. if not df.empty:
  782. lastHousingFundRcdDf = df.sort_values(by=["信息更新日期"] , ascending=(False)).reset_index(drop=True)
  783. lastHousingFundRcdDf = lastHousingFundRcdDf[0:1]#最新
  784. row1 = lastHousingFundRcdDf.loc[0,:].dropna().reset_index(drop=True)
  785. housingFundRcdDf.loc[housingFundRcdIndex, '参缴地'] =row1[1]
  786. housingFundRcdDf.loc[housingFundRcdIndex, '参缴日期'] =row1[2]
  787. housingFundRcdDf.loc[housingFundRcdIndex, '初缴月份'] =row1[3]#初缴日期
  788. housingFundRcdDf.loc[housingFundRcdIndex, '缴至月份'] =row1[4]
  789. housingFundRcdDf.loc[housingFundRcdIndex, '缴费状态'] =row1[5]
  790. housingFundRcdDf.loc[housingFundRcdIndex, '月缴存额'] =row1[6]
  791. housingFundRcdDf.loc[housingFundRcdIndex, '个人存缴比例'] =row1[7]
  792. housingFundRcdDf.loc[housingFundRcdIndex, '单位存缴比例'] =row1[8]
  793. housingFundRcdDf.loc[housingFundRcdIndex, '缴费单位'] =row1[9]#扣缴单位
  794. housingFundRcdDf.loc[housingFundRcdIndex, '信息更新日期'] =row1[10]
  795. reportTime = queryInfo["reportTime"];
  796. lastDateStr = utils.getLastMonthDate(reportTime,12)
  797. avgHousingFundDf = df[df['缴至月份']>=lastDateStr]
  798. housingFundRcdDf.loc[housingFundRcdIndex, '最近1年公积金平均值'] = round(np.mean(avgHousingFundDf['月缴存额']),2)
  799. lastDateStr = utils.getLastMonthDate(reportTime, 12*3)
  800. avgHousingFundDf = df[df['缴至月份'] >= lastDateStr]
  801. housingFundRcdDf.loc[housingFundRcdIndex, '最近3年公积金平均值']= round(np.mean(avgHousingFundDf['月缴存额']),2)
  802. #解析贷款还款记录指标
  803. def parseLoanMergeAndPayRecordDf(df,payRcdDf):
  804. if not df.empty and not payRcdDf.empty:
  805. #正常
  806. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  807. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  808. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  809. #临时保存,不用过滤还款状态为0的
  810. payRcdMaxOverdueDf = overduePayRcdDf;
  811. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态']>0]
  812. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  813. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期账户数']/df.index.size,2)
  814. #存在逾期的贷款账户 非结清的过滤出逾期的账户号
  815. overdueLoanDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  816. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] = overdueLoanDf['管理机构'].unique().size
  817. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数占比'] = round(loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款逾期机构数'] / df['管理机构'].unique().size,2)
  818. #还款记录按日期排序最近3笔的最大逾期期数
  819. loanAccountInfoDf.loc[loanAccountInfoIndex, '近1月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf,1);
  820. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  821. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  822. loanAccountInfoDf.loc[loanAccountInfoIndex, '近9月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  823. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款的最大逾期期数'] = prp.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  824. reportTime = queryInfo["reportTime"]
  825. loanAccountInfoDf.loc[loanAccountInfoIndex, '近24月贷款最大逾期距离现在的月数'] = prp.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf,reportTime, 24);
  826. payStatus= ["G","D","C","N","M","1","2","3","4","5","6","7"]
  827. # 贷款24期还款记录次数 剔除结清 转出 呆账
  828. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  829. #从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数,MAX(各账户的还款次数)
  830. payRcdTimesDf = payRcdTimesDf[payRcdTimesDf['还款状态'].isin(payStatus)]
  831. payRcdTimes = payRcdTimesDf.groupby(['账户编号'])['还款状态'].count()
  832. #payRcdDf[(payRcdDf['还款状态']!='') & (payRcdDf['账户编号']==1)].index.size
  833. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款24期还款记录次数'] = np.max(payRcdTimes)
  834. #解析信贷交易明细-特殊交易
  835. def parseSpecialTrade(df):
  836. if not df.empty:
  837. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '当前用户发生特殊交易的严重程度'] = np.max(df['严重程度'])#加工的指标
  838. maxChangeMonthIndex = np.argmax(np.abs(df['变更月数']))
  839. meanMonthValue = np.mean(np.abs(df['变更月数']))
  840. row0 = df.loc[maxChangeMonthIndex, :]
  841. settleDf = df[(df['特殊交易类型']=='提前结清') | (df['特殊交易类型']=='提前还款')]
  842. debtDf = df[(df['特殊交易类型'] == '以资抵债')]
  843. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户发生特殊交易变更月数的最大差值'] = row0[3]
  844. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户发生特殊交易变更月数的平均差值'] = round(meanMonthValue,2)
  845. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户特殊交易涉及的发生金额的最大值'] = np.max(df['发生金额'])
  846. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户特殊交易涉及的发生金额的平均值'] = round(np.mean(df['发生金额']),2)
  847. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户所有帐户发生提前还款交易的次数统计'] = settleDf.index.size
  848. creditTradeDetailHeader_specialTrade.loc[specialTradeIndex, '用户所有帐户发生不良特殊交易的次数统计'] = debtDf.index.size;
  849. #信贷交易明细-非循环贷账户
  850. def parseLoanAccountInfo(df):
  851. if not df.empty:
  852. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  853. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')].reset_index(drop=True)
  854. normalDf = normalDf[0:loanAccountNum]#根据非循环贷账户数进行计算进行截取
  855. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  856. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  857. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  858. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  859. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  860. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  861. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  862. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  863. creditTradeDetailDf_loanAccountInfo.loc[loanInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  864. #信贷交易明细-循环额度分账户
  865. def parseCycleCreditAccountInfo(df):
  866. if not df.empty:
  867. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')].reset_index(drop=True)
  868. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  869. cycleCreditAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'])
  870. normalDf = normalDf[loanAccountNum:(loanAccountNum + cycleCreditAccountNum)]
  871. if not normalDf.empty:
  872. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  873. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  874. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  875. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  876. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  877. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  878. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  879. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  880. creditTradeDetailDf_cycleCreditAccountInfo.loc[cycleCreditAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  881. #信贷交易明细-循环贷账户
  882. def parseCycleLoanAccountInfo(df):
  883. if not df.empty:
  884. normalDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  885. loanAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户账户数'])
  886. cycleCreditAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户账户数'])
  887. cycleAccountNum = int(briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户账户数'])
  888. normalDf = normalDf[(loanAccountNum+cycleCreditAccountNum):normalDf.index.size]
  889. if not normalDf.empty:
  890. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月应还款(合计)'] = np.sum(normalDf['本月应还款'])
  891. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '本月实还款(合计)'] = np.sum(normalDf['本月实还款'])
  892. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '最近一次还款日期'] = np.max(normalDf['最近一次还款日期'])
  893. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期期数'] = np.sum(normalDf['当前逾期期数'])
  894. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '当前一共逾期总额'] = np.sum(normalDf['当前逾期总额'])
  895. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期31-60天未还本金(合计)'] = np.sum(normalDf['逾期31-60天未还本金'])
  896. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期61-90天未还本金(合计)'] = np.sum(normalDf['逾期61-90天未还本金'])
  897. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期91-180天未还本金(合计)'] = np.sum(normalDf['逾期91-180天未还本金'])
  898. creditTradeDetailDf_cycleLoanAccountInfo.loc[cycleLoanAccountInfoIndex, '逾期180天以上未还本金(合计)']= np.sum(normalDf['逾期180天以上未还本金'])
  899. #解析贷款账户信息指标
  900. def parseLoanMergeDf(df):
  901. if not df.empty:
  902. sortDf = df.sort_values(by=["账户关闭日期","借款金额(本金)"] , ascending=(False,False))
  903. sortDf = sortDf[sortDf['账户状态'] == '结清'];
  904. sortDf = sortDf.reset_index(drop=True)
  905. if not sortDf.empty:
  906. row0 = sortDf.loc[0, :]
  907. loanAccountInfo["lastSettleLoanAmt"] = row0['借款金额(本金)']
  908. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的贷款金额'] = row0['借款金额(本金)']
  909. openDate = dfParser.formatDate(row0['开立日期'])
  910. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的发放距今月数'] = utils.difMonthReportTime(openDate,queryInfo["reportTime"])
  911. settleDate = dfParser.formatDate(row0['账户关闭日期'])
  912. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近一笔结清贷款的结清距今月数'] = utils.difMonthReportTime(settleDate,queryInfo["reportTime"])
  913. loanAccountInfoDf.loc[loanAccountInfoIndex, '历史贷款总法人机构数'] = df['管理机构'].unique().size
  914. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前同时在用的贷款机构数'] = df[df['余额(本金)']>0]['管理机构'].unique().size
  915. statusDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  916. bankDf = statusDf[statusDf['管理机构'].str.contains('银行')]
  917. #没有记录
  918. if statusDf.index.size==0:
  919. isNotBankCust = -1
  920. else:
  921. if bankDf.index.size >0:#有一条以上不为结清,请包含银行
  922. isNotBankCust = 1;
  923. else:
  924. isNotBankCust = 0;
  925. loanAccountInfoDf.loc[loanAccountInfoIndex, '是否有非银行贷款客户'] = isNotBankCust
  926. #最严重的五级分类
  927. # fiveType = ""
  928. # for fiveTypeTmp in consts.fiveType:
  929. # fiveTypeDf = statusDf[statusDf['五级分类']==fiveTypeTmp];
  930. # if not fiveTypeDf.empty:
  931. # fiveType = fiveTypeTmp;
  932. # break;
  933. # loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款五级分类'] = fiveType
  934. #当前贷款LTV
  935. # 从“贷款信息”中提取,剔除“账户状态”为结清及转出,并剔除“账户状态”为呆账且本金余额 = 0
  936. # 的记录后,SUM(本金余额) / SUM(贷款本金)
  937. # 如本金余额为空和贷款本金为0或为空,则当条记录不计算
  938. loanLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['借款金额(本金)']>0) & (df['余额(本金)']!='--')]
  939. badSetDf = loanLtvDf[~((loanLtvDf['账户状态'] == '呆账') & (loanLtvDf['余额(本金)']==0))]
  940. balanceSum = np.sum(badSetDf['余额(本金)'].astype('int'))
  941. loanAmtSum = np.sum(badSetDf['借款金额(本金)'].astype('int'))
  942. if(loanAmtSum !=0):
  943. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款LTV'] = round(np.divide(balanceSum,loanAmtSum),2)
  944. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最高LTV'] = round(np.max(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))),2)
  945. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最低LTV'] = round(np.min(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  946. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款平均LTV'] = round(np.mean(np.divide(badSetDf['余额(本金)'].astype('int'), badSetDf['借款金额(本金)'].astype('int'))), 2)
  947. #['个人住房商业贷款','个人商用房(含商住两用)贷款','个人住房公积金贷款','房'],
  948. houseLtvList = consts.houseLtvList;
  949. # houseLtvDf = badSetDf[badSetDf['业务种类'].isin(houseLtvList)]
  950. # if not houseLtvDf.empty:
  951. # loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = round(np.divide(np.sum(houseLtvDf['余额(本金)'].astype('int')),np.sum(houseLtvDf['借款金额(本金)'].astype('int'))), 2)
  952. #['个人住房贷款','个人商用房(包括商住两用)贷款']
  953. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前房贷LTV'] = lip.getCurLtv(badSetDf, houseLtvList)
  954. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款机构数量'] = loanLtvDf['管理机构'].unique().size
  955. cardLtvList = ['个人汽车消费贷款','车']
  956. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前车贷LTV'] = lip.getCurLtv(badSetDf, cardLtvList)
  957. operateLtvList = ['个人经营性贷款']
  958. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前经营贷LTV'] = lip.getCurLtv(badSetDf, operateLtvList)
  959. consumeLtvList = ['其他个人消费贷款']
  960. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消费贷LTV'] = lip.getCurLtv(badSetDf, consumeLtvList)
  961. bankLtvList = ['商业银行','外资银行','村镇银行','住房储蓄银行','财务公司']
  962. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前银行贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  963. bankLtvList = ['消费金融公司','汽车金融公司','信托公司']# TODO
  964. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前消金贷LTV'] = lip.getCurBankLtv(badSetDf, bankLtvList)
  965. smallLoanLtvList = ['小额信贷公司']
  966. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前小贷LTV'] = lip.getCurBankLtv(badSetDf, smallLoanLtvList)
  967. #当前贷款最大逾期期数
  968. # 从“贷款信息”中提取,剔除“账户状态”为结清、转出、呆账、呆帐后,MAX(每笔贷款的当前逾期期数)
  969. loanOverdueLtvDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出') & (df['账户状态'] != '呆账')]
  970. if not loanOverdueLtvDf.empty:
  971. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数'] = np.max(loanOverdueLtvDf['当前逾期期数'])
  972. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期金额'] = np.max(loanOverdueLtvDf['当前逾期总额'])
  973. loanOverdueLtvDf=loanOverdueLtvDf.reset_index(drop=True)
  974. maxOverdueIndex = np.argmax(loanOverdueLtvDf['当前逾期期数'])
  975. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款最大逾期期数对应的最大逾期金额'] = loanOverdueLtvDf.loc[maxOverdueIndex,:]['当前逾期总额']
  976. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最高贷款本金'] = lip.getLastLoanAmtMax(df,queryInfo["reportTime"],3)#贷款指标加工单独放到一个文件里
  977. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 3)
  978. loanAccountInfoDf.loc[loanAccountInfoIndex, '近3月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 3)
  979. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 6)
  980. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 6)
  981. loanAccountInfoDf.loc[loanAccountInfoIndex, '近6月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 6)
  982. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最高贷款本金'] = lip.getLastLoanAmtMax(df, queryInfo["reportTime"], 12)
  983. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户最低贷款本金'] = lip.getLastLoanAmtMin(df, queryInfo["reportTime"], 12)
  984. loanAccountInfoDf.loc[loanAccountInfoIndex, '近12月开户平均贷款本金'] = lip.getLastLoanAmtAvg(df, queryInfo["reportTime"], 12)
  985. lastLoanDf = loanOverdueLtvDf;
  986. if not lastLoanDf.empty:
  987. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款最近一次还款日期距今时长'] = lip.getLastPayDateMinDays(lastLoanDf,queryInfo["reportTime"])
  988. normalDf = df[(df['账户状态'] == '正常') & (df['当前逾期期数'] == 0)]
  989. #未结清贷款总账户数:账户状态不等于结清和转出的记录数
  990. notSettleDf = df[(df['账户状态'] != '结清') & (df['账户状态'] != '转出')]
  991. if not notSettleDf.empty:
  992. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数'] = normalDf.index.size
  993. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户数占比'] = round(normalDf.index.size/notSettleDf.index.size,2)
  994. #当前未结清贷款余额总和
  995. # ltvDf = tmpDf[tmpDf['业务种类'].isin(bizTypeList)]
  996. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  997. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清贷款余额总和'] = np.sum(notSettleDf['余额(本金)'])
  998. # 当前未结清住房贷款余额总和
  999. houseDf = notSettleDf[notSettleDf['业务种类'].isin(houseLtvList)]
  1000. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清住房贷款余额总和'] = np.sum(houseDf['余额(本金)'])
  1001. # 当前未结清汽车贷款余额总和
  1002. cardDf = notSettleDf[notSettleDf['业务种类'].isin(cardLtvList)]
  1003. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清汽车贷款余额总和'] = np.sum(cardDf['余额(本金)'])
  1004. # 当前未结清个人经营性贷款余额总和
  1005. operateLtvDf = notSettleDf[notSettleDf['业务种类'].isin(operateLtvList)]
  1006. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前未结清个人经营性贷款余额总和'] = np.sum(operateLtvDf['余额(本金)'])
  1007. # 当前平均每月贷款余额总和
  1008. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前平均每月贷款余额总和'] = round(np.sum(normalDf['余额(本金)'])/12,2)
  1009. #当前正常贷款账户余额
  1010. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额'] = np.sum(normalDf['余额(本金)'])
  1011. # "从“贷款信息”中提取,剔除结清、转出,当前正常贷款账户余额/未结清贷款总余额(本金余额加总)
  1012. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常贷款账户余额占总余额比'] = round(np.sum(normalDf['余额(本金)'])/np.sum(notSettleDf['余额(本金)']))
  1013. settleDf = df[(df['账户状态'] == '结清')]
  1014. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数'] = settleDf.index.size
  1015. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前正常结清贷款账户数占比'] = round(settleDf.index.size/df.index.size,2)
  1016. #贷款24期还款记录次数 TODO
  1017. # 最近3个月个人消费贷款发放额度
  1018. loanAccountInfoDf.loc[loanAccountInfoIndex, '贷款本月实还款金额'] = np.sum(loanOverdueLtvDf['本月应还款'])
  1019. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近3个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df,3,queryInfo["reportTime"])
  1020. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近6个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 6,queryInfo["reportTime"])
  1021. loanAccountInfoDf.loc[loanAccountInfoIndex, '最近12个月个人消费贷款发放额度'] = lip.getLastPerConsumeAmt(df, 12,queryInfo["reportTime"])
  1022. #未结清贷款平均剩余还款期数
  1023. payPieDf = notSettleDf[notSettleDf['还款期数']!='--']
  1024. if payPieDf.index.size!=0:
  1025. loanAccountInfoDf.loc[loanAccountInfoIndex, '未结清贷款平均剩余还款期数'] = round(np.sum(payPieDf['剩余还款期数'])/payPieDf.index.size,2)
  1026. # 当前贷款本月应还金额总和
  1027. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月应还金额总和'] = np.sum(notSettleDf['本月应还款'])
  1028. # 当前贷款本月实还金额总额
  1029. loanAccountInfoDf.loc[loanAccountInfoIndex, '当前贷款本月实还金额总额'] = np.sum(notSettleDf['本月实还款'])
  1030. #解析贷记卡账户信息指标
  1031. def parseCreditCardMergeDf(df):
  1032. if not df.empty:
  1033. # 历史信用卡总法人机构数
  1034. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'历史信用卡总法人机构数'] = df['发卡机构'].unique().size
  1035. # creditCardUseDf = df[df['已用额度']>0];
  1036. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'当前同时在用的信用卡机构数'] = creditCardUseDf['发卡机构'].unique().size
  1037. #统一排除
  1038. creditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1039. totalAmtDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1040. #大额专项分期额度(合计)
  1041. # 已用分期金额(合计)
  1042. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '大额专项分期额度(合计)'] = np.sum(creditDf['大额专项分期额度'])
  1043. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '已用分期金额(合计)'] = np.sum(creditDf['已用分期金额'])
  1044. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex,'贷记卡账户当前总额度'] = cip.getMaxCreditAmt(creditDf)
  1045. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近新发放的3张贷记卡平均额度'] = cip.getAvgCreditAmt(creditDf)
  1046. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过90%的机构数占比'] = cip.getUseRate(creditDf,df,0.9)
  1047. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡额度使用率超过100%的机构数占比'] = cip.getUseRate(creditDf, totalAmtDf, 1)
  1048. # 从“贷记卡信息”中提取,计算授信额度时剔除销户,计算已用额度时剔除呆账、呆帐、销户后,SUM(各账户已用额度) / SUM(各账户授信额度)
  1049. useCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1050. totalCreditDf = df[(df['币种'] == '人民币元') & (df['账户状态'] != '销户')]
  1051. totalCreditAmt = np.sum(totalCreditDf['账户授信额度'])
  1052. if totalCreditAmt != 0:#授信额度不能为0
  1053. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户当前总额度使用率'] = round(np.sum(useCreditDf['已用额度'])/np.sum(totalCreditDf['账户授信额度']),2)
  1054. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户最高使用额度总的使用率'] = round(np.sum(useCreditDf['最大使用额']) / np.sum(totalCreditDf['账户授信额度']), 2)
  1055. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡账户近6月平均额度总的使用率'] = round(np.sum(useCreditDf['最近6个月平均使用额度']) / np.sum(totalCreditDf['账户授信额度']), 2)
  1056. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数'] = np.max(creditDf['当前逾期期数'])#用于计算
  1057. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期金额'] = np.max(creditDf['当前逾期总额'])
  1058. if not creditDf.empty:
  1059. creditDf = creditDf.reset_index(drop=True)
  1060. maxOverdueIndex = np.argmax(creditDf['当前逾期期数'])
  1061. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡最大逾期期数对应的最大逾期金额'] = creditDf.loc[maxOverdueIndex,:]['当前逾期总额']
  1062. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df,queryInfo["reportTime"],3)
  1063. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 3)
  1064. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 3)
  1065. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 6)
  1066. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 6)
  1067. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 6)
  1068. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最高额度'] = cip.getLastMonthMaxCreditAmt(df, queryInfo["reportTime"], 12)
  1069. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡最低额度'] = cip.getLastMonthMinCreditAmt(df, queryInfo["reportTime"], 12)
  1070. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月开卡平均额度'] = cip.getLastMonthAvgCreditAmt(df, queryInfo["reportTime"], 12)
  1071. if not creditDf.empty:
  1072. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最近一次还款日期距今时长'] = cip.getLastPayDateMinDays(creditDf,queryInfo["reportTime"])
  1073. paySo = np.sum(creditDf['本月应还款'])
  1074. if(paySo)!=0:
  1075. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡还款比例'] = round(np.sum(creditDf['本月实还款'])/np.sum(creditDf['本月应还款']),2)
  1076. creditDfTmp = creditDf[creditDf['本月应还款']>0]
  1077. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最高还款比例'] = round(np.max(np.divide(creditDfTmp['本月实还款'] , creditDfTmp['本月应还款'])), 2)
  1078. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡最低还款比例'] = round(np.min(np.divide(creditDfTmp['本月实还款'] , creditDfTmp['本月应还款'])), 2)
  1079. normalDf = df[(df['币种'] == '人民币元') & (df['账户状态'] == '正常') & (df['当前逾期期数']==0)];
  1080. notCloseDf = df[(df['账户状态'] != '销户')]
  1081. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数'] = normalDf.index.size
  1082. if not notCloseDf.empty and not normalDf.empty:
  1083. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户数占比'] = round(normalDf.index.size/notCloseDf.index.size,2)
  1084. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡已用额度'] = np.sum(normalDf['已用额度'])
  1085. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数'] = normalDf[normalDf['已用额度']>0].index.size
  1086. if not creditDf.empty:
  1087. creditUseAmt = np.sum(creditDf['已用额度'])
  1088. if creditUseAmt!=0:
  1089. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常贷记卡账户余额占总余额比'] = round(np.sum(normalDf['已用额度']) / np.sum(creditDf['已用额度']), 2)
  1090. if notCloseDf.empty:
  1091. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = -99
  1092. else:
  1093. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数占比'] = \
  1094. round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前正常且有余额的贷记卡账户数']/notCloseDf.index.size,3)
  1095. #当前正常贷记卡账户余额占总余额比
  1096. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月实还金额总和'] = np.sum(creditDf['本月实还款'])
  1097. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡本月应还金额总和'] = np.sum(creditDf['本月应还款'])
  1098. maxAmtDf = df[(df['币种'] == '人民币元')]
  1099. if not maxAmtDf.empty:
  1100. maxAmtDf = maxAmtDf.reset_index(drop=True)
  1101. maxAmtIndex = np.argmax(maxAmtDf['账户授信额度'])
  1102. maxOpenDate = maxAmtDf.loc[maxAmtIndex,:]['开立日期'];
  1103. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '额度最高的人民币贷记卡开卡距今月份数'] = utils.difMonthReportTime(maxOpenDate,queryInfo["reportTime"]);
  1104. # 名下贷记卡数量-状态正常
  1105. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] = df[(df['账户状态'] != '销户')].index.size
  1106. # 名下贷记卡数量-状态未激活
  1107. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] = df[(df['账户状态'] == '未激活')].index.size
  1108. # 名下贷记卡数量-状态异常--异常包含(2-冻结,3-止付,5-呆帐,10-其他)
  1109. abnormalList = ['冻结','止付','呆帐','其他']
  1110. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] = df[(df['账户状态'].isin(abnormalList))].index.size
  1111. # 名下贷记卡比例-状态正常
  1112. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态正常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态正常'] / df.index.size,2)
  1113. # 名下贷记卡比例-状态未激活
  1114. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态未激活'] =round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态未激活'] / df.index.size,2)
  1115. # 名下贷记卡比例-状态异常
  1116. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡比例-状态异常'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '名下贷记卡数量-状态异常'] / df.index.size,2)
  1117. #解析准贷记卡账户信息指标
  1118. def parseCreditCardMergeDfZ(df,payRcd):
  1119. if not df.empty:
  1120. overdueCreditCardRcdDf = payRcd[payRcd['账户编号'].isin(df['账户编号'].values)];
  1121. overdueCreditCardRcdDf = utils.replacePayRcdStatusOverdue(overdueCreditCardRcdDf)
  1122. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '本月应还款(合计)'] = np.nansum(df['透支余额'])
  1123. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '本月实还款(合计)'] = np.nansum(df['本月实还款'])
  1124. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '最近一次还款日期'] = np.max(df['最近一次还款日期'])
  1125. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前一共透支期数'] = cip.getCurOverdueNum(overdueCreditCardRcdDf);
  1126. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前一共透支总额'] = np.nansum(df['透支余额'])
  1127. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '透支180天以上未支付余额(合计)'] = np.nansum(df['透支180天以上未付余额'])
  1128. creditDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户')]
  1129. if not creditDf.empty:
  1130. totalAmt = np.nansum(creditDf['账户授信额度'])
  1131. creditAmt = np.nansum(creditDf['透支余额'])
  1132. if totalAmt !=0:
  1133. #从“贷记卡信息”中提取,剔除未激活、销户后,所有账户透支金额/所有账户账户授信额度。
  1134. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '全部准贷记卡账户当前总额度使用率']=round(creditAmt/totalAmt,2)
  1135. #从“贷记卡信息”中提取,剔除未激活、销户后,MAX(单账户最高透支金额/单账户授信额度)
  1136. creditMaxDf = creditDf[creditDf['账户授信额度']>0]
  1137. if not creditMaxDf.empty:
  1138. creditMaxDf = creditMaxDf.fillna(0.0)
  1139. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '准贷记卡账户最高使用额度总的使用率'] = round(np.max(np.divide(creditMaxDf['最大透支余额'],creditMaxDf['账户授信额度'])),2)
  1140. creditMaxDf = creditDf[creditDf['最大透支余额'] > 0]
  1141. if not creditMaxDf.empty:
  1142. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡最大透支金额'] = np.max(creditMaxDf['最大透支余额'])
  1143. #从“贷记卡信息”中提取,剔除未激活、销户后,当前透支准贷记卡账户数/总准贷记卡账户数,透支账户判断:透支余额不为0的账户
  1144. creditDfTmp = creditDf[creditDf['透支余额']>0]
  1145. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡透支账户数占比'] = round(creditDfTmp.index.size / creditDf.index.size,2)
  1146. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡本月应还金额总和'] = np.nansum(df['透支余额'])
  1147. creditCardAccountInfoDfZ.loc[creditCardAccountInfoIndexZ, '当前准贷记卡本月实还金额总和'] = np.nansum(df['本月实还款'])
  1148. #解析使用率 TODO 使用汇总计算还是使用明细计算
  1149. def parseUseRate():
  1150. # useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)']
  1151. # 从“信贷交易授信及负债信息概要”中“非循环贷账户信息汇总”、“循环额度下分账户信息汇总”、“循环贷账户信息汇总”、“贷记卡账户信息汇总”和“准贷记卡账户信息汇总”里提取,SUM(
  1152. # 所有“余额”、“已用额度”和“透支余额”) / SUM(所有“授信总额”和“授信额度”)
  1153. loanUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户余额']
  1154. cycleCreditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户余额']
  1155. cycleUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户余额']
  1156. creditUseAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡已用额度']
  1157. creditAmtUseZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡已用额度']
  1158. loanTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '非循环贷账户授信总额']
  1159. cycleCreditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环额度下分账户授信总额']
  1160. cycleTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '循环贷账户授信总额']
  1161. creditTotalAmt = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '贷记卡授信总额']
  1162. creditAmtTotalZ = briefInfoDf_loanTradeCreditInfo.loc[loanTradeCreditInfoIndex, '准贷记卡授信总额']
  1163. # if str(loanUseAmt)=="nan":
  1164. # loanUseAmt = 0;
  1165. # if str(cycleCreditUseAmt) == "nan":
  1166. # loanUseAmt = 0;
  1167. # if str(cycleCreditUseAmt) == "nan":
  1168. # loanUseAmt = 0;
  1169. useAmt = loanUseAmt+cycleCreditUseAmt+cycleUseAmt+creditUseAmt+creditAmtUseZ
  1170. totalAmt = loanTotalAmt+cycleCreditTotalAmt+cycleTotalAmt+creditTotalAmt+creditAmtTotalZ
  1171. if totalAmt !=0:
  1172. useRateDf.loc[useRateIndex, '全账户使用率(已用额度/授信总额)'] = round(useAmt / totalAmt,2)
  1173. if loanTotalAmt!=0:
  1174. useRateDf.loc[useRateIndex, '非循环贷账户使用率(已用额度/授信总额)'] = round(loanUseAmt / loanTotalAmt,2)
  1175. if cycleCreditTotalAmt !=0:
  1176. useRateDf.loc[useRateIndex, '循环额度下分账户使用率(已用额度/授信总额)'] = round(cycleCreditTotalAmt / cycleCreditTotalAmt,2)
  1177. if cycleTotalAmt !=0:
  1178. useRateDf.loc[useRateIndex, '循环贷账户使用率(已用额度/授信总额)'] = round(cycleUseAmt / cycleTotalAmt,2)
  1179. if creditTotalAmt !=0:
  1180. useRateDf.loc[useRateIndex, '贷记卡账户使用率(已用额度/授信总额)'] = round(creditUseAmt / creditTotalAmt,2)
  1181. if creditAmtTotalZ !=0:
  1182. useRateDf.loc[useRateIndex, '准贷记卡账户使用率(已用额度/授信总额)'] = round(creditAmtUseZ / creditAmtTotalZ,2)
  1183. #解析开户数
  1184. def parseOpenAccount(loanDf,creditCardDf,creditCardDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1185. reportTime = queryInfo["reportTime"];
  1186. openAccountDf.loc[openAccountIndex, '近3个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,3)+cip.getOpenAccount(creditCardDf,reportTime,3)+cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1187. openAccountDf.loc[openAccountIndex, '近6个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,6)+cip.getOpenAccount(creditCardDf,reportTime,6)+cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1188. openAccountDf.loc[openAccountIndex, '近9个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,9)+cip.getOpenAccount(creditCardDf,reportTime,9)+cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1189. openAccountDf.loc[openAccountIndex, '近12个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,12)+cip.getOpenAccount(creditCardDf,reportTime,12)+cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1190. openAccountDf.loc[openAccountIndex, '近24个月全账户开户数'] = cip.getOpenAccount(loanDf,reportTime,24)+cip.getOpenAccount(creditCardDf,reportTime,24)+cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1191. openAccountDf.loc[openAccountIndex, '近3个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,consts.bankList)
  1192. openAccountDf.loc[openAccountIndex, '近6个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,consts.bankList)
  1193. openAccountDf.loc[openAccountIndex, '近9个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,consts.bankList)
  1194. openAccountDf.loc[openAccountIndex, '近12个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,consts.bankList)
  1195. openAccountDf.loc[openAccountIndex, '近24个月消费金融类账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,consts.bankList)
  1196. openAccountDf.loc[openAccountIndex, '近3个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,3,"")
  1197. openAccountDf.loc[openAccountIndex, '近6个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,6,"")
  1198. openAccountDf.loc[openAccountIndex, '近9个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,9,"")
  1199. openAccountDf.loc[openAccountIndex, '近12个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,12,"")
  1200. openAccountDf.loc[openAccountIndex, '近24个月贷款账户开户数'] = lip.getOpenAccount(loanDf,reportTime,24,"")
  1201. openAccountDf.loc[openAccountIndex, '近3个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,3)
  1202. openAccountDf.loc[openAccountIndex, '近6个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,6)
  1203. openAccountDf.loc[openAccountIndex, '近9个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,9)
  1204. openAccountDf.loc[openAccountIndex, '近12个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,12)
  1205. openAccountDf.loc[openAccountIndex, '近24个月贷记卡账户开户数'] = cip.getOpenAccount(creditCardDf,reportTime,24)
  1206. openAccountDf.loc[openAccountIndex, '近3个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,3)
  1207. openAccountDf.loc[openAccountIndex, '近6个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,6)
  1208. openAccountDf.loc[openAccountIndex, '近9个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,9)
  1209. openAccountDf.loc[openAccountIndex, '近12个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,12)
  1210. openAccountDf.loc[openAccountIndex, '近24个月准贷记卡账户开户数'] = cip.getOpenAccount(creditCardDfZ,reportTime,24)
  1211. #从“信贷交易信息明细”中“非循环贷账户”、“循环额度下分账户”、“循环贷账户”、“贷记卡账户”和“准贷记卡账户”里提取,5年里账户还款状态出现“1、2、3、4、5、6、7、D、Z、G、B”的账户数/所有账户数
  1212. overdueLoanPayRcdDf = loanPayRecordMergeDf[loanPayRecordMergeDf['账户编号'].isin(loanDf['账户编号'].values)]
  1213. overdueLoanPayRcdDf = utils.replacePayRcdStatusOverdue(overdueLoanPayRcdDf)
  1214. overdueLoanPayRcdDf = overdueLoanPayRcdDf[overdueLoanPayRcdDf['还款状态'] > 0]
  1215. overdueCreditPayRcdDf = creditCardPayRecordMergeDf[creditCardPayRecordMergeDf['账户编号'].isin(creditCardDf['账户编号'].values)]
  1216. overdueCreditPayRcdDf = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDf)
  1217. overdueCreditPayRcdDf = overdueCreditPayRcdDf[overdueCreditPayRcdDf['还款状态'] > 0]
  1218. overdueCreditPayRcdDfZ = creditCardPayRecordMergeDfZ[creditCardPayRecordMergeDfZ['账户编号'].isin(creditCardDfZ['账户编号'].values)]
  1219. overdueCreditPayRcdDfZ = utils.replacePayRcdStatusOverdue(overdueCreditPayRcdDfZ)
  1220. overdueCreditPayRcdDfZ = overdueCreditPayRcdDfZ[overdueCreditPayRcdDfZ['还款状态'] > 0]
  1221. loanAccountNum = loanPayRecordMergeDf['账户编号'].unique().size
  1222. creditAccountNum = creditCardPayRecordMergeDf['账户编号'].unique().size
  1223. creditAccountNumZ = creditCardPayRecordMergeDfZ['账户编号'].unique().size
  1224. overdueLoanNum = overdueLoanPayRcdDf['账户编号'].unique().size
  1225. overdueCreditNum = overdueCreditPayRcdDf['账户编号'].unique().size
  1226. overdueCreditNumZ = overdueCreditPayRcdDfZ['账户编号'].unique().size
  1227. openAccountDf.loc[openAccountIndex, '有过逾期记录的账户/全账户数'] = round((overdueLoanNum+overdueCreditNum+overdueCreditNumZ)/(loanAccountNum+creditAccountNum+creditAccountNumZ),2)
  1228. otherPerLoanDf = loanDf[loanDf['业务种类'].isin(consts.otherPerLoan)]
  1229. otherPerLoanNum = otherPerLoanDf.index.size;
  1230. overdueOtherPerLoanNum = otherPerLoanDf[otherPerLoanDf['账户编号'].isin(overdueLoanPayRcdDf['账户编号'].values)].index.size;
  1231. if otherPerLoanNum!=0:
  1232. openAccountDf.loc[openAccountIndex, '有过逾期记录的消费金融类账户/全消费金融类账户数'] = round(overdueOtherPerLoanNum/otherPerLoanNum,2)
  1233. if loanAccountNum!=0:
  1234. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷款账户/全贷款账户数'] = round(overdueLoanNum/loanAccountNum,2)
  1235. if creditAccountNum!=0:
  1236. openAccountDf.loc[openAccountIndex, '有过逾期记录的贷记卡账户/全贷记卡账户数'] = round(overdueCreditNum/creditAccountNum,2)
  1237. if creditAccountNumZ!=0:
  1238. openAccountDf.loc[openAccountIndex, '有过透支记录的准贷记卡账户/全准贷记卡账户数']= round(overdueCreditNumZ/creditAccountNumZ,2)
  1239. #解析24期还款状态指标
  1240. def parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ):
  1241. #creditCardPayRecordMergeDf
  1242. # 去掉外币
  1243. creditCardMergeDf = creditCardMergeDf[creditCardMergeDf['币种']=='人民币元']
  1244. creditCardPayRecordMergeDf = creditCardPayRecordMergeDf[creditCardPayRecordMergeDf['账户编号'].isin(creditCardMergeDf['账户编号'].values)]
  1245. reportTime = queryInfo["reportTime"];
  1246. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)
  1247. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)
  1248. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)
  1249. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)
  1250. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)
  1251. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)
  1252. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“2”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)
  1253. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近6月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)
  1254. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)
  1255. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“3”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)
  1256. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近12月逾期期数大于或大等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)
  1257. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24月逾期期数大于或等于“4”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)
  1258. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近3月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)
  1259. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)
  1260. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)
  1261. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“1”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)
  1262. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)
  1263. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)
  1264. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“2”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)
  1265. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)
  1266. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)
  1267. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)
  1268. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)
  1269. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)
  1270. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1271. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1272. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“3”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1273. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近6月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,6)
  1274. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近12月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1275. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24月逾期期数大于或等于“4”的次数'] = cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1276. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近3月逾期期数大于或等于“1”的次数'] = prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,3)\
  1277. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,3)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,3)
  1278. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“1”的次数'] = \
  1279. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,6)\
  1280. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,6)
  1281. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“1”的次数'] = \
  1282. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,12)\
  1283. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,12)
  1284. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“1”的次数'] = \
  1285. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,1,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,1,24)\
  1286. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,1,24)
  1287. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“2”的次数'] = \
  1288. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,6)\
  1289. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,6)
  1290. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“2”的次数'] = \
  1291. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,12)\
  1292. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,12)
  1293. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“2”的次数'] = \
  1294. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,2,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,2,24)\
  1295. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,2,24)
  1296. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近6月逾期期数大于或等于“3”的次数'] = \
  1297. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,6)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,6)\
  1298. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,6)
  1299. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“3”的次数'] = \
  1300. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,12)\
  1301. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,12)
  1302. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“3”的次数'] = \
  1303. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,3,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,3,24)\
  1304. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,3,24)
  1305. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近12月逾期期数大于或等于“4”的次数'] = \
  1306. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,12)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,12)\
  1307. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,12)
  1308. payRcdStatusDf.loc[payRcdStatusIndex, '全账户近24月逾期期数大于或等于“4”的次数'] = \
  1309. prp.getLoanOverdueTimes(loanPayRecordMergeDf,reportTime,4,24)+cip.getLoanOverdueTimes(creditCardPayRecordMergeDf,reportTime,4,24)\
  1310. +cip.getLoanOverdueTimes(creditCardPayRecordMergeDfZ,reportTime,4,24)
  1311. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"G"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"G",24)
  1312. payRcdStatusDf.loc[payRcdStatusIndex, '贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDf,reportTime,"G",24)
  1313. payRcdStatusDf.loc[payRcdStatusIndex, '准贷记卡账户近24个月是否出现"G"'] = prp.isExistsInd(creditCardPayRecordMergeDfZ,reportTime,"G",24)
  1314. payRcdStatusDf.loc[payRcdStatusIndex, '贷款账户近24个月是否出现"Z"'] = prp.isExistsInd(loanPayRecordMergeDf,reportTime,"Z",24)
  1315. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24)
  1316. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷款账户过去24个月状态正常账户数目'] = prp.getLoanNormalCount(loanPayRecordMergeDf,reportTime,24)
  1317. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24)
  1318. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDf,reportTime,24)
  1319. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月存在逾期的账户数目'] = prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1320. payRcdStatusDf.loc[payRcdStatusIndex, '用户所有准贷记卡账户过去24个月状态正常的账户数目'] = prp.getLoanNormalCount(creditCardPayRecordMergeDfZ,reportTime,24)
  1321. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去3个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,3)
  1322. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去6个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,6)
  1323. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去12个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,12)
  1324. payRcdStatusDf.loc[payRcdStatusIndex, '用户过去24个月最大逾期期数'] = prp.getPayRcdMaxOverdueNumAllAccout(loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ,reportTime,24)
  1325. #概要信息里的字段,从还款状态计算
  1326. briefInfoDf_overdueInfoSum.loc[overdueInfoSumIndex, '该用户过去5年出现逾期的所有账户数目'] = \
  1327. prp.getLoanOverdueCount(loanPayRecordMergeDf,reportTime,24*5)+prp.getLoanOverdueCount(creditCardPayRecordMergeDf,reportTime,24*5)\
  1328. +prp.getLoanOverdueCount(creditCardPayRecordMergeDfZ,reportTime,24*5)
  1329. #解析贷款还款记录指标
  1330. def parseCreditCardMergeAndPayRecordDf(df,payRcdDf):
  1331. if not df.empty and not payRcdDf.empty:
  1332. # 正常
  1333. normalDf = df[(df['账户状态'] != '未激活') & (df['账户状态'] != '销户') & (df['账户状态'] != '呆账')]
  1334. if not normalDf.empty:
  1335. overduePayRcdDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  1336. overduePayRcdDf = utils.replacePayRcdStatus(overduePayRcdDf)
  1337. # 临时保存,不用过滤还款状态为0的
  1338. payRcdMaxOverdueDf = overduePayRcdDf;
  1339. overduePayRcdDf = overduePayRcdDf[overduePayRcdDf['还款状态'] > 0]
  1340. # creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数'] = overduePayRcdDf['账户编号'].unique().size
  1341. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,“当前信用卡逾期账户数”/未销户贷记卡账户数(剔除“账户状态”为未激活、销户、呆账、呆帐后记录条数)
  1342. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期账户数占比'] = round(overduePayRcdDf['账户编号'].unique().size / normalDf.index.size, 2)
  1343. #从“贷记卡信息”中提取,剔除“账户状态”为未激活、销户、呆账、呆帐后,对(当前信用卡逾期账户数)按“开户机构代码”去重统计账户状态为逾期,按按“开户机构代码”去重后的记录条数
  1344. overdueCreditCardDf = normalDf[normalDf['账户编号'].isin(overduePayRcdDf['账户编号'].values)]
  1345. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] = overdueCreditCardDf['发卡机构'].unique().size
  1346. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数占比'] = round(creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '当前贷记卡逾期机构数'] / normalDf['发卡机构'].unique().size, 2)
  1347. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近3月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 3);
  1348. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近6月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 6);
  1349. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近9月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 9);
  1350. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近12月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 12);
  1351. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期期数'] = cip.getPayRcdMaxOverdueNum(payRcdMaxOverdueDf, 24);
  1352. reportTime = queryInfo["reportTime"]
  1353. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '近24月贷记卡最大逾期距离现在的月数'] = cip.getPayRcdMaxOverdueNumMonth(payRcdMaxOverdueDf,normalDf,reportTime, 24);
  1354. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近3个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,3);
  1355. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近6个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,6);
  1356. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近9个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,9);
  1357. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近12个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,12);
  1358. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '最近24个月贷记卡最大连续逾期月份数'] = cip.getContinuousOverdueMonth(payRcdMaxOverdueDf,normalDf,24);
  1359. payRcdTimesDf = payRcdDf[payRcdDf['账户编号'].isin(normalDf['账户编号'].values)]
  1360. payRcdTimesDf = payRcdTimesDf.sort_values(by=["账户编号", "还款日期"], ascending=(True, False))
  1361. payRcdTimesDf = payRcdTimesDf.groupby(['账户编号']).head(24)
  1362. payRcdTimesDf = payRcdTimesDf[
  1363. payRcdTimesDf['还款状态'].isin(['G', 'D', 'C', 'N', 'M', '1', '2', '3', '4', '5', '6', '7'])]#从“贷记卡信息”中提取,剔除未激活、销户、呆账、呆帐后,各账户的还款次数统计“24个月(账户)还款状态”包含"G","D","C","N","M"及数字的个数
  1364. creditCardAccountInfoDf.loc[creditCardAccountInfoIndex, '贷记卡24期还款记录次数'] = payRcdTimesDf.index.size
  1365. # 解析被追偿信息汇总
  1366. def parseRecoveryInfoMergeDf(df):
  1367. if not df.empty:
  1368. recoveryMaxPayDf = df[df['债权转移时的还款状态'] !='--']
  1369. recoveryStatusCs = df[df['账户状态'] == '催收']
  1370. if not recoveryMaxPayDf.empty:
  1371. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时的最大还款状态'] = np.max(recoveryMaxPayDf['债权转移时的还款状态']);
  1372. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时属于催收状态的账户数'] = recoveryStatusCs.index.size;
  1373. briefInfoDf_recoveryInfoSum.loc[recoveryInfoSumIndex, '债权转移时属于催收状态的账户数/被追偿信息总数'] = round(recoveryStatusCs.index.size/df.index.size,2);
  1374. #creditTradeDetailDf_recoveryInfo
  1375. # 被追偿账户总数
  1376. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex,'被追偿账户总数'] = df.index.size;
  1377. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '被追偿业务种类'] = df['业务种类'].unique().size;
  1378. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '最新一笔被追偿债券接收时间'] = np.max(df['债权接收日期']);
  1379. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '总债权金额'] = np.max(df['债权金额']);
  1380. creditTradeDetailDf_recoveryInfo.loc[recoveryInfoIndex, '债权转移时的最大还款状态'] = np.max(recoveryMaxPayDf['债权转移时的还款状态']);
  1381. def main(pdf_path):
  1382. # 解析pdf开始
  1383. with pdfplumber.open(pdf_path) as pdf:
  1384. for p in range(0, len(pdf.pages)):
  1385. page = pdf.pages[p]
  1386. # first_page = pdf.pages[1]
  1387. # if p == 3:
  1388. # print(3)
  1389. tables = page.extract_tables();
  1390. for i in range(0, len(tables)):
  1391. table = tables[i]
  1392. df = pd.DataFrame(table);
  1393. if len(keyList) > 1 and i == 0: # 判断是否被分页了
  1394. if not utils.checkHeader(df, allHeaders):
  1395. key = keyList[-1];
  1396. dfObj = dfMap[key]
  1397. # dfObj["nextDf"]=df;
  1398. # 贷款信息 贷记卡信息 强制执行记录
  1399. if key == "loanDfs" or key == "creditCardDfs" or key == "forceExecRcdDfs" or key == 'recoveryInfoDfs': # 属于列表
  1400. lastDfObj = dfObj["dfs"][-1];
  1401. lastDfObj["isByPage"] = str(p + 1);
  1402. if len(dfObj["dfs"][-1]["df"].columns) == len(df.columns): # 列数相同
  1403. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0,ignore_index=True); # 去最后一个进行合并
  1404. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1405. else:
  1406. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1407. lastDfObj["df"] = pd.concat([lastDfObj["df"], df], axis=0, ignore_index=True);
  1408. else: # 查询记录明细 为单个列表
  1409. dfObj["isByPage"] = str(p + 1);
  1410. if len(dfObj["df"].columns) == len(df.columns):
  1411. # print("key-" + key + "-page-" + str(p + 1) + "-" + "###列数相同####-被分页")
  1412. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1413. else:
  1414. # print("key-" + key + "-page-" + str(p + 1) + "-" + "列数不同-被分页")
  1415. dfObj["df"] = pd.concat([dfObj["df"], df], axis=0, ignore_index=True)
  1416. # dfObj["nextDf"] = df;
  1417. # 如果列数相等合并df
  1418. continue;
  1419. headerList0 = df.loc[0, :].tolist() # 第0行为表头
  1420. headerList0 = list(filter(None, headerList0))
  1421. headerList1 = []
  1422. if df.index.size>1:
  1423. headerList1 = df.loc[1, :].tolist() # 第1行为表头
  1424. headerList1 = list(filter(None, headerList1))
  1425. if headerList1 == queryInfoDf_header: # 被查询信息 第二行为数据
  1426. queryInfoDf = df;
  1427. dfKey = "queryInfoDf"
  1428. dfMap[dfKey]["df"] = df;
  1429. keyList.append(dfKey);
  1430. elif headerList0 == identity_header: # 身份信息
  1431. identityDf = df[:2] # 截取前2行
  1432. addressDf = df.loc[2:4,:] # 截取3到4行的第一和6
  1433. addressDf = addressDf.reset_index(drop=True)
  1434. mobileDf = utils.replaceDateColIdx(df[5:df.index.size], 5)
  1435. identityDf = pd.concat([identityDf, addressDf], axis=1, ignore_index=True) # 横向合并
  1436. dfKey = "identityDf"
  1437. dfMap[dfKey]["df"] = identityDf;
  1438. keyList.append(dfKey);
  1439. # 组装电话号码df
  1440. dfMap[dfKey]["mobileDf"] = mobileDf
  1441. elif headerList0 == mateDf_header: # 配偶信息
  1442. mateDf = df;
  1443. dfKey = "mateDf"
  1444. dfMap[dfKey]["df"] = df;
  1445. keyList.append(dfKey);
  1446. elif headerList0 == liveInfoDf_header: # 居住信息
  1447. mateDf = df;
  1448. dfKey = "liveInfoDf"
  1449. dfMap[dfKey]["df"] = df;
  1450. keyList.append(dfKey);
  1451. elif headerList0 == occupationInfo_header: # 职业信息 可能存在分页
  1452. occupationDf = df;
  1453. dfKey = "occupationDf"
  1454. dfMap[dfKey]["df"] = df;
  1455. keyList.append(dfKey);
  1456. # elif headerList0 == queryInfoBrief_header0 and headerList1 == queryInfoBrief_header1: # 查询信息概要 第二行为数据
  1457. # queryInfoBriefDf = df;
  1458. # dfKey = "queryInfoBriefDf"
  1459. # dfMap[dfKey]["df"] = df;
  1460. # keyList.append(dfKey);
  1461. elif headerList0 == loanTradeInfo_header: # 信贷交易信息
  1462. loanTradeInfoDf = df;
  1463. dfKey = "loanTradeInfoDf";
  1464. dfMap[dfKey]["df"] = df;
  1465. keyList.append(dfKey);
  1466. elif headerList1 == recoveryInfoSumDf_header: # 被追偿信息汇总
  1467. recoveryInfoSumDf = df;
  1468. dfKey = "recoveryInfoSumDf";
  1469. dfMap[dfKey]["df"] = df;
  1470. keyList.append(dfKey);
  1471. elif headerList1 == badDebtsInfoSumDf_header: # 呆账信息
  1472. badDebtsInfoSumDf = df;
  1473. dfKey = "badDebtsInfoSumDf";
  1474. dfMap[dfKey]["df"] = df;
  1475. keyList.append(dfKey);
  1476. elif headerList1 == overdueInfoSumDf_header: # 逾期透资信息汇总
  1477. overdueInfoSumDf = df;
  1478. dfKey = "overdueInfoSumDf";
  1479. dfMap[dfKey]["df"] = df;
  1480. keyList.append(dfKey);
  1481. elif headerList0 == loanAccountInfoSumDf_header0 and headerList1 == loanAccountInfoSumDf_header1: # 非循环贷账户信息汇总
  1482. loanAccountInfoSumDf = df;
  1483. dfKey = "loanAccountInfoSumDf";
  1484. dfMap[dfKey]["df"] = df;
  1485. keyList.append(dfKey);
  1486. elif headerList0 == creditCardInfoSumDf_header0 and headerList1 == creditCardInfoSumDf_header1: # 贷记卡信息汇总
  1487. creditCardInfoSumDf = df;
  1488. dfKey = "creditCardInfoSumDf";
  1489. dfMap[dfKey]["df"] = df;
  1490. keyList.append(dfKey);
  1491. elif headerList0 == creditCardInfoSumDfZ_header0 and headerList1 == creditCardInfoSumDfZ_header1: # 准贷记卡信息汇总 目前没有数据
  1492. dfKey = "creditCardInfoSumDfZ";
  1493. dfMap[dfKey]["df"] = df;
  1494. keyList.append(dfKey);
  1495. elif headerList0 == repaymentSumDf_header0:#相关还款责任汇总
  1496. dfKey = "repaymentSumDf";
  1497. dfMap[dfKey]["df"] = df;
  1498. keyList.append(dfKey);
  1499. elif headerList0 == publicInfoBriefDf_header0: #公共信息概要
  1500. dfKey = "publicInfoBriefDf";
  1501. dfMap[dfKey]["df"] = df;
  1502. keyList.append(dfKey);
  1503. elif headerList0 == queryRecordSumDf_header0:#查询记录汇总
  1504. dfKey = "queryRecordSumDf";
  1505. dfMap[dfKey]["df"] = df;
  1506. keyList.append(dfKey);
  1507. elif headerList0 == loan_header: # 贷款账户 包括循环贷,非循环贷 循环额度下分账户
  1508. dfKey = "loanDfs";
  1509. dfMap[dfKey]["dfs"].append({"df": df});
  1510. keyList.append(dfKey);
  1511. elif headerList0 == creditCard_header: # 贷记卡账户
  1512. dfKey = "creditCardDfs";
  1513. dfMap[dfKey]["dfs"].append({"df": df});
  1514. keyList.append(dfKey);
  1515. elif headerList0 == creditCardZ_header: # 准贷记卡账户 还不能和贷记卡合并
  1516. dfKey = "creditCardDfsZ";
  1517. dfMap[dfKey]["dfs"].append({"df": df});
  1518. keyList.append(dfKey);
  1519. elif headerList0 == queryRecordDetailDf_header: # 查询记录明细
  1520. dfKey = "queryRecordDetailDf";
  1521. dfMap[dfKey]["df"] = df;
  1522. keyList.append(dfKey);
  1523. elif headerList0 == housingFundRcdDfs_header: # 查询记录明细
  1524. dfKey = "housingFundRcdDfs";
  1525. dfMap[dfKey]["dfs"].append({"df": df});
  1526. keyList.append(dfKey);
  1527. elif headerList0 == forceExecRcdDfs_header: # 强制执行记录
  1528. dfKey = "forceExecRcdDfs";
  1529. dfMap[dfKey]["dfs"].append({"df": df});
  1530. keyList.append(dfKey);
  1531. elif headerList0 == recoveryInfoDfs_header: # 被追偿信息
  1532. dfKey = "recoveryInfoDfs";
  1533. dfMap[dfKey]["dfs"].append({"df": df});
  1534. keyList.append(dfKey);
  1535. # 设置分页
  1536. dfMap[dfKey]["page"] = p + 1;
  1537. logger.info("组装pdf数据完成")
  1538. logger.info("解析基础pdf数据开始")
  1539. # 打印结果解析并构建指标
  1540. for key in dfMap:
  1541. tempDfObjx = dfMap[key];
  1542. if tempDfObjx.__contains__("page"):
  1543. logger.info(key + "-page-" + str(tempDfObjx["page"]))
  1544. if tempDfObjx.__contains__("dfs"):
  1545. if key == "loanDfs": # 贷款账户
  1546. for idx in range(0, len(tempDfObjx["dfs"])):
  1547. tempDfObj = tempDfObjx["dfs"][idx];
  1548. loanAccountDfs.append(dfParser.mergeLoanDf(tempDfObj, idx,queryInfo['reportTime']))
  1549. elif key == "creditCardDfs": # 贷记卡账户合并
  1550. for idx in range(0, len(tempDfObjx["dfs"])):
  1551. tempDfObj = tempDfObjx["dfs"][idx];
  1552. tempCreditCardDf = dfParser.mergeCreditCardDf(tempDfObj, idx,queryInfo['reportTime']);
  1553. if tempCreditCardDf!=None:
  1554. creditCardAccountDfs.append(tempCreditCardDf)
  1555. elif key == "creditCardDfsZ": # 贷记卡账户合并
  1556. for idx in range(0, len(tempDfObjx["dfs"])):
  1557. tempDfObj = tempDfObjx["dfs"][idx];
  1558. tempCreditCardDfZ = dfParser.mergeCreditCardDfZ(tempDfObj, idx,queryInfo['reportTime'])
  1559. if tempCreditCardDfZ!=None:
  1560. creditCardAccountDfsZ.append(tempCreditCardDfZ)
  1561. elif key == "recoveryInfoDfs": # 贷记卡账户合并
  1562. for idx in range(0, len(tempDfObjx["dfs"])):
  1563. tempDfObj = tempDfObjx["dfs"][idx];
  1564. recoveryInfoAccountDfs.append(dfParser.mergeRecoveryInfoDf(tempDfObj, idx, queryInfo['reportTime']))
  1565. elif key == "housingFundRcdDfs": # 贷记卡账户合并
  1566. for idx in range(0, len(tempDfObjx["dfs"])):
  1567. tempDfObj = tempDfObjx["dfs"][idx];
  1568. housingFundRcdAccountDfs.append(dfParser.mergeHousingFundRcdDf(tempDfObj, idx, queryInfo['reportTime']))
  1569. else: # 其他
  1570. for tempDfObj in (tempDfObjx["dfs"]):
  1571. if tempDfObj.__contains__("isByPage"):
  1572. logger.info(key + "============其他被分页页数============" + str(tempDfObj["isByPage"]))
  1573. # logger.info(tempDfObj["df"].values)
  1574. else: # 单笔
  1575. tempDfObj = tempDfObjx;
  1576. if tempDfObj.__contains__("isByPage"):
  1577. logger.info(key + "============被分页页数================" + str(tempDfObj["isByPage"]))
  1578. # logger.info(tempDfObj["df"].values)
  1579. if key == "queryInfoDf": # 解析被查询信息
  1580. parseQueryInfo(tempDfObj);
  1581. # print("\033[1;31m +查询信息+ \033[0m")
  1582. # print(queryInfo)
  1583. elif key == "identityDf": # 身份信息
  1584. parseIdentity(tempDfObj)
  1585. # print("\033[1;31m +身份信息+ \033[0m")
  1586. # print(identity)
  1587. elif key == "mateDf": # 配偶信息
  1588. parseMate(tempDfObj)
  1589. # print("\033[1;31m +配偶信息+ \033[0m")
  1590. # print(mate)
  1591. elif key == "liveInfoDf": # 居住信息
  1592. parseLiveInfo(tempDfObj)
  1593. # print("\033[1;31m +居住信息+ \033[0m")
  1594. elif key == "occupationDf": # 居住信息
  1595. parseOccupationInfoDf(tempDfObj)
  1596. elif key == "loanTradeInfoDf": # 信贷交易信息提示
  1597. parseLoanTradeInfo(tempDfObj);
  1598. # print("\033[1;31m +信贷交易信息提示+ \033[0m")
  1599. # print(loanTradeInfo)
  1600. elif key == "badDebtsInfoSumDf": # 呆账信息汇总
  1601. parseBadDebtsInfoSumDf(tempDfObj)
  1602. # print("\033[1;31m +呆账信息汇总+ \033[0m")
  1603. # print(overdueBrief)
  1604. elif key == "recoveryInfoSumDf": # 被追偿信息汇总-资产处置和垫款
  1605. parseRecoveryInfoSum(tempDfObj)
  1606. # print("\033[1;31m +资产处置和垫款+ \033[0m")
  1607. # print(overdueBrief)
  1608. elif key == "overdueInfoSumDf": # 逾期(透支)信息汇总
  1609. parseOverdueInfoSum(tempDfObj)
  1610. # print("\033[1;31m +逾期(透支)信息汇总+ \033[0m")
  1611. # print(overdueInfo)
  1612. elif key == "loanAccountInfoSumDf": # 非循环贷账户信息汇总 TODO
  1613. parseLoanAccountInfoSum(tempDfObj)
  1614. elif key == "cycleCreditAccountInfoSumDf":#循环额度
  1615. parseCycleCreditAccountInfoSum(tempDfObj)
  1616. elif key == "cycleLoanAccountInfoSumDf":#循环贷
  1617. parseCyleLoanAccountInfoSum(tempDfObj)
  1618. elif key == "creditCardInfoSumDf":#贷记卡
  1619. parseCreditCardInfoSum(tempDfObj)
  1620. elif key == "creditCardInfoSumDfZ": # 准贷记卡
  1621. parseCreditCardInfoSumZ(tempDfObj)
  1622. elif key == "repaymentSumDf": # 相关还款责任
  1623. parseRepaymentSum(tempDfObj)
  1624. elif key == "publicInfoBriefDf":
  1625. parsePublicInfoBrief(tempDfObj);
  1626. elif key == "queryRecordSumDf":
  1627. parseQueryRecordSum(tempDfObj);
  1628. elif key == "queryRecordDetailDf": # 查询记录明细
  1629. parseQueryInfoDetail(tempDfObj)#
  1630. logger.info("解析基础pdf数据完成")
  1631. result = ""
  1632. # 基本信息
  1633. # result+=("\033[1;34m +身份信息+ \033[0m")+"\n"
  1634. result+=utils.toJson(identityInfoDf)+"\n"
  1635. result += utils.toJson(mateInfoDf) + "\n"
  1636. result += utils.toJson(liveInfoDf) + "\n"
  1637. result += utils.toJson(occupationInfoDf) + "\n"
  1638. # result+=("\033[1;34m +概要信息+ \033[0m")+"\n"
  1639. # result+=("\033[1;34m +信贷交易信息提示+ \033[0m")+"\n"
  1640. result+=utils.toJson(briefInfoDf_loanTradeInfo)+"\n"
  1641. # result+=("\033[1;34m +被追偿信息汇总及呆账信息汇总+ \033[0m")+"\n"
  1642. result+="briefInfoDf_recoveryInfoSum"+"\n" #占位符
  1643. result += utils.toJson(briefInfoDf_badDebtsInfoSum) + "\n"
  1644. # result+=("\033[1;34m +逾期(透支)信息汇总+ \033[0m")+"\n"
  1645. #此信息先占位
  1646. result+="briefInfoDf_overdueInfoSum"+"\n"
  1647. # result+=("\033[1;34m +信贷交易授信及负债信息概要+ \033[0m")+"\n"
  1648. result+=utils.toJson(briefInfoDf_loanTradeCreditInfo)+"\n"
  1649. #公共信息
  1650. result += utils.toJson(publicInfoBriefDf) + "\n"
  1651. #查询记录汇总
  1652. result += utils.toJson(queryRecordSumDf) + "\n"
  1653. # 单独输出贷款df
  1654. # logger.info("\033[1;34m +贷款信息Dataframe+ \033[0m")
  1655. # logger.info(dfParser.dfHeaderLoan)
  1656. logger.info("解析贷款数据开始")
  1657. loanMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoan)
  1658. loanPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoanPayRecord)
  1659. loanSpecialTradeMergeDf = pd.DataFrame(columns=dfParser.dfHeaderLoanSpecialTrade)#特殊交易
  1660. # 输出数据
  1661. for loanDfObj in loanAccountDfs:
  1662. loanMergeDf = pd.concat([loanMergeDf, loanDfObj["loanDf"]], axis=0, ignore_index=True);
  1663. loanPayRecordMergeDf = pd.concat([loanPayRecordMergeDf, loanDfObj["loanPayRecordDf"]], axis=0,ignore_index=True);
  1664. loanSpecialTradeMergeDf = pd.concat([loanSpecialTradeMergeDf, loanDfObj["specialTradeDf"]], axis=0, ignore_index=True);
  1665. # logger.info(loanMergeDf.values)
  1666. # logger.info("\033[1;34m +贷款信息还款记录Dataframe+ \033[0m")
  1667. # logger.info(dfParser.dfHeaderLoanPayRecord)
  1668. # logger.info(loanPayRecordMergeDf.values)
  1669. #
  1670. #==============================信贷交易明细 ===============================
  1671. #被追偿信息
  1672. # 被追偿信息合并df
  1673. recoveryInfoMergeDf = pd.DataFrame(columns=dfParser.dfHeaderRecoveryInfo)
  1674. for recoveryInfoDfObj in recoveryInfoAccountDfs:
  1675. recoveryInfoMergeDf = pd.concat([recoveryInfoMergeDf, recoveryInfoDfObj["recoveryInfoDf"]], axis=0,
  1676. ignore_index=True);
  1677. parseRecoveryInfoMergeDf(recoveryInfoMergeDf);
  1678. #被追偿信息
  1679. result = result.replace("briefInfoDf_recoveryInfoSum", utils.toJson(briefInfoDf_recoveryInfoSum))#替换汇总中的指标
  1680. result += utils.toJson(creditTradeDetailDf_recoveryInfo) + "\n" #设置占位符,由于存在概要的指标在明细中计算
  1681. #特殊交易
  1682. parseSpecialTrade(loanSpecialTradeMergeDf)
  1683. result += utils.toJson(creditTradeDetailHeader_specialTrade) + "\n"
  1684. # 信贷交易明细-解析非循环贷账户
  1685. parseLoanAccountInfo(loanMergeDf);
  1686. result += utils.toJson(creditTradeDetailDf_loanAccountInfo) + "\n"
  1687. #循环额度分账户
  1688. parseCycleCreditAccountInfo(loanMergeDf);
  1689. result += utils.toJson(creditTradeDetailDf_cycleCreditAccountInfo) + "\n"
  1690. #循环贷
  1691. parseCycleLoanAccountInfo(loanMergeDf);
  1692. result += utils.toJson(creditTradeDetailDf_cycleLoanAccountInfo) + "\n"
  1693. # 解析贷款账户指标
  1694. parseLoanMergeDf(loanMergeDf);
  1695. # 解析还款记录相关指标
  1696. parseLoanMergeAndPayRecordDf(loanMergeDf, loanPayRecordMergeDf);
  1697. # logger.info(loanAccountInfo)
  1698. # logger.info(consts.loanAccountInfoHeader)
  1699. # logger.info(loanAccountInfoDf.values)
  1700. # result+=("\033[1;34m +贷款账户信息+ \033[0m")+"\n"
  1701. result+=utils.toJson(loanAccountInfoDf)+"\n"
  1702. logger.info("解析贷款数据完成")
  1703. logger.info("解析贷记卡数据开始")
  1704. #贷记卡合并df
  1705. creditCardMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCard)
  1706. creditCardPayRecordMergeDf = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecord)
  1707. # logger.info("\033[1;34m +贷记卡信息Dataframe+ \033[0m")
  1708. # logger.info(dfParser.dfHeaderCreditCard)
  1709. # 输出数据
  1710. for creditCardDfObj in creditCardAccountDfs:
  1711. creditCardMergeDf = pd.concat([creditCardMergeDf, creditCardDfObj["creditCardDf"]], axis=0, ignore_index=True);
  1712. creditCardPayRecordMergeDf = pd.concat([creditCardPayRecordMergeDf, creditCardDfObj["creditCardPayRecordDf"]], axis=0,ignore_index=True);
  1713. # logger.info(creditCardMergeDf.values)
  1714. # 解析贷记卡账户指标
  1715. parseCreditCardMergeDf(creditCardMergeDf);
  1716. parseCreditCardMergeAndPayRecordDf(creditCardMergeDf,creditCardPayRecordMergeDf)
  1717. #准贷记卡合并df
  1718. creditCardMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardZ)
  1719. creditCardPayRecordMergeDfZ = pd.DataFrame(columns=dfParser.dfHeaderCreditCardPayRecordZ)
  1720. for creditCardDfObj in creditCardAccountDfsZ:
  1721. creditCardMergeDfZ = pd.concat([creditCardMergeDfZ, creditCardDfObj["creditCardDfZ"]], axis=0, ignore_index=True);
  1722. creditCardPayRecordMergeDfZ = pd.concat([creditCardPayRecordMergeDfZ, creditCardDfObj["creditCardPayRecordDfZ"]], axis=0,ignore_index=True);
  1723. #解析准贷记卡相关指标
  1724. parseCreditCardMergeDfZ(creditCardMergeDfZ,creditCardPayRecordMergeDfZ);
  1725. logger.info("解析贷记卡数据完成")
  1726. #加工使用率指标
  1727. # result+=("\033[1;34m +贷记卡账户信息+ \033[0m")+"\n"
  1728. result+=utils.toJson(creditCardAccountInfoDf)+"\n"
  1729. result += utils.toJson(creditCardAccountInfoDfZ) + "\n"
  1730. #使用率
  1731. parseUseRate()
  1732. result += utils.toJson(useRateDf) + "\n"
  1733. #开户数
  1734. parseOpenAccount(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,recoveryInfoMergeDf,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1735. result += utils.toJson(openAccountDf) + "\n"
  1736. #24期还款状态
  1737. parsePayRcdStatus(loanMergeDf, creditCardMergeDf, creditCardMergeDfZ,loanPayRecordMergeDf,creditCardPayRecordMergeDf,creditCardPayRecordMergeDfZ)
  1738. result += utils.toJson(payRcdStatusDf) + "\n"
  1739. #由于逾期汇总的指标再还款状态之后需要替换占位 TODO
  1740. result = result.replace("briefInfoDf_overdueInfoSum",utils.toJson(briefInfoDf_overdueInfoSum))
  1741. #公积金
  1742. # 被追偿信息合并df
  1743. housingFundRcdMergeDf = pd.DataFrame(columns=dfParser.dfHeaderHousingFundRcd)
  1744. for housingFundRcdDfObj in housingFundRcdAccountDfs:
  1745. housingFundRcdMergeDf = pd.concat([housingFundRcdMergeDf, housingFundRcdDfObj["housingFundRcdDf"]], axis=0,ignore_index=True);
  1746. parseHousingFundRcd(housingFundRcdMergeDf);
  1747. result += utils.toJson(housingFundRcdDf) + "\n"
  1748. # result+=("\033[1;34m +查询记录明细+ \033[0m")+"\n"
  1749. result+=utils.toJson(queryRecordDetailDf)+"\n"
  1750. return result;
  1751. def uploadReportResult():
  1752. # ===================================
  1753. logger.info("准备上传文件")
  1754. uploadApiUrl = config.get("baseconf", "uploadApiUrl");
  1755. uploadApiUrl = uploadApiUrl + "?access_token=" + dbController.getToken()
  1756. files = {'file': open(outPath, 'rb')}
  1757. businessNum = dbController.getBussinessNum(queryInfo["queryInfoCardId"]); # 根据身份证获取业务编号
  1758. data = {'docType': "23", 'businessNum': businessNum}
  1759. response = requests.post(uploadApiUrl, files=files, data=data)
  1760. text = response.text
  1761. p = PrpCrypt(config.get("baseconf", "AESKey"))
  1762. # logger.info("token:"+token)
  1763. # logger.info(url)
  1764. # logger.info(result.text)
  1765. resultText = p.decrypt(text)
  1766. logger.info("upload_result:" + resultText)
  1767. # grouped.to_csv(r'C:\Users\Mortal\Desktop\ex.csv',index=False, encoding='utf_8_sig')
  1768. if __name__ == '__main__':
  1769. basePath = "D:/mydocument/myproject/git/busscredit/Crerdai/";
  1770. pdf_path = basePath + "闻海雁532329198801060347.pdf"
  1771. # pdf_path = basePath+"雷雨晴130630199006130027.pdf"
  1772. pdf_path=basePath+"杨安140402197102111236.pdf"
  1773. # pdf_path=basePath+"刘盼兰130133198912261210.pdf"
  1774. # pdf_path=basePath+"马维强130521198604045272.pdf"
  1775. pdf_path = basePath + "郑晨晨130681199008205811.pdf"
  1776. # pdf_path=basePath+"人行征信模拟数据报告.pdf"
  1777. pdf_path = basePath + "艾思语51112319960218732X.pdf"
  1778. # basePath = "D:/mydocument/myproject/git/busscredit/20200430_report/";
  1779. basePath = "D:/mydocument/myprojects/creditreport/pdf/"
  1780. # pdf_path = basePath + "周颖500108199002111229.pdf"#准贷记卡已销户 呆账
  1781. # pdf_path = basePath + "王思13052819911012122X.pdf"#公积金
  1782. # pdf_path = basePath + "杨夏龙440902198410014270.pdf"#转出
  1783. # pdf_path = basePath + "翟彦超230125199004174216.pdf"#准贷记卡 呆账
  1784. # pdf_path = basePath + "蔡月辉330326198502116146.pdf" # 配偶
  1785. # pdf_path = basePath + "周芳芳342501198706111782.pdf" #被追偿信息
  1786. pdf_path = basePath + "付春雁533001198507220344.pdf" # 公积金记录
  1787. # pdf_path = basePath + "陈洁350122199005027726.pdf" # 相关还款责任
  1788. if len(sys.argv)>1:
  1789. basePath = sys.argv[1]
  1790. pdf_path = basePath + sys.argv[2]
  1791. print(sys.argv)
  1792. isBat = False#批量的有问题
  1793. isPlt = config.get("baseconf", "isPlt");
  1794. if isBat:#批量生成数据不对
  1795. for file in os.listdir(basePath):
  1796. if file.endswith("pdf"):
  1797. start = timeit.default_timer();
  1798. pdf_path = basePath+file;
  1799. outPath = pdf_path.replace("pdf",'txt')
  1800. if os.path.exists(outPath):
  1801. continue;
  1802. logger.info(file + "解析开始...")
  1803. try:
  1804. result = main(pdf_path)
  1805. except:
  1806. info = sys.exc_info()
  1807. logger.error(info[0])
  1808. logger.error( info[1])
  1809. # logging.log(logging.ERROR, info[2])
  1810. logger.error(traceback.extract_tb(info[2], 1))
  1811. # print(result)
  1812. #输出到文件
  1813. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1814. print(result.replace("\033[1;34m","").replace("\033[0m",""))
  1815. logger.info(file+"解析完成")
  1816. gc.collect()
  1817. s = timeit.default_timer() - start;
  1818. logger.info(str(s) + " 秒")
  1819. else:
  1820. if pdf_path.endswith("pdf"):
  1821. start = timeit.default_timer();
  1822. logger.info(pdf_path + "解析开始...")
  1823. outPath = pdf_path.replace("pdf", 'txt')
  1824. result = ""
  1825. if isPlt == "1":
  1826. if not os.path.exists(outPath):#不存在才生成
  1827. try:
  1828. result = main(pdf_path)
  1829. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1830. print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  1831. logger.info(pdf_path + "解析完成")
  1832. s = timeit.default_timer() - start;
  1833. logger.info(str(s) + " 秒")
  1834. uploadReportResult();
  1835. except:
  1836. info = sys.exc_info()
  1837. logger.error(pdf_path+"#"+"解析失败")
  1838. logger.error(info[0])
  1839. logger.error(info[1])
  1840. logger.error(traceback.extract_tb(info[2]))
  1841. else:
  1842. result = main(pdf_path)
  1843. sys.stdout = open(outPath, mode='w', encoding='utf-8')
  1844. print(result.replace("\033[1;34m", "").replace("\033[0m", ""))
  1845. logger.info(pdf_path + "解析完成")
  1846. s = timeit.default_timer() - start;
  1847. logger.info(str(s) + " 秒")
  1848. uploadReportResult();