From dd16b8e3f048ee198f2f443983d4e5db8852add3 Mon Sep 17 00:00:00 2001 From: Ringo Wantanabe Date: Tue, 14 Aug 2018 01:55:18 +0100 Subject: [PATCH] Improved spider efficiency --- rtbw.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/rtbw.py b/rtbw.py index 7aa7a8d..9433833 100644 --- a/rtbw.py +++ b/rtbw.py @@ -321,13 +321,29 @@ def parse_page(api, board, page, last): if post["no"] <= last: #thread is not new #are there any new posts? + newPs=0 + _nps = list() for vp in thread["posts"]: if(vp["no"] >last): + newPs+=1 + nt = parse_post(vp) + if nt!=None: + _nps.append(nt) + if newPs>2: posts.extend(parse_thread(api,board,post,last)) - tpd+=1 - break + else: + posts.extend(_nps) + tpd+=1 + else: - posts.extend(parse_thread(api,board, post,last)) + if len(thread["posts"])>3: + posts.extend(parse_thread(api,board, post,last)) + else: + for vp in thread["posts"]: + nt = parse_post(vp) + if nt!=None: + posts.append(nt) + tpd+=1 log("\t(threads parsed this rotation: %d)"%tpd) return posts