From fbe7dd9b134db86eddc3fede68622b4f670e20f7 Mon Sep 17 00:00:00 2001 From: Ringo Watanabe Date: Thu, 2 Aug 2018 04:48:16 +0100 Subject: [PATCH] Initial commit --- rtbw.py | 151 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 rtbw.py diff --git a/rtbw.py b/rtbw.py new file mode 100644 index 0000000..7f8b450 --- /dev/null +++ b/rtbw.py @@ -0,0 +1,151 @@ +import sys +import os +import os.path +import traceback +import json +import socket +import time +import datetime +import json +import re +import time +import threading +import binascii +from cffi import FFI +import select +from subprocess import Popen, PIPE +import argparse +try: + import ssl +except ImportError: + print ("error: no ssl support") +import requests + +def __pmap(): + return [ + ("com", "c") + ] + +def __encodepost(post): + pass #TODO: __pmap comp and lzma comp + +class StatBuffer: + SLV_LOW = 0 #Keep everything + SLV_NOTEXT = 1 #Remove subject & comment + SLV_NOUI = 2 #Remove all user inputed information (sub, com, filename) + SLV_HIGH = 0xFF #Keep only post number + + def __init__(): + pass + def _encode(post, striplv): + if(striplv == StatBuffer.SLV_LOW): + return __encodepost(post) + elif(striplv == StatBuffer.SLV_NOTEXT): + if "com" in post: + del post["com"] + if "sub" in post: + del post["sub"] + return __encodepost(post) + elif(striplv == StatBuffer.SLV_NOUI): + if "com" in post: + del post["com"] + if "sub" in post: + del post["sub"] + #TODO: Remove image stuff + return __encodepost(post) + elif(striplv == StatBuffer.SLV_HIGH): + return post["no"] + else: return None + def write(): + raise NotImplementedError("Abstract method not implemented") + def read(): + raise NotImplementedError("Abstract method not implemented") + +def parse_post(post): + res = dict() + if(not "resto" in post or post["resto"] == 0): #is thread OP + if("sticky" in post): + return None + else: + res["thread"] = post["resto"] + res["no"] = post["no"] + if("com" in post): + res["com"] = post["com"] + if("sub" in post): + res["sub"] = post["sub"] + res["time"] = post["now"] + + if("name" in post and post["name"] != "Anonymous"): + res["name"] = post["name"] + if("trip" in post): + res["trip"] = post["trip"] + + if("country" in post): + res["country"] = post["country"] + + if "id" in post: + res["id"] = post["id"] + + if "filename" in post: + res["filename"] = post["filename"] + post["ext"] + res["image"] = post["md5"] + res["realFilename"] = post["tim"] + res["fileSize"] = post["fsize"] + + return res + +def parse_page(api, board, page, last): + dta = requests.get((api % board)+page+".json") + posts = list() + page = dta.json() + for thread in page["threads"]: + post = thread["posts"][0] + if post["no"] <= last: + continue + else: + fullThread = requests.get((api % board)+"thread/"+str(post["no"])+".json").json() + for fPost in fullThread["posts"]: + np = parse_post(fPost) + if(np!=None): + posts.append(np) + + return posts + +def pnomax(last, posts): + mx=last + for post in posts: + if(post["no"]>mx): mx = post["no"] #we need this because of sage + return mx + +def log(stre): + print (stre) + +def buffer_write(buf, posts): + #TODO: Write buffer stuff + pass + +parser = argparse.ArgumentParser(description="Real-time 4chan board watcher.") +parser.add_argument("board", help="Board to spider") +parser.add_argument("timeout", help="Time between cycles") +parser.add_argument("--buffer", help="Save buffer filename (default: use in memory buffer)", default=None) +parser.add_argument("--daemon", action="store_true", help="Run as daemon") +parser.add_argument("--api", help="Base URL of 4chan JSON API", default="http://api.4chan.org/%s/") + +args = parser.parse_args() +last=0 + +if args.buffer !=None: + pass #TODO: Init buffer stuff + +while True: + log("Reading threads for %s from %d" % (args.board, last)) + posts = parse_page(args.api, args.board, "1", last) + last = pnomax(last, posts) + + if(len(posts)>0): + log("%d new posts since last cycle" % len(posts)) + buffer_write(posts) + else: + log("Nothing new") + + time.sleep(int(args.timeout))