Initial commit

master
Ringo Watanabe 6 years ago
commit fbe7dd9b13

@@ -0,0 +1,151 @@
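"""Real-time 4chan board watcher.

Polls a board's first index page on a fixed interval, parses new threads into
trimmed post dicts, and hands them to a storage buffer (not implemented yet).
"""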
import sys
import os
import os.path
import traceback
import json
import socket
import time
import datetime
import re
import threading
import binascii
from cffi import FFI
import select
from subprocess import Popen, PIPE
import argparse
try:
import ssl
except ImportError:
    print("error: no ssl support")
import requests
# Field-name abbreviation map (e.g. "com" -> "c"), intended for the planned post compression.
# Single leading underscores so these helpers can be called from inside StatBuffer
# (double-underscore names would be mangled there and raise NameError).
def _pmap():
    return [
        ("com", "c")
    ]

def _encodepost(post):
    pass  # TODO: _pmap compression and lzma compression
class StatBuffer:
    SLV_LOW = 0      # Keep everything
    SLV_NOTEXT = 1   # Remove subject & comment
    SLV_NOUI = 2     # Remove all user-entered information (sub, com, filename)
    SLV_HIGH = 0xFF  # Keep only the post number

    def __init__(self):
        pass
    @staticmethod
    def _encode(post, striplv):
        if striplv == StatBuffer.SLV_LOW:
            return _encodepost(post)
        elif striplv == StatBuffer.SLV_NOTEXT:
            if "com" in post:
                del post["com"]
            if "sub" in post:
                del post["sub"]
            return _encodepost(post)
        elif striplv == StatBuffer.SLV_NOUI:
            if "com" in post:
                del post["com"]
            if "sub" in post:
                del post["sub"]
            # TODO: Remove image stuff
            return _encodepost(post)
        elif striplv == StatBuffer.SLV_HIGH:
            return post["no"]
        else:
            return None
    def write(self):
        raise NotImplementedError("Abstract method not implemented")

    def read(self):
        raise NotImplementedError("Abstract method not implemented")
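# StatBuffer strip-level example, sketched as a comment (it does not run as-is:
# _encodepost is still a stub and no concrete StatBuffer subclass exists in this commit):
#   StatBuffer._encode(parsed_post, StatBuffer.SLV_NOTEXT)  # drops "sub" and "com" before encoding
#   StatBuffer._encode(parsed_post, StatBuffer.SLV_HIGH)    # keeps only the post number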
def parse_post(post):
    """Reduce a raw API post dict to the fields we track; skip sticky thread OPs."""
    res = dict()
    if "resto" not in post or post["resto"] == 0:  # is thread OP
        if "sticky" in post:
            return None
    else:
        res["thread"] = post["resto"]
    res["no"] = post["no"]
    if "com" in post:
        res["com"] = post["com"]
    if "sub" in post:
        res["sub"] = post["sub"]
    res["time"] = post["now"]
    if "name" in post and post["name"] != "Anonymous":
        res["name"] = post["name"]
    if "trip" in post:
        res["trip"] = post["trip"]
    if "country" in post:
        res["country"] = post["country"]
    if "id" in post:
        res["id"] = post["id"]
    if "filename" in post:
        res["filename"] = post["filename"] + post["ext"]
        res["image"] = post["md5"]
        res["realFilename"] = post["tim"]
        res["fileSize"] = post["fsize"]
    return res
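# parse_post example (field values are illustrative, not real API data):
#   parse_post({"no": 123, "resto": 100, "com": "hi", "now": "01/01/19(Tue)00:00"})
#   -> {"thread": 100, "no": 123, "com": "hi", "time": "01/01/19(Tue)00:00"}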
def parse_page(api, board, page, last):
    """Fetch one index page and return parsed posts from threads whose OP number is above `last`."""
    dta = requests.get((api % board) + page + ".json")
    posts = list()
    index = dta.json()
    for thread in index["threads"]:
        post = thread["posts"][0]
        if post["no"] <= last:
            continue
        fullThread = requests.get((api % board) + "thread/" + str(post["no"]) + ".json").json()
        for fPost in fullThread["posts"]:
            np = parse_post(fPost)
            if np is not None:
                posts.append(np)
    return posts
def pnomax(last, posts):
    mx = last
    for post in posts:
        if post["no"] > mx:  # we need the explicit max because of sage: posts are not in order
            mx = post["no"]
    return mx
def log(stre):
    print(stre)
def buffer_write(buf, posts):
#TODO: Write buffer stuff
pass
parser = argparse.ArgumentParser(description="Real-time 4chan board watcher.")
parser.add_argument("board", help="Board to spider")
parser.add_argument("timeout", help="Time between cycles")
parser.add_argument("--buffer", help="Save buffer filename (default: use in memory buffer)", default=None)
parser.add_argument("--daemon", action="store_true", help="Run as daemon")
parser.add_argument("--api", help="Base URL of 4chan JSON API", default="http://api.4chan.org/%s/")
args = parser.parse_args()
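# Example invocation (script name, board, interval, and buffer path are placeholders,
# not part of this commit):
#   python watcher.py b 30 --buffer posts.buf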
last = 0
buf = None  # in-memory buffer is the default; file-backed buffer init is still TODO
if args.buffer is not None:
    pass  # TODO: Init buffer stuff
while True:
    log("Reading threads for %s from %d" % (args.board, last))
    posts = parse_page(args.api, args.board, "1", last)
    last = pnomax(last, posts)
    if len(posts) > 0:
        log("%d new posts since last cycle" % len(posts))
        buffer_write(buf, posts)
    else:
        log("Nothing new")
    time.sleep(args.timeout)