You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
152 lines
3.6 KiB
152 lines
3.6 KiB
import sys
|
|
import os
|
|
import os.path
|
|
import traceback
|
|
import json
|
|
import socket
|
|
import time
|
|
import datetime
|
|
import json
|
|
import re
|
|
import time
|
|
import threading
|
|
import binascii
|
|
from cffi import FFI
|
|
import select
|
|
from subprocess import Popen, PIPE
|
|
import argparse
|
|
try:
|
|
import ssl
|
|
except ImportError:
|
|
print ("error: no ssl support")
|
|
import requests
|
|
|
|
def __pmap():
    """Return the post-key shortening table as ``(long_key, short_key)`` pairs.

    The table currently holds a single entry, mapping ``"com"`` to ``"c"``.
    A fresh list is built on every call.
    """
    short_names = {"com": "c"}
    return list(short_names.items())
|
|
|
|
def __encodepost(post):
    """Encode a parsed post for storage (placeholder, not implemented yet).

    The argument is currently ignored and ``None`` is always returned.
    """
    # TODO: __pmap comp and lzma comp
    return None
|
|
|
|
class StatBuffer:
    """Abstract base for post buffers, plus the shared post-stripping logic.

    Subclasses are expected to override :meth:`write` and :meth:`read`; the
    versions here only raise ``NotImplementedError``.

    The ``SLV_*`` constants are the strip levels accepted by :meth:`_encode`.
    """

    SLV_LOW = 0  # Keep everything
    SLV_NOTEXT = 1  # Remove subject & comment
    SLV_NOUI = 2  # Remove all user-inputted information (sub, com, filename)
    SLV_HIGH = 0xFF  # Keep only post number

    def __init__(self):
        pass

    @staticmethod
    def _encode(post, striplv):
        """Strip ``post`` according to ``striplv`` and encode the result.

        A static method, so it can be called both as
        ``StatBuffer._encode(post, lv)`` and on an instance.

        Args:
            post: Parsed post dict. It is never modified; stripping works on
                a copy so the caller can reuse the post afterwards.
            striplv: One of the ``SLV_*`` constants.

        Returns:
            ``post["no"]`` for ``SLV_HIGH``; the result of the module-level
            ``__encodepost`` for ``SLV_LOW``, ``SLV_NOTEXT`` and ``SLV_NOUI``;
            ``None`` for any unrecognised strip level.

        Raises:
            KeyError: For ``SLV_HIGH`` when ``post`` has no ``"no"`` key.
        """
        if striplv == StatBuffer.SLV_HIGH:
            return post["no"]

        if striplv == StatBuffer.SLV_LOW:
            kept = post
        elif striplv in (StatBuffer.SLV_NOTEXT, StatBuffer.SLV_NOUI):
            # TODO: Remove image stuff (for SLV_NOUI)
            kept = {
                key: value
                for key, value in post.items()
                if key not in ("com", "sub")
            }
        else:
            return None

        # Inside a class body a bare ``__encodepost`` is name-mangled to
        # ``_StatBuffer__encodepost`` and would raise NameError, so the
        # module-level function is looked up by its real name instead.
        return globals()["__encodepost"](kept)

    def write(self):
        """Write buffered data; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Abstract method not implemented")

    def read(self):
        """Read buffered data; must be overridden by subclasses.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError("Abstract method not implemented")
|
|
|
|
def parse_post(post):
    """Convert one raw API post dict into the trimmed dict used by the watcher.

    A post counts as a thread OP when it has no ``"resto"`` key or when
    ``"resto"`` is 0. Sticky OPs (those with a ``"sticky"`` key) are skipped.

    Args:
        post: Raw post dict. ``"no"`` and ``"now"`` are required; when
            ``"filename"`` is present, ``"ext"``, ``"md5"``, ``"tim"`` and
            ``"fsize"`` are required too.

    Returns:
        ``None`` for a sticky OP, otherwise a new dict with:
        ``"thread"`` (replies only, copied from ``"resto"``), ``"no"``,
        ``"time"`` (copied from ``"now"``), and, when present in the input,
        ``"com"``, ``"sub"``, ``"name"`` (omitted when it is ``"Anonymous"``),
        ``"trip"``, ``"country"``, ``"id"``, plus the file fields
        ``"filename"`` (name + extension), ``"image"`` (from ``"md5"``),
        ``"realFilename"`` (from ``"tim"``) and ``"fileSize"`` (from
        ``"fsize"``).
    """
    is_op = "resto" not in post or post["resto"] == 0
    if is_op and "sticky" in post:
        return None

    res = {}
    if not is_op:
        res["thread"] = post["resto"]

    res["no"] = post["no"]
    for key in ("com", "sub"):
        if key in post:
            res[key] = post[key]
    res["time"] = post["now"]

    # The default poster name carries no information, so it is dropped.
    if "name" in post and post["name"] != "Anonymous":
        res["name"] = post["name"]

    for key in ("trip", "country", "id"):
        if key in post:
            res[key] = post[key]

    if "filename" in post:
        res.update(
            {
                "filename": post["filename"] + post["ext"],
                "image": post["md5"],
                "realFilename": post["tim"],
                "fileSize": post["fsize"],
            }
        )

    return res
|
|
|
|
def parse_page(api, board, page, last):
    """Fetch one index page of a board and return the posts of its new threads.

    For every thread on the page whose first post has a number greater than
    ``last``, the full thread is downloaded and each of its posts is run
    through ``parse_post``.

    Args:
        api: ``%``-format string with one ``%s`` placeholder for the board,
            giving the base URL of the JSON API.
        board: Board name substituted into ``api``.
        page: Index page number; a string or an int (converted with ``str``).
        last: Threads whose first post number is ``<= last`` are skipped.

    Returns:
        A list of the dicts returned by ``parse_post``; posts for which it
        returns ``None`` are left out.
    """
    base_url = api % board
    # Kept in its own local instead of rebinding the ``page`` parameter.
    index = requests.get(base_url + str(page) + ".json").json()

    posts = []
    for thread in index["threads"]:
        first_post = thread["posts"][0]
        if first_post["no"] <= last:
            continue

        thread_url = base_url + "thread/" + str(first_post["no"]) + ".json"
        full_thread = requests.get(thread_url).json()
        for raw_post in full_thread["posts"]:
            parsed = parse_post(raw_post)
            if parsed is not None:
                posts.append(parsed)

    return posts
|
|
|
|
def pnomax(last, posts):
    """Return the largest post number among ``last`` and every ``post["no"]``.

    ``last`` itself is returned when ``posts`` is empty or when no post has a
    higher number.
    """
    # we need this because of sage
    candidates = [last]
    candidates.extend(entry["no"] for entry in posts)
    return max(candidates)
|
|
|
|
def log(stre):
    """Print ``stre`` as a single log line on standard output."""
    print(stre)
|
|
|
|
def buffer_write(buf, posts):
    """Store ``posts`` in ``buf`` (placeholder, not implemented yet).

    Both arguments are currently ignored and ``None`` is returned.
    """
    # TODO: Write buffer stuff
    return None
|
|
|
|
def main(argv=None):
    """Run the board watcher: poll page 1 of the board forever.

    Each cycle fetches the new posts with ``parse_page``, advances the
    high-water post number with ``pnomax``, hands any new posts to
    ``buffer_write`` and then sleeps for ``timeout`` seconds.

    Args:
        argv: Command-line arguments without the program name; ``None``
            makes argparse read ``sys.argv``.

    This function only returns through an exception (for example
    ``KeyboardInterrupt``) or through argparse exiting on bad arguments.
    """
    parser = argparse.ArgumentParser(description="Real-time 4chan board watcher.")
    parser.add_argument("board", help="Board to spider")
    # type=int rejects a bad timeout at startup rather than after a full cycle.
    parser.add_argument("timeout", type=int, help="Time between cycles")
    parser.add_argument("--buffer", help="Save buffer filename (default: use in memory buffer)", default=None)
    # NOTE: --daemon is accepted but nothing reads it yet.
    parser.add_argument("--daemon", action="store_true", help="Run as daemon")
    parser.add_argument("--api", help="Base URL of 4chan JSON API", default="http://api.4chan.org/%s/")

    args = parser.parse_args(argv)
    last = 0

    buf = None
    if args.buffer is not None:
        pass  # TODO: Init buffer stuff

    while True:
        log("Reading threads for %s from %d" % (args.board, last))
        posts = parse_page(args.api, args.board, "1", last)
        last = pnomax(last, posts)

        if posts:
            log("%d new posts since last cycle" % len(posts))
            # buffer_write takes (buf, posts); the previous one-argument call
            # raised TypeError on the first cycle that found new posts.
            buffer_write(buf, posts)
        else:
            log("Nothing new")

        time.sleep(args.timeout)


if __name__ == "__main__":
    main()
|