commit
fbe7dd9b13
@ -0,0 +1,151 @@
|
||||
import sys
|
||||
import os
|
||||
import os.path
|
||||
import traceback
|
||||
import json
|
||||
import socket
|
||||
import time
|
||||
import datetime
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
import threading
|
||||
import binascii
|
||||
from cffi import FFI
|
||||
import select
|
||||
from subprocess import Popen, PIPE
|
||||
import argparse
|
||||
try:
|
||||
import ssl
|
||||
except ImportError:
|
||||
print ("error: no ssl support")
|
||||
import requests
|
||||
|
||||
def __pmap():
|
||||
return [
|
||||
("com", "c")
|
||||
]
|
||||
|
||||
def __encodepost(post):
|
||||
pass #TODO: __pmap comp and lzma comp
|
||||
|
||||
class StatBuffer:
|
||||
SLV_LOW = 0 #Keep everything
|
||||
SLV_NOTEXT = 1 #Remove subject & comment
|
||||
SLV_NOUI = 2 #Remove all user inputed information (sub, com, filename)
|
||||
SLV_HIGH = 0xFF #Keep only post number
|
||||
|
||||
def __init__():
|
||||
pass
|
||||
def _encode(post, striplv):
|
||||
if(striplv == StatBuffer.SLV_LOW):
|
||||
return __encodepost(post)
|
||||
elif(striplv == StatBuffer.SLV_NOTEXT):
|
||||
if "com" in post:
|
||||
del post["com"]
|
||||
if "sub" in post:
|
||||
del post["sub"]
|
||||
return __encodepost(post)
|
||||
elif(striplv == StatBuffer.SLV_NOUI):
|
||||
if "com" in post:
|
||||
del post["com"]
|
||||
if "sub" in post:
|
||||
del post["sub"]
|
||||
#TODO: Remove image stuff
|
||||
return __encodepost(post)
|
||||
elif(striplv == StatBuffer.SLV_HIGH):
|
||||
return post["no"]
|
||||
else: return None
|
||||
def write():
|
||||
raise NotImplementedError("Abstract method not implemented")
|
||||
def read():
|
||||
raise NotImplementedError("Abstract method not implemented")
|
||||
|
||||
def parse_post(post):
|
||||
res = dict()
|
||||
if(not "resto" in post or post["resto"] == 0): #is thread OP
|
||||
if("sticky" in post):
|
||||
return None
|
||||
else:
|
||||
res["thread"] = post["resto"]
|
||||
res["no"] = post["no"]
|
||||
if("com" in post):
|
||||
res["com"] = post["com"]
|
||||
if("sub" in post):
|
||||
res["sub"] = post["sub"]
|
||||
res["time"] = post["now"]
|
||||
|
||||
if("name" in post and post["name"] != "Anonymous"):
|
||||
res["name"] = post["name"]
|
||||
if("trip" in post):
|
||||
res["trip"] = post["trip"]
|
||||
|
||||
if("country" in post):
|
||||
res["country"] = post["country"]
|
||||
|
||||
if "id" in post:
|
||||
res["id"] = post["id"]
|
||||
|
||||
if "filename" in post:
|
||||
res["filename"] = post["filename"] + post["ext"]
|
||||
res["image"] = post["md5"]
|
||||
res["realFilename"] = post["tim"]
|
||||
res["fileSize"] = post["fsize"]
|
||||
|
||||
return res
|
||||
|
||||
def parse_page(api, board, page, last):
|
||||
dta = requests.get((api % board)+page+".json")
|
||||
posts = list()
|
||||
page = dta.json()
|
||||
for thread in page["threads"]:
|
||||
post = thread["posts"][0]
|
||||
if post["no"] <= last:
|
||||
continue
|
||||
else:
|
||||
fullThread = requests.get((api % board)+"thread/"+str(post["no"])+".json").json()
|
||||
for fPost in fullThread["posts"]:
|
||||
np = parse_post(fPost)
|
||||
if(np!=None):
|
||||
posts.append(np)
|
||||
|
||||
return posts
|
||||
|
||||
def pnomax(last, posts):
|
||||
mx=last
|
||||
for post in posts:
|
||||
if(post["no"]>mx): mx = post["no"] #we need this because of sage
|
||||
return mx
|
||||
|
||||
def log(stre):
|
||||
print (stre)
|
||||
|
||||
def buffer_write(buf, posts):
|
||||
#TODO: Write buffer stuff
|
||||
pass
|
||||
|
||||
parser = argparse.ArgumentParser(description="Real-time 4chan board watcher.")
|
||||
parser.add_argument("board", help="Board to spider")
|
||||
parser.add_argument("timeout", help="Time between cycles")
|
||||
parser.add_argument("--buffer", help="Save buffer filename (default: use in memory buffer)", default=None)
|
||||
parser.add_argument("--daemon", action="store_true", help="Run as daemon")
|
||||
parser.add_argument("--api", help="Base URL of 4chan JSON API", default="http://api.4chan.org/%s/")
|
||||
|
||||
args = parser.parse_args()
|
||||
last=0
|
||||
|
||||
if args.buffer !=None:
|
||||
pass #TODO: Init buffer stuff
|
||||
|
||||
while True:
|
||||
log("Reading threads for %s from %d" % (args.board, last))
|
||||
posts = parse_page(args.api, args.board, "1", last)
|
||||
last = pnomax(last, posts)
|
||||
|
||||
if(len(posts)>0):
|
||||
log("%d new posts since last cycle" % len(posts))
|
||||
buffer_write(posts)
|
||||
else:
|
||||
log("Nothing new")
|
||||
|
||||
time.sleep(int(args.timeout))
|
Loading…
Reference in new issue