You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

352 lines
15 KiB

using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Io;
using System;
using System.Collections.Generic;
using System.IO;
using System.Runtime.CompilerServices;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading;
using System.Threading.Tasks;
using System.Linq;
namespace napdump.Dumpers
{
class Nineball : Dumper
{
readonly IConfiguration browserConfig;
readonly IBrowsingContext context;
readonly ICookieProvider cookies;

/// <summary>
/// Creates the AngleSharp configuration (default cookies and default loader), seeds its
/// cookie provider with the cookies listed in <paramref name="config"/>, and then opens
/// the browsing context used for every page request made by this dumper.
/// </summary>
/// <param name="config">Dumper settings; a null <c>Cookies</c> list is treated as empty.</param>
public Nineball(DumperConfig config) : base(config)
{
    browserConfig = Configuration.Default
        .WithDefaultCookies()
        .WithDefaultLoader();

    // The cookie provider is looked up among the services registered by the configuration.
    cookies = browserConfig.Services.OfType<ICookieProvider>().First();

    var presetCookies = config.Cookies ?? Array.Empty<(string Url, string Value)>();
    foreach (var preset in presetCookies)
    {
        cookies.SetCookie(new Url(preset.Url), preset.Value);
    }

    context = BrowsingContext.New(browserConfig);
}
private static readonly Regex reBoardName = new Regex(@"^(\/.*?\/)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
private static readonly Regex reBoardNameW = new Regex(@"^\/(\w+)\/", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Fills <paramref name="bi"/> (title, board name, safe name, description, tags) from an
/// already loaded board page.
/// </summary>
/// <param name="bi">Board record to populate.</param>
/// <param name="document">Parsed page to read the title and banner text from.</param>
/// <param name="token">Checked once before any parsing is done.</param>
/// <remarks>
/// A page without the title or banner element produces empty strings instead of a
/// <see cref="NullReferenceException"/>; the safe name then falls back to "unbound".
/// </remarks>
protected async Task GetBoardInfo(BoardInfo bi, IDocument document, CancellationToken token)
{
    await Task.Yield();
    token.ThrowIfCancellationRequested();

    // QuerySelector yields null when nothing matches, so both lookups are null-safe.
    var title = document.QuerySelector("body > threads > h1")?.InnerHtml ?? string.Empty;
    bi.Title = title;

    // Leading "/name/" part of the title; the whole title is used when it has no such prefix.
    var boardName = reBoardName.Match(title);
    bi.BoardName = boardName.Success ? boardName.Groups[1].Value : title;

    // Same prefix without the slashes, restricted to word characters.
    var safeName = reBoardNameW.Match(title);
    bi.SafeName = safeName.Success ? safeName.Groups[1].Value : "unbound";

    bi.Description = document.QuerySelector("#banner_info")?.TextContent ?? string.Empty;
    bi.Tags = new[] { "meguca", "node", "liveboard" };
}
private static readonly Regex reImageDim = new Regex(@"\((\d+) ([kmg]?b), (\d+)x(\d+)\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Parses an image caption fragment of the form <c>(12 KB, 640x480)</c>.
/// </summary>
/// <param name="info">Caption text; may be null.</param>
/// <param name="size">File size in bytes (the number multiplied by the unit), or 0 on failure.</param>
/// <param name="x">Image width, or 0 on failure.</param>
/// <param name="y">Image height, or 0 on failure.</param>
/// <returns>
/// True when the text contains a size/dimension group whose numbers all parse and whose
/// byte count fits in a <see cref="long"/>; otherwise false with all outputs zeroed.
/// </returns>
private static bool TryParseImageDimInfo(string info, out long size, out int x, out int y)
{
    // Start from the failure values so every early exit below reports a clean result.
    size = default;
    x = y = default;

    if (info == null)
    {
        return false;
    }

    var match = reImageDim.Match(info);
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;

    // The regex is case-insensitive, so normalise the unit before mapping it to a factor.
    long multiplier = groups[2].Value.ToLowerInvariant() switch
    {
        "b" => 1L,
        "kb" => 1024L,
        "mb" => 1024L * 1024,
        "gb" => 1024L * 1024 * 1024,
        _ => 0L, // unit not recognised
    };

    if (multiplier == 0
        || !long.TryParse(groups[1].Value, out var rawSize)
        || !int.TryParse(groups[3].Value, out var width)
        || !int.TryParse(groups[4].Value, out var height))
    {
        return false;
    }

    // Reject values whose byte count would not fit instead of silently wrapping around.
    if (rawSize > long.MaxValue / multiplier)
    {
        return false;
    }

    // Scale by the unit (the previous bitwise AND produced meaningless sizes).
    size = rawSize * multiplier;
    x = width;
    y = height;
    return true;
}
private static readonly Regex reDateTime = new Regex(@"(\d\d) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d\d\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Parses a post timestamp such as <c>05 Jan 2020(Sun)13:45</c>.
/// </summary>
/// <param name="htmlDateTime">Timestamp text; may be null and may carry surrounding whitespace.</param>
/// <param name="dt">The parsed date and time (seconds set to 0), or <c>default</c> on failure.</param>
/// <returns>
/// True when the text contains a timestamp describing a real calendar date and a valid
/// time of day; otherwise false. The method never throws for malformed input.
/// </returns>
private static bool TryParseDateTime(string htmlDateTime, out DateTime dt)
{
    dt = default;
    if (htmlDateTime == null)
    {
        return false;
    }

    var match = reDateTime.Match(htmlDateTime.Trim());
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;

    // The regex ignores case, so the month lookup has to as well.
    int month = groups[2].Value.ToLowerInvariant() switch
    {
        "jan" => 1,
        "feb" => 2,
        "mar" => 3,
        "apr" => 4,
        "may" => 5,
        "jun" => 6,
        "jul" => 7,
        "aug" => 8,
        "sep" => 9,
        "oct" => 10,
        "nov" => 11,
        "dec" => 12,
        _ => 0,
    };

    // TryParse rather than Parse: \d also accepts digits that int.Parse rejects.
    if (month == 0
        || !int.TryParse(groups[1].Value, out int day)
        || !int.TryParse(groups[3].Value, out int year)
        || !int.TryParse(groups[4].Value, out int hour)
        || !int.TryParse(groups[5].Value, out int minute))
    {
        return false;
    }

    // Validate the ranges up front so the DateTime constructor cannot throw.
    if (year < 1
        || day < 1
        || day > DateTime.DaysInMonth(year, month)
        || hour > 23
        || minute > 59)
    {
        return false;
    }

    dt = new DateTime(year, month, day, hour, minute, 0);
    return true;
}
private static readonly Regex reImageDeleted = new Regex(@"^Image deleted by (\w+)$", RegexOptions.Compiled);
private static readonly Regex reImageSpoilered = new Regex(@"^Image spoilered by (\w+)$", RegexOptions.Compiled);
private static readonly Regex rePostDeleted = new Regex(@"^Post deleted by (\w+)$", RegexOptions.Compiled);
private static readonly Regex reUserBanned = new Regex(@"^User banned by (\w+)(?: for (.+))?$", RegexOptions.Compiled);

/// <summary>
/// Converts the inner HTML of a moderation-log node into a <c>Modlog</c>.
/// </summary>
/// <param name="nodeHtml">Inner HTML whose entries are separated by <c>&lt;br&gt;</c>; null yields an empty log.</param>
/// <param name="log">
/// Receives the parsed log. If anything throws while parsing, the error message is written
/// to the console and an empty log is returned instead.
/// </param>
private static void getModlog(string nodeHtml, out Modlog log)
{
    log = new Modlog();
    if (nodeHtml == null)
    {
        return;
    }

    try
    {
        foreach (var rawEntry in nodeHtml.Split("<br>"))
        {
            var entry = rawEntry.Trim();
            if (entry.Length == 0)
            {
                continue;
            }

            // Each pattern is anchored to the whole entry; group 1 is the moderator name.
            var imageDeleted = reImageDeleted.Match(entry);
            if (imageDeleted.Success)
            {
                log.ImageDeleted = AdminInfo.Create(true, imageDeleted.Groups[1].Value);
            }

            var imageSpoilered = reImageSpoilered.Match(entry);
            if (imageSpoilered.Success)
            {
                log.ImageSpoilered = AdminInfo.Create(true, imageSpoilered.Groups[1].Value);
            }

            var postDeleted = rePostDeleted.Match(entry);
            if (postDeleted.Success)
            {
                log.PostDeleted = AdminInfo.Create(true, postDeleted.Groups[1].Value);
            }

            var userBanned = reUserBanned.Match(entry);
            if (userBanned.Success)
            {
                var banGroups = userBanned.Groups;
                log.UserBanned = AdminInfo.Create(true, banGroups[1].Value);

                // The " for <reason>" suffix is optional.
                if (banGroups[2].Success)
                {
                    log.BanMessage = AdminInfo.Create(banGroups[2].Value, banGroups[1].Value);
                }
            }
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Modlog parsing error: {ex.Message}");
        log = new Modlog();
    }
}
private static readonly Regex reMailTo = new Regex(@"^mailto:", RegexOptions.Compiled);

/// <summary>
/// Loads a thread page, fills <paramref name="thread"/> with the opening post's data and
/// yields one <see cref="PostInfo"/> per <c>article</c> element found in the thread section.
/// </summary>
/// <param name="thread">Thread to load; its board URL and post number form the page address.</param>
/// <param name="token">Cancels the page request and is checked before each reply is parsed.</param>
/// <returns>The replies, each passed through <c>EncryptIfRequired</c> before being yielded.</returns>
/// <remarks>
/// The thread's timestamp and poster identity are read regardless of whether the opening
/// post has a parsable image. Optional elements that are missing leave the corresponding
/// field null/default instead of throwing. The thread itself is passed to
/// <c>EncryptIfRequired</c> only after the reply loop has run to completion.
/// </remarks>
protected override async IAsyncEnumerable<PostInfo> GetPosts(ThreadInfo thread, [EnumeratorCancellation] CancellationToken token)
{
    var document = await context.OpenAsync(thread.BoardInfo.BoardURL + thread.PostNumber, token);
    var section = document.QuerySelector("section");
    thread.Locked = section.ClassList.Contains("locked");

    // Case-insensitive node-name test that also tolerates a missing node.
    bool isTag(INode node, string tag) =>
        node != null && string.Equals(node.NodeName, tag, StringComparison.OrdinalIgnoreCase);

    // Reads name / tripcode / e-mail / capcode from the <b> element of a post header.
    (string Name, string Tripcode, string Email, string Capcode) getTripcode(IElement header)
    {
        string name = null, trip = null, mail = null, cap = null;

        var bname = header?.QuerySelector("b");
        if (bname == null)
        {
            return (name, trip, mail, cap);
        }

        if (isTag(bname.FirstChild, "a"))
        {
            //Mail link: strip the "mailto:" scheme and continue inside the anchor.
            var anchor = bname.FirstElementChild;
            mail = anchor.GetAttribute("href");
            if (mail != null)
            {
                mail = reMailTo.Replace(mail, "");
            }
            bname = anchor;
        }

        var parts = bname.ChildNodes;
        if (parts.Length > 1 && isTag(parts[1], "code"))
        {
            //Has tripcode & name, optionally followed by a capcode.
            name = bname.FirstChild.TextContent;
            trip = parts[1].TextContent;
            if (parts.Length > 2)
            {
                cap = parts[2].TextContent;
            }
        }
        else if (parts.Length > 1)
        {
            //Name followed by a capcode.
            name = bname.FirstChild.TextContent;
            cap = parts[1].TextContent;
        }
        else if (isTag(bname.FirstChild, "code"))
        {
            //Tripcode, no name.
            trip = bname.FirstChild.TextContent;
        }
        else
        {
            //Name, no tripcode
            name = bname.FirstChild?.TextContent;
        }
        return (name, trip, mail, cap);
    }

    //Get thread's modlog.
    getModlog(section.QuerySelector("b.modLog")?.InnerHtml, out var threadModlog);
    thread.ModLog = threadModlog;

    //Get thread's image info.
    var imageInfo = section.QuerySelector("figure > figcaption > i");
    if (imageInfo != null)
    {
        string imageDimInfo = imageInfo.FirstChild?.TextContent ?? string.Empty;
        if (TryParseImageDimInfo(imageDimInfo, out var threadImageSize, out var threadWidth, out var threadHeight))
        {
            thread.ImageSize = threadImageSize;
            thread.ImageDimensions = (threadWidth, threadHeight);
            var imageNameInfo = imageInfo.QuerySelector("a");
            thread.ImageURL = imageNameInfo?.GetAttribute("href");
            thread.ImageFilename = imageNameInfo?.GetAttribute("download");
        }
        else
        {
            thread.ImageDimensions = default;
            thread.ImageFilename = null;
            thread.ImageSize = 0;
            thread.ImageURL = null;
        }
    }

    //Get thread's timestamp and poster identity. These do not depend on the image, so
    //they are read unconditionally (they used to be skipped for image-less threads).
    var threadTimeText = section.QuerySelector("header > time")?.FirstChild?.TextContent ?? string.Empty;
    if (TryParseDateTime(threadTimeText, out var threadTimestamp))
    {
        thread.Timestamp = threadTimestamp;
    }
    else
    {
        thread.Timestamp = default;
    }
    (thread.Name, thread.Tripcode, thread.Email, thread.Capcode) = getTripcode(section.QuerySelector("header"));

    thread.Body = section.QuerySelector("blockquote")?.InnerHtml;
    thread.ThreadURL = document.Url;
    thread.Subject = section.QuerySelector("header > h3")?.TextContent;

    //Get posts
    foreach (var article in section.QuerySelectorAll("article"))
    {
        token.ThrowIfCancellationRequested();

        var post = new PostInfo()
        {
            Body = article.QuerySelector("blockquote")?.InnerHtml,
        };
        (post.Name, post.Tripcode, post.Email, post.Capcode) = getTripcode(article.QuerySelector("header"));

        var postTimeText = article.QuerySelector("header > time")?.TextContent ?? string.Empty;
        if (TryParseDateTime(postTimeText, out var postTimestamp))
        {
            post.Timestamp = postTimestamp;
        }
        else
        {
            post.Timestamp = default;
        }

        // ulong.TryParse returns false for null, so a missing quote link yields the default number.
        var postNumberText = article.QuerySelector("header > nav > a[class=quote]")?.TextContent;
        if (ulong.TryParse(postNumberText, out ulong postNumber))
        {
            post.PostNumber = postNumber;
        }
        else
        {
            post.PostNumber = default;
        }

        //Get modlog
        getModlog(article.QuerySelector("b.modLog")?.InnerHtml, out var postModlog);
        post.ModLog = postModlog;

        var figure = article.QuerySelector("figure > figcaption > i");
        if (figure != null)
        {
            //Has image
            var figureText = figure.FirstChild?.TextContent ?? string.Empty;
            if (TryParseImageDimInfo(figureText, out var postImageSize, out var postWidth, out var postHeight))
            {
                var imageLink = figure.QuerySelector("a");
                post.ImageDimensions = (postWidth, postHeight);
                post.ImageSize = postImageSize;
                post.ImageURL = imageLink?.GetAttribute("href");
                post.ImageFilename = imageLink?.GetAttribute("download");
            }
        }

        await EncryptIfRequired(post, token);
        yield return post;
    }

    await EncryptIfRequired(thread, token);
}
/// <summary>
/// Encrypts the image and/or body of a post that the moderation log marks as deleted,
/// but only when <c>Config.EncryptDeleted</c> is set.
/// </summary>
/// <param name="post">Post (or thread) whose <c>ModLog</c> decides what gets encrypted.</param>
/// <param name="token">Passed on to the encryption calls.</param>
/// <remarks>
/// Encryption failures are logged to the console and otherwise ignored. A cancellation
/// requested through <paramref name="token"/> is not treated as a failure: it propagates
/// to the caller so that enumeration actually stops.
/// </remarks>
private async Task EncryptIfRequired(PostInfo post, CancellationToken token)
{
    try
    {
        if (Config.EncryptDeleted == null)
        {
            return;
        }

        if (post.ModLog.ImageDeleted)
        {
            await post.EncryptImageAsync(Config.EncryptDeleted.Value, token);
        }
        if (post.ModLog.PostDeleted)
        {
            await post.EncryptPostAsync(Config.EncryptDeleted.Value, token);
        }
    }
    catch (Exception ex) when (!(ex is OperationCanceledException && token.IsCancellationRequested))
    {
        Console.WriteLine("Encryption for post " + post.PostNumber + " failed: " + ex.Message + "\n" + ex.StackTrace);
    }
}
/// <summary>
/// Loads the board's catalog page, refreshes <paramref name="boardInfo"/> from it and
/// yields a <see cref="ThreadInfo"/> for every catalog link whose <c>href</c> is a number.
/// </summary>
/// <param name="boardInfo">Board to enumerate; its URL plus "catalog" is the page requested.</param>
/// <param name="token">Passed to the page request and to <c>GetBoardInfo</c>.</param>
/// <returns>Threads carrying only their post number.</returns>
protected override async IAsyncEnumerable<ThreadInfo> GetThreads(BoardInfo boardInfo, [EnumeratorCancellation] CancellationToken token)
{
    var catalog = await context.OpenAsync(boardInfo.BoardURL + "catalog", token);
    await GetBoardInfo(boardInfo, catalog, token);

    foreach (var anchor in catalog.QuerySelectorAll("#catalog > article > a[class=history]"))
    {
        if (!anchor.HasAttribute("href"))
        {
            continue;
        }

        // Links whose target is not a bare thread number are skipped.
        var target = anchor.GetAttribute("href");
        if (!ulong.TryParse(target, out ulong threadNumber))
        {
            continue;
        }

        yield return new ThreadInfo()
        {
            PostNumber = threadNumber,
        };
    }
}
}
}