|
|
|
@ -1,351 +1,351 @@
|
|
|
|
|
using AngleSharp;
|
|
|
|
|
using AngleSharp.Dom;
|
|
|
|
|
using AngleSharp.Io;
|
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Runtime.CompilerServices;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using System.Threading;
|
|
|
|
|
using System.Threading.Tasks;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
|
|
|
|
|
namespace napdump.Dumpers
|
|
|
|
|
{
|
|
|
|
|
class Nineball : Dumper
|
|
|
|
|
{
|
|
|
|
|
/// <summary>AngleSharp configuration built with default cookie handling and the default loader.</summary>
readonly IConfiguration browserConfig;

/// <summary>Browsing context created from <see cref="browserConfig"/>.</summary>
readonly IBrowsingContext context;

/// <summary>Cookie provider resolved from the configuration's registered services.</summary>
readonly ICookieProvider cookies;

/// <summary>
/// Builds the browser configuration, registers any cookies supplied in
/// <paramref name="config"/>, and then creates the browsing context.
/// </summary>
/// <param name="config">Dumper settings; a null <c>Cookies</c> list is treated as empty.</param>
public Nineball(DumperConfig config) : base(config)
{
    browserConfig = Configuration.Default
        .WithDefaultCookies()
        .WithDefaultLoader();
    cookies = browserConfig.Services.OfType<ICookieProvider>().First();

    var presetCookies = config.Cookies ?? Array.Empty<(string Url, string Value)>();
    foreach (var preset in presetCookies)
    {
        cookies.SetCookie(new Url(preset.Url), preset.Value);
    }

    context = BrowsingContext.New(browserConfig);
}
|
|
|
|
|
|
|
|
|
|
// Leading "/.../" segment of the board title, slashes included.
private static readonly Regex reBoardName = new Regex(@"^(\/.*?\/)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Leading "/word/" segment of the board title, capturing only the word characters.
private static readonly Regex reBoardNameW = new Regex(@"^\/(\w+)\/", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Fills <paramref name="bi"/> with the title, board name, safe name, description
/// and tags read from an already loaded page.
/// </summary>
/// <param name="bi">Board record to populate.</param>
/// <param name="document">Page that contains the <c>body &gt; threads &gt; h1</c> heading and the <c>#banner_info</c> element.</param>
/// <param name="token">Checked once before any parsing starts.</param>
protected async Task GetBoardInfo(BoardInfo bi, IDocument document, CancellationToken token)
{
    await Task.Yield();
    token.ThrowIfCancellationRequested();

    var heading = document.QuerySelector("body > threads > h1");
    bi.Title = heading.InnerHtml;

    // Board name: the "/x/" prefix when present, otherwise the whole title.
    var slashed = reBoardName.Match(bi.Title);
    if (slashed.Success)
    {
        bi.BoardName = slashed.Groups[1].Value;
    }
    else
    {
        bi.BoardName = bi.Title;
    }

    // Safe name: the bare word between the slashes, or a fixed placeholder.
    var bare = reBoardNameW.Match(bi.Title);
    if (bare.Success)
    {
        bi.SafeName = bare.Groups[1].Value;
    }
    else
    {
        bi.SafeName = "unbound";
    }

    var banner = document.QuerySelector("#banner_info");
    bi.Description = banner.TextContent;
    bi.Tags = new[] { "meguca", "node", "liveboard" };
}
|
|
|
|
|
// Matches image captions such as "(12 KB, 640x480)": size, unit, width, height.
private static readonly Regex reImageDim = new Regex(@"\((\d+) ([kmg]?b), (\d+)x(\d+)\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the file size in bytes and the pixel dimensions from an image caption.
/// </summary>
/// <param name="info">Caption text expected to contain "(size unit, WxH)"; null is treated as no match.</param>
/// <param name="size">File size in bytes (units are powers of 1024), or 0 on failure.</param>
/// <param name="x">Image width, or 0 on failure.</param>
/// <param name="y">Image height, or 0 on failure.</param>
/// <returns>True when every component was parsed; otherwise false with all outputs zeroed.</returns>
private static bool TryParseImageDimInfo(string info, out long size, out int x, out int y)
{
    size = default;
    x = y = default;

    if (info == null)
    {
        return false;
    }

    var match = reImageDim.Match(info);
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;
    if (!long.TryParse(groups[1].Value, out var rawSize) ||
        !int.TryParse(groups[3].Value, out var width) ||
        !int.TryParse(groups[4].Value, out var height))
    {
        return false;
    }

    long multiplier;
    switch (groups[2].Value.Trim().ToLowerInvariant())
    {
        case "b":
            multiplier = 1L;
            break;
        case "kb":
            multiplier = 1024L;
            break;
        case "mb":
            multiplier = 1024L * 1024;
            break;
        case "gb":
            multiplier = 1024L * 1024 * 1024;
            break;
        default:
            return false;
    }

    // Reject values whose byte count would not fit in a long instead of wrapping around.
    if (rawSize > long.MaxValue / multiplier)
    {
        return false;
    }

    // The size is scaled by the unit (the previous bitwise AND produced wrong values).
    size = rawSize * multiplier;
    x = width;
    y = height;
    return true;
}
|
|
|
|
|
|
|
|
|
|
// Matches timestamps such as "05 Mar 2019(Tue)14:07": day, month abbreviation, year, hour, minute.
private static readonly Regex reDateTime = new Regex(@"(\d\d) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d\d\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Month abbreviations in calendar order; (index + 1) is the month number.
private static readonly string[] reDateTimeMonths =
{
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};

/// <summary>
/// Parses a post timestamp of the form "dd Mon yyyy(Day)hh:mm".
/// </summary>
/// <param name="htmlDateTime">Timestamp text; surrounding whitespace is ignored and null is treated as no match.</param>
/// <param name="dt">Parsed date and time with seconds set to zero, or the default value on failure.</param>
/// <returns>True when the text matched and describes a valid calendar date and time.</returns>
private static bool TryParseDateTime(string htmlDateTime, out DateTime dt)
{
    dt = default;

    if (htmlDateTime == null)
    {
        return false;
    }

    var match = reDateTime.Match(htmlDateTime.Trim());
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;

    // \d also matches non-ASCII digits, which the integer parser rejects;
    // TryParse turns that into a plain failure instead of an exception.
    if (!int.TryParse(groups[1].Value, out int day) ||
        !int.TryParse(groups[3].Value, out int year) ||
        !int.TryParse(groups[4].Value, out int hour) ||
        !int.TryParse(groups[5].Value, out int minute))
    {
        return false;
    }

    // The regex is case-insensitive, so the month lookup has to be as well.
    string monthText = groups[2].Value;
    int month = Array.FindIndex(
        reDateTimeMonths,
        candidate => string.Equals(candidate, monthText, StringComparison.OrdinalIgnoreCase)) + 1;
    if (month == 0)
    {
        return false;
    }

    // Validate ranges up front so that invalid dates (e.g. 31 Feb) fail without throwing.
    if (year < 1 || day < 1 || day > DateTime.DaysInMonth(year, month) || hour > 23 || minute > 59)
    {
        return false;
    }

    dt = new DateTime(year, month, day, hour, minute, 0);
    return true;
}
|
|
|
|
|
|
|
|
|
|
// One pattern per moderation notice; each captures the acting moderator's name.
private static readonly Regex reImageDeleted = new Regex(@"^Image deleted by (\w+)$", RegexOptions.Compiled);
private static readonly Regex reImageSpoilered = new Regex(@"^Image spoilered by (\w+)$", RegexOptions.Compiled);
private static readonly Regex rePostDeleted = new Regex(@"^Post deleted by (\w+)$", RegexOptions.Compiled);
// The ban notice optionally carries a reason after " for ".
private static readonly Regex reUserBanned = new Regex(@"^User banned by (\w+)(?: for (.+))?$", RegexOptions.Compiled);

/// <summary>
/// Builds a <see cref="Modlog"/> from the inner HTML of a moderation-log node.
/// </summary>
/// <param name="nodeHtml">Notices separated by <c>&lt;br&gt;</c>; null yields an empty log.</param>
/// <param name="log">Always assigned. Reset to an empty log if parsing throws.</param>
private static void getModlog(string nodeHtml, out Modlog log)
{
    log = new Modlog();
    if (nodeHtml == null)
    {
        return;
    }

    try
    {
        foreach (var rawLine in nodeHtml.Split("<br>"))
        {
            var notice = rawLine.Trim();
            if (notice.Length == 0)
            {
                continue;
            }

            var imageDeleted = reImageDeleted.Match(notice);
            if (imageDeleted.Success)
            {
                log.ImageDeleted = AdminInfo.Create(true, imageDeleted.Groups[1].Value);
            }

            var imageSpoilered = reImageSpoilered.Match(notice);
            if (imageSpoilered.Success)
            {
                log.ImageSpoilered = AdminInfo.Create(true, imageSpoilered.Groups[1].Value);
            }

            var postDeleted = rePostDeleted.Match(notice);
            if (postDeleted.Success)
            {
                log.PostDeleted = AdminInfo.Create(true, postDeleted.Groups[1].Value);
            }

            var userBanned = reUserBanned.Match(notice);
            if (userBanned.Success)
            {
                var moderator = userBanned.Groups[1];
                var reason = userBanned.Groups[2];
                log.UserBanned = AdminInfo.Create(true, moderator.Value);
                if (reason.Success)
                {
                    log.BanMessage = AdminInfo.Create(reason.Value, moderator.Value);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the problem and discard any partially filled log.
        Console.WriteLine("Modlog parsing error: " + ex.Message);
        log = new Modlog();
    }
}
|
|
|
|
|
|
|
|
|
|
// Strips the "mailto:" scheme from an e-mail link.
private static readonly Regex reMailTo = new Regex(@"^mailto:", RegexOptions.Compiled);

/// <summary>
/// Opens the thread page, fills the opening post's details into <paramref name="thread"/>,
/// and yields one <see cref="PostInfo"/> per <c>article</c> element in the thread's section.
/// </summary>
/// <param name="thread">Thread to load; its board URL and post number form the page address.</param>
/// <param name="token">Passed to the page load and encryption calls and checked before each reply.</param>
/// <returns>The replies in document order. The thread itself is passed to the encryption step after the last reply.</returns>
protected override async IAsyncEnumerable<PostInfo> GetPosts(ThreadInfo thread, [EnumeratorCancellation] CancellationToken token)
{
    var document = await context.OpenAsync(thread.BoardInfo.BoardURL + thread.PostNumber, token);

    var section = document.QuerySelector("section");
    thread.Locked = section.ClassList.Contains("locked");

    // Tag-name comparison that does not depend on the casing the parser reports.
    bool isNamed(INode node, string tagName) =>
        string.Equals(node.NodeName, tagName, StringComparison.OrdinalIgnoreCase);

    // Reads name, tripcode, e-mail and capcode from a post header's first <b> element.
    // Missing elements yield nulls instead of throwing.
    (string Name, string Tripcode, string Email, string Capcode) getTripcode(IElement header)
    {
        string name, trip, mail, cap;
        name = trip = mail = cap = null;

        var bname = header?.QuerySelector("b");
        if (bname?.FirstChild == null)
        {
            return (name, trip, mail, cap);
        }

        if (bname.FirstChild is IElement mailLink && isNamed(mailLink, "a"))
        {
            // Mail link: the name parts live inside the anchor.
            mail = mailLink.GetAttribute("href");
            if (mail != null)
            {
                mail = reMailTo.Replace(mail, "");
            }
            bname = mailLink;
            if (bname.FirstChild == null)
            {
                return (name, trip, mail, cap);
            }
        }

        var parts = bname.ChildNodes;
        if (parts.Length > 1 && isNamed(parts[1], "code"))
        {
            // Name followed by tripcode, optionally followed by a capcode.
            name = parts[0].TextContent;
            trip = parts[1].TextContent;
            if (parts.Length > 2)
            {
                cap = parts[2].TextContent;
            }
        }
        else if (parts.Length > 1)
        {
            // Name followed by a capcode.
            name = parts[0].TextContent;
            cap = parts[1].TextContent;
        }
        else if (isNamed(parts[0], "code"))
        {
            // Tripcode, no name.
            trip = parts[0].TextContent;
        }
        else
        {
            // Name, no tripcode.
            name = parts[0].TextContent;
        }

        return (name, trip, mail, cap);
    }

    // Thread's modlog.
    getModlog(section.QuerySelector("b.modLog")?.InnerHtml, out var threadModlog);
    thread.ModLog = threadModlog;

    // Thread's image, if the opening post has a caption.
    var imageInfo = section.QuerySelector("figure > figcaption > i");
    if (imageInfo != null)
    {
        string imageDimInfo = imageInfo.FirstChild?.TextContent;
        if (imageDimInfo != null && TryParseImageDimInfo(imageDimInfo, out var _imageSize, out var _x, out var _y))
        {
            thread.ImageSize = _imageSize;
            thread.ImageDimensions = (_x, _y);

            var imageNameInfo = imageInfo.QuerySelector("a");
            thread.ImageURL = imageNameInfo?.GetAttribute("href");
            thread.ImageFilename = imageNameInfo?.GetAttribute("download");
        }
        else
        {
            thread.ImageDimensions = default;
            thread.ImageFilename = null;
            thread.ImageSize = 0;
            thread.ImageURL = null;
        }
    }

    // Timestamp and poster identity do not depend on the image, so they are read
    // for every thread (replies are handled the same way below).
    string threadTimeText = section.QuerySelector("header > time")?.FirstChild?.TextContent;
    if (threadTimeText != null && TryParseDateTime(threadTimeText, out var threadTimestamp))
    {
        thread.Timestamp = threadTimestamp;
    }
    else
    {
        thread.Timestamp = default;
    }

    (thread.Name, thread.Tripcode, thread.Email, thread.Capcode) = getTripcode(section.QuerySelector("header"));

    thread.Body = section.QuerySelector("blockquote")?.InnerHtml;
    thread.ThreadURL = document.Url;
    thread.Subject = section.QuerySelector("header > h3")?.TextContent;

    // Replies.
    foreach (var article in section.QuerySelectorAll("article"))
    {
        token.ThrowIfCancellationRequested();

        var post = new PostInfo()
        {
            Body = article.QuerySelector("blockquote")?.InnerHtml,
        };

        (post.Name, post.Tripcode, post.Email, post.Capcode) = getTripcode(article.QuerySelector("header"));

        string postTimeText = article.QuerySelector("header > time")?.TextContent;
        if (postTimeText != null && TryParseDateTime(postTimeText, out var _time))
        {
            post.Timestamp = _time;
        }
        else
        {
            post.Timestamp = default;
        }

        // ulong.TryParse returns false for null, so a missing link yields the default number.
        string postNumberText = article.QuerySelector("header > nav > a[class=quote]")?.TextContent;
        if (ulong.TryParse(postNumberText, out ulong _postNumber))
        {
            post.PostNumber = _postNumber;
        }
        else
        {
            post.PostNumber = default;
        }

        // Reply's modlog.
        getModlog(article.QuerySelector("b.modLog")?.InnerHtml, out var postModlog);
        post.ModLog = postModlog;

        // Reply's image, if any.
        var figure = article.QuerySelector("figure > figcaption > i");
        string figureText = figure?.FirstChild?.TextContent;
        if (figureText != null && TryParseImageDimInfo(figureText, out var _imageSize, out var _x, out var _y))
        {
            post.ImageDimensions = (_x, _y);
            post.ImageSize = _imageSize;

            var imageLink = figure.QuerySelector("a");
            post.ImageURL = imageLink?.GetAttribute("href");
            post.ImageFilename = imageLink?.GetAttribute("download");
        }

        await EncryptIfRequired(post, token);
        yield return post;
    }

    await EncryptIfRequired(thread, token);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Encrypts the image and/or the post when the modlog marks them as deleted and
/// <c>Config.EncryptDeleted</c> is set. Failures are written to the console and not rethrown.
/// </summary>
/// <param name="post">Post (or thread) whose modlog decides what gets encrypted.</param>
/// <param name="token">Passed through to the encryption calls.</param>
private async Task EncryptIfRequired(PostInfo post, CancellationToken token)
{
    try
    {
        // Nothing to do unless encryption of deleted content is configured.
        if (Config.EncryptDeleted == null)
        {
            return;
        }

        if (post.ModLog.ImageDeleted)
        {
            await post.EncryptImageAsync(Config.EncryptDeleted.Value, token);
        }

        if (post.ModLog.PostDeleted)
        {
            await post.EncryptPostAsync(Config.EncryptDeleted.Value, token);
        }
    }
    catch (Exception ex)
    {
        var report = "Encryption for post " + post.PostNumber + " failed: " + ex.Message + "\n" + ex.StackTrace;
        Console.WriteLine(report);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Opens the board's catalog page, fills in <paramref name="boardInfo"/> from it,
/// and yields a <see cref="ThreadInfo"/> for every catalog link whose <c>href</c> is a plain number.
/// </summary>
/// <param name="boardInfo">Board to enumerate; its URL with "catalog" appended is the page address.</param>
/// <param name="token">Passed to the page load and to the board-info step.</param>
/// <returns>Threads carrying only their post number, in catalog order.</returns>
protected override async IAsyncEnumerable<ThreadInfo> GetThreads(BoardInfo boardInfo, [EnumeratorCancellation] CancellationToken token)
{
    var catalog = await context.OpenAsync(boardInfo.BoardURL + "catalog", token);

    await GetBoardInfo(boardInfo, catalog, token);

    foreach (var anchor in catalog.QuerySelectorAll("#catalog > article > a[class=history]"))
    {
        // Links without an href cannot identify a thread.
        if (!anchor.HasAttribute("href"))
        {
            continue;
        }

        // Only purely numeric hrefs are treated as thread numbers.
        if (!ulong.TryParse(anchor.GetAttribute("href"), out ulong postNumber))
        {
            continue;
        }

        yield return new ThreadInfo()
        {
            PostNumber = postNumber,
        };
    }
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
using AngleSharp;
|
|
|
|
|
using AngleSharp.Dom;
|
|
|
|
|
using AngleSharp.Io;
|
|
|
|
|
using System;
|
|
|
|
|
using System.Collections.Generic;
|
|
|
|
|
using System.IO;
|
|
|
|
|
using System.Runtime.CompilerServices;
|
|
|
|
|
using System.Text;
|
|
|
|
|
using System.Text.RegularExpressions;
|
|
|
|
|
using System.Threading;
|
|
|
|
|
using System.Threading.Tasks;
|
|
|
|
|
using System.Linq;
|
|
|
|
|
|
|
|
|
|
namespace napdump.Dumpers
|
|
|
|
|
{
|
|
|
|
|
class Nineball : Dumper
|
|
|
|
|
{
|
|
|
|
|
/// <summary>AngleSharp configuration built with default cookie handling and the default loader.</summary>
readonly IConfiguration browserConfig;

/// <summary>Browsing context created from <see cref="browserConfig"/>.</summary>
readonly IBrowsingContext context;

/// <summary>Cookie provider resolved from the configuration's registered services.</summary>
readonly ICookieProvider cookies;

/// <summary>
/// Builds the browser configuration, registers any cookies supplied in
/// <paramref name="config"/>, and then creates the browsing context.
/// </summary>
/// <param name="config">Dumper settings; a null <c>Cookies</c> list is treated as empty.</param>
public Nineball(DumperConfig config) : base(config)
{
    browserConfig = Configuration.Default
        .WithDefaultCookies()
        .WithDefaultLoader();
    cookies = browserConfig.Services.OfType<ICookieProvider>().First();

    var presetCookies = config.Cookies ?? Array.Empty<(string Url, string Value)>();
    foreach (var preset in presetCookies)
    {
        cookies.SetCookie(new Url(preset.Url), preset.Value);
    }

    context = BrowsingContext.New(browserConfig);
}
|
|
|
|
|
|
|
|
|
|
// Leading "/.../" segment of the board title, slashes included.
private static readonly Regex reBoardName = new Regex(@"^(\/.*?\/)", RegexOptions.Compiled | RegexOptions.IgnoreCase);
// Leading "/word/" segment of the board title, capturing only the word characters.
private static readonly Regex reBoardNameW = new Regex(@"^\/(\w+)\/", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Fills <paramref name="bi"/> with the title, board name, safe name, description
/// and tags read from an already loaded page.
/// </summary>
/// <param name="bi">Board record to populate.</param>
/// <param name="document">Page that contains the <c>body &gt; threads &gt; h1</c> heading and the <c>#banner_info</c> element.</param>
/// <param name="token">Checked once before any parsing starts.</param>
protected async Task GetBoardInfo(BoardInfo bi, IDocument document, CancellationToken token)
{
    await Task.Yield();
    token.ThrowIfCancellationRequested();

    var heading = document.QuerySelector("body > threads > h1");
    bi.Title = heading.InnerHtml;

    // Board name: the "/x/" prefix when present, otherwise the whole title.
    var slashed = reBoardName.Match(bi.Title);
    if (slashed.Success)
    {
        bi.BoardName = slashed.Groups[1].Value;
    }
    else
    {
        bi.BoardName = bi.Title;
    }

    // Safe name: the bare word between the slashes, or a fixed placeholder.
    var bare = reBoardNameW.Match(bi.Title);
    if (bare.Success)
    {
        bi.SafeName = bare.Groups[1].Value;
    }
    else
    {
        bi.SafeName = "unbound";
    }

    var banner = document.QuerySelector("#banner_info");
    bi.Description = banner.TextContent;
    bi.Tags = new[] { "meguca", "node", "liveboard" };
}
|
|
|
|
|
// Matches image captions such as "(12 KB, 640x480)": size, unit, width, height.
private static readonly Regex reImageDim = new Regex(@"\((\d+) ([kmg]?b), (\d+)x(\d+)\)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the file size in bytes and the pixel dimensions from an image caption.
/// </summary>
/// <param name="info">Caption text expected to contain "(size unit, WxH)"; null is treated as no match.</param>
/// <param name="size">File size in bytes (units are powers of 1024), or 0 on failure.</param>
/// <param name="x">Image width, or 0 on failure.</param>
/// <param name="y">Image height, or 0 on failure.</param>
/// <returns>True when every component was parsed; otherwise false with all outputs zeroed.</returns>
private static bool TryParseImageDimInfo(string info, out long size, out int x, out int y)
{
    size = default;
    x = y = default;

    if (info == null)
    {
        return false;
    }

    var match = reImageDim.Match(info);
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;
    if (!long.TryParse(groups[1].Value, out var rawSize) ||
        !int.TryParse(groups[3].Value, out var width) ||
        !int.TryParse(groups[4].Value, out var height))
    {
        return false;
    }

    long multiplier;
    switch (groups[2].Value.Trim().ToLowerInvariant())
    {
        case "b":
            multiplier = 1L;
            break;
        case "kb":
            multiplier = 1024L;
            break;
        case "mb":
            multiplier = 1024L * 1024;
            break;
        case "gb":
            multiplier = 1024L * 1024 * 1024;
            break;
        default:
            return false;
    }

    // Reject values whose byte count would not fit in a long instead of wrapping around.
    if (rawSize > long.MaxValue / multiplier)
    {
        return false;
    }

    // The size is scaled by the unit (the previous bitwise AND produced wrong values).
    size = rawSize * multiplier;
    x = width;
    y = height;
    return true;
}
|
|
|
|
|
|
|
|
|
|
// Matches timestamps such as "05 Mar 2019(Tue)14:07": day, month abbreviation, year, hour, minute.
private static readonly Regex reDateTime = new Regex(@"(\d\d) (Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) (\d\d\d\d)\(\w+\)(\d\d):(\d\d)", RegexOptions.Compiled | RegexOptions.IgnoreCase);

// Month abbreviations in calendar order; (index + 1) is the month number.
private static readonly string[] reDateTimeMonths =
{
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec",
};

/// <summary>
/// Parses a post timestamp of the form "dd Mon yyyy(Day)hh:mm".
/// </summary>
/// <param name="htmlDateTime">Timestamp text; surrounding whitespace is ignored and null is treated as no match.</param>
/// <param name="dt">Parsed date and time with seconds set to zero, or the default value on failure.</param>
/// <returns>True when the text matched and describes a valid calendar date and time.</returns>
private static bool TryParseDateTime(string htmlDateTime, out DateTime dt)
{
    dt = default;

    if (htmlDateTime == null)
    {
        return false;
    }

    var match = reDateTime.Match(htmlDateTime.Trim());
    if (!match.Success)
    {
        return false;
    }

    var groups = match.Groups;

    // \d also matches non-ASCII digits, which the integer parser rejects;
    // TryParse turns that into a plain failure instead of an exception.
    if (!int.TryParse(groups[1].Value, out int day) ||
        !int.TryParse(groups[3].Value, out int year) ||
        !int.TryParse(groups[4].Value, out int hour) ||
        !int.TryParse(groups[5].Value, out int minute))
    {
        return false;
    }

    // The regex is case-insensitive, so the month lookup has to be as well.
    string monthText = groups[2].Value;
    int month = Array.FindIndex(
        reDateTimeMonths,
        candidate => string.Equals(candidate, monthText, StringComparison.OrdinalIgnoreCase)) + 1;
    if (month == 0)
    {
        return false;
    }

    // Validate ranges up front so that invalid dates (e.g. 31 Feb) fail without throwing.
    if (year < 1 || day < 1 || day > DateTime.DaysInMonth(year, month) || hour > 23 || minute > 59)
    {
        return false;
    }

    dt = new DateTime(year, month, day, hour, minute, 0);
    return true;
}
|
|
|
|
|
|
|
|
|
|
// One pattern per moderation notice; each captures the acting moderator's name.
private static readonly Regex reImageDeleted = new Regex(@"^Image deleted by (\w+)$", RegexOptions.Compiled);
private static readonly Regex reImageSpoilered = new Regex(@"^Image spoilered by (\w+)$", RegexOptions.Compiled);
private static readonly Regex rePostDeleted = new Regex(@"^Post deleted by (\w+)$", RegexOptions.Compiled);
// The ban notice optionally carries a reason after " for ".
private static readonly Regex reUserBanned = new Regex(@"^User banned by (\w+)(?: for (.+))?$", RegexOptions.Compiled);

/// <summary>
/// Builds a <see cref="Modlog"/> from the inner HTML of a moderation-log node.
/// </summary>
/// <param name="nodeHtml">Notices separated by <c>&lt;br&gt;</c>; null yields an empty log.</param>
/// <param name="log">Always assigned. Reset to an empty log if parsing throws.</param>
private static void getModlog(string nodeHtml, out Modlog log)
{
    log = new Modlog();
    if (nodeHtml == null)
    {
        return;
    }

    try
    {
        foreach (var rawLine in nodeHtml.Split("<br>"))
        {
            var notice = rawLine.Trim();
            if (notice.Length == 0)
            {
                continue;
            }

            var imageDeleted = reImageDeleted.Match(notice);
            if (imageDeleted.Success)
            {
                log.ImageDeleted = AdminInfo.Create(true, imageDeleted.Groups[1].Value);
            }

            var imageSpoilered = reImageSpoilered.Match(notice);
            if (imageSpoilered.Success)
            {
                log.ImageSpoilered = AdminInfo.Create(true, imageSpoilered.Groups[1].Value);
            }

            var postDeleted = rePostDeleted.Match(notice);
            if (postDeleted.Success)
            {
                log.PostDeleted = AdminInfo.Create(true, postDeleted.Groups[1].Value);
            }

            var userBanned = reUserBanned.Match(notice);
            if (userBanned.Success)
            {
                var moderator = userBanned.Groups[1];
                var reason = userBanned.Groups[2];
                log.UserBanned = AdminInfo.Create(true, moderator.Value);
                if (reason.Success)
                {
                    log.BanMessage = AdminInfo.Create(reason.Value, moderator.Value);
                }
            }
        }
    }
    catch (Exception ex)
    {
        // Best effort: report the problem and discard any partially filled log.
        Console.WriteLine("Modlog parsing error: " + ex.Message);
        log = new Modlog();
    }
}
|
|
|
|
|
|
|
|
|
|
// Strips the "mailto:" scheme from an e-mail link.
private static readonly Regex reMailTo = new Regex(@"^mailto:", RegexOptions.Compiled);

/// <summary>
/// Opens the thread page, fills the opening post's details into <paramref name="thread"/>,
/// and yields one <see cref="PostInfo"/> per <c>article</c> element in the thread's section.
/// </summary>
/// <param name="thread">Thread to load; its board URL and post number form the page address.</param>
/// <param name="token">Passed to the page load and encryption calls and checked before each reply.</param>
/// <returns>The replies in document order. The thread itself is passed to the encryption step after the last reply.</returns>
protected override async IAsyncEnumerable<PostInfo> GetPosts(ThreadInfo thread, [EnumeratorCancellation] CancellationToken token)
{
    var document = await context.OpenAsync(thread.BoardInfo.BoardURL + thread.PostNumber, token);

    var section = document.QuerySelector("section");
    thread.Locked = section.ClassList.Contains("locked");

    // Tag-name comparison that does not depend on the casing the parser reports.
    bool isNamed(INode node, string tagName) =>
        string.Equals(node.NodeName, tagName, StringComparison.OrdinalIgnoreCase);

    // Reads name, tripcode, e-mail and capcode from a post header's first <b> element.
    // Missing elements yield nulls instead of throwing.
    (string Name, string Tripcode, string Email, string Capcode) getTripcode(IElement header)
    {
        string name, trip, mail, cap;
        name = trip = mail = cap = null;

        var bname = header?.QuerySelector("b");
        if (bname?.FirstChild == null)
        {
            return (name, trip, mail, cap);
        }

        if (bname.FirstChild is IElement mailLink && isNamed(mailLink, "a"))
        {
            // Mail link: the name parts live inside the anchor.
            mail = mailLink.GetAttribute("href");
            if (mail != null)
            {
                mail = reMailTo.Replace(mail, "");
            }
            bname = mailLink;
            if (bname.FirstChild == null)
            {
                return (name, trip, mail, cap);
            }
        }

        var parts = bname.ChildNodes;
        if (parts.Length > 1 && isNamed(parts[1], "code"))
        {
            // Name followed by tripcode, optionally followed by a capcode.
            name = parts[0].TextContent;
            trip = parts[1].TextContent;
            if (parts.Length > 2)
            {
                cap = parts[2].TextContent;
            }
        }
        else if (parts.Length > 1)
        {
            // Name followed by a capcode.
            name = parts[0].TextContent;
            cap = parts[1].TextContent;
        }
        else if (isNamed(parts[0], "code"))
        {
            // Tripcode, no name.
            trip = parts[0].TextContent;
        }
        else
        {
            // Name, no tripcode.
            name = parts[0].TextContent;
        }

        return (name, trip, mail, cap);
    }

    // Thread's modlog.
    getModlog(section.QuerySelector("b.modLog")?.InnerHtml, out var threadModlog);
    thread.ModLog = threadModlog;

    // Thread's image, if the opening post has a caption.
    var imageInfo = section.QuerySelector("figure > figcaption > i");
    if (imageInfo != null)
    {
        string imageDimInfo = imageInfo.FirstChild?.TextContent;
        if (imageDimInfo != null && TryParseImageDimInfo(imageDimInfo, out var _imageSize, out var _x, out var _y))
        {
            thread.ImageSize = _imageSize;
            thread.ImageDimensions = (_x, _y);

            var imageNameInfo = imageInfo.QuerySelector("a");
            thread.ImageURL = imageNameInfo?.GetAttribute("href");
            thread.ImageFilename = imageNameInfo?.GetAttribute("download");
        }
        else
        {
            thread.ImageDimensions = default;
            thread.ImageFilename = null;
            thread.ImageSize = 0;
            thread.ImageURL = null;
        }
    }

    // Timestamp and poster identity do not depend on the image, so they are read
    // for every thread (replies are handled the same way below).
    string threadTimeText = section.QuerySelector("header > time")?.FirstChild?.TextContent;
    if (threadTimeText != null && TryParseDateTime(threadTimeText, out var threadTimestamp))
    {
        thread.Timestamp = threadTimestamp;
    }
    else
    {
        thread.Timestamp = default;
    }

    (thread.Name, thread.Tripcode, thread.Email, thread.Capcode) = getTripcode(section.QuerySelector("header"));

    thread.Body = section.QuerySelector("blockquote")?.InnerHtml;
    thread.ThreadURL = document.Url;
    thread.Subject = section.QuerySelector("header > h3")?.TextContent;

    // Replies.
    foreach (var article in section.QuerySelectorAll("article"))
    {
        token.ThrowIfCancellationRequested();

        var post = new PostInfo()
        {
            Body = article.QuerySelector("blockquote")?.InnerHtml,
        };

        (post.Name, post.Tripcode, post.Email, post.Capcode) = getTripcode(article.QuerySelector("header"));

        string postTimeText = article.QuerySelector("header > time")?.TextContent;
        if (postTimeText != null && TryParseDateTime(postTimeText, out var _time))
        {
            post.Timestamp = _time;
        }
        else
        {
            post.Timestamp = default;
        }

        // ulong.TryParse returns false for null, so a missing link yields the default number.
        string postNumberText = article.QuerySelector("header > nav > a[class=quote]")?.TextContent;
        if (ulong.TryParse(postNumberText, out ulong _postNumber))
        {
            post.PostNumber = _postNumber;
        }
        else
        {
            post.PostNumber = default;
        }

        // Reply's modlog.
        getModlog(article.QuerySelector("b.modLog")?.InnerHtml, out var postModlog);
        post.ModLog = postModlog;

        // Reply's image, if any.
        var figure = article.QuerySelector("figure > figcaption > i");
        string figureText = figure?.FirstChild?.TextContent;
        if (figureText != null && TryParseImageDimInfo(figureText, out var _imageSize, out var _x, out var _y))
        {
            post.ImageDimensions = (_x, _y);
            post.ImageSize = _imageSize;

            var imageLink = figure.QuerySelector("a");
            post.ImageURL = imageLink?.GetAttribute("href");
            post.ImageFilename = imageLink?.GetAttribute("download");
        }

        await EncryptIfRequired(post, token);
        yield return post;
    }

    await EncryptIfRequired(thread, token);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Encrypts the image and/or the post when the modlog marks them as deleted and
/// <c>Config.EncryptDeleted</c> is set. Failures are written to the console and not rethrown.
/// </summary>
/// <param name="post">Post (or thread) whose modlog decides what gets encrypted.</param>
/// <param name="token">Passed through to the encryption calls.</param>
private async Task EncryptIfRequired(PostInfo post, CancellationToken token)
{
    try
    {
        // Nothing to do unless encryption of deleted content is configured.
        if (Config.EncryptDeleted == null)
        {
            return;
        }

        if (post.ModLog.ImageDeleted)
        {
            await post.EncryptImageAsync(Config.EncryptDeleted.Value, token);
        }

        if (post.ModLog.PostDeleted)
        {
            await post.EncryptPostAsync(Config.EncryptDeleted.Value, token);
        }
    }
    catch (Exception ex)
    {
        var report = "Encryption for post " + post.PostNumber + " failed: " + ex.Message + "\n" + ex.StackTrace;
        Console.WriteLine(report);
    }
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Opens the board's catalog page, fills in <paramref name="boardInfo"/> from it,
/// and yields a <see cref="ThreadInfo"/> for every catalog link whose <c>href</c> is a plain number.
/// </summary>
/// <param name="boardInfo">Board to enumerate; its URL with "catalog" appended is the page address.</param>
/// <param name="token">Passed to the page load and to the board-info step.</param>
/// <returns>Threads carrying only their post number, in catalog order.</returns>
protected override async IAsyncEnumerable<ThreadInfo> GetThreads(BoardInfo boardInfo, [EnumeratorCancellation] CancellationToken token)
{
    var catalog = await context.OpenAsync(boardInfo.BoardURL + "catalog", token);

    await GetBoardInfo(boardInfo, catalog, token);

    foreach (var anchor in catalog.QuerySelectorAll("#catalog > article > a[class=history]"))
    {
        // Links without an href cannot identify a thread.
        if (!anchor.HasAttribute("href"))
        {
            continue;
        }

        // Only purely numeric hrefs are treated as thread numbers.
        if (!ulong.TryParse(anchor.GetAttribute("href"), out ulong postNumber))
        {
            continue;
        }

        yield return new ThreadInfo()
        {
            PostNumber = postNumber,
        };
    }
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|