mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
team parse fixes
This commit is contained in:
parent
8fd98b708f
commit
ddbac7dc79
16 changed files with 4505 additions and 295 deletions
|
|
@ -58,7 +58,7 @@ fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a s
|
|||
const DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
|
||||
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
|
||||
"[month repr:short] [day padding:none], [year] / [hour padding:none]:[minute] [period] (ET)"
|
||||
);
|
||||
const MEMBER_DATE_ALT_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month repr:short] [day padding:none], [year]");
|
||||
|
|
|
|||
|
|
@ -4,8 +4,10 @@ use crate::parser::{
|
|||
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
|
||||
};
|
||||
use crate::{ParseError, Result, ScrapeError};
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use std::str::FromStr;
|
||||
use std::sync::OnceLock;
|
||||
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
|
||||
use ugc_scraper_types::{GameMode, Region};
|
||||
|
||||
|
|
@ -112,10 +114,14 @@ impl TeamParser {
|
|||
}
|
||||
}
|
||||
|
||||
static WHITESPACE_REGEX: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
impl Parser for TeamParser {
|
||||
type Output = Team;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let whitespace_regex = WHITESPACE_REGEX.get_or_init(|| Regex::new("[\n\t ]+").unwrap());
|
||||
|
||||
let document = Html::parse_document(document);
|
||||
let root = document.root_element();
|
||||
let mut name = select_text(root, &self.selector_name)
|
||||
|
|
@ -175,6 +181,7 @@ impl Parser for TeamParser {
|
|||
let region = division
|
||||
.split(' ')
|
||||
.find_map(|part| Region::from_str(part).ok())
|
||||
.or_else(|| Region::from_str(&division).ok())
|
||||
.ok_or_else(|| ParseError::InvalidText {
|
||||
text: division.clone(),
|
||||
role: "team region",
|
||||
|
|
@ -279,7 +286,7 @@ impl Parser for TeamParser {
|
|||
},
|
||||
)?;
|
||||
let role = role.trim().to_string();
|
||||
let since = since.trim();
|
||||
let since = whitespace_regex.replace_all(since.trim(), " ");
|
||||
let since = if since.starts_with('(') {
|
||||
let part = since
|
||||
.split_once('-')
|
||||
|
|
@ -295,7 +302,7 @@ impl Parser for TeamParser {
|
|||
})?;
|
||||
PrimitiveDateTime::new(date, Time::MIDNIGHT).assume_offset(UtcOffset::UTC)
|
||||
} else {
|
||||
PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||
PrimitiveDateTime::parse(&since, MEMBER_DATE_FORMAT)
|
||||
.map_err(|_| ParseError::InvalidDate {
|
||||
role: "member join date",
|
||||
date: since.to_string(),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue