Handle older(?) team membership date format

This commit is contained in:
Robin Appelman 2023-11-26 22:59:15 +01:00
commit dc400747fd
7 changed files with 2673 additions and 12 deletions

2
api-server/Cargo.lock generated
View file

@ -1743,8 +1743,6 @@ dependencies = [
[[package]]
name = "ugc-scraper"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11be22a2de36995a81b56450a26132bb8718296f60616e32487f59cd2cd49e12"
dependencies = [
"reqwest",
"scraper",

View file

@ -6,8 +6,8 @@ edition = "2021"
[dependencies]
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
main_error = "0.1.2"
#ugc-scraper = { path = "../", version = "0.2.1" }
ugc-scraper = "0.2.1"
ugc-scraper = { path = "../", version = "0.2.1" }
#ugc-scraper = "0.2.1"
axum = "0.6.20"
steamid-ng = "1.0.0"
thiserror = "1.0.50"

View file

@ -71,6 +71,8 @@ const DATE_FORMAT: &[FormatItem<'static>] =
/// Primary layout for a member's join date: abbreviated month name, unpadded day
/// and year, followed on separate lines by a literal `/`, an unpadded 12-hour
/// time with minutes and AM/PM period, and a literal `(ET)` marker.
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
);
/// Date-only layout (abbreviated month, unpadded day, year) used as the fallback
/// when parsing a member's join date that carries no time-of-day component.
const MEMBER_DATE_ALT_FORMAT: &[FormatItem<'static>] =
format_description!("[month repr:short] [day padding:none], [year]");
/// Date-only layout (abbreviated month, unpadded day, year); same description as
/// `MEMBER_DATE_ALT_FORMAT`.
/// NOTE(review): presumably used for roster history entries — usage is not visible here.
const ROSTER_HISTORY_DATE_FORMAT: &[FormatItem<'static>] =
format_description!("[month repr:short] [day padding:none], [year]");

View file

@ -1,9 +1,11 @@
use super::{ElementExt, Parser};
use crate::data::{Membership, NameChange, Record, Team};
use crate::parser::{select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_FORMAT};
use crate::parser::{
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
};
use crate::{ParseError, Result, ScrapeError};
use scraper::{Html, Selector};
use time::{Date, PrimitiveDateTime, UtcOffset};
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
const SELECTOR_TEAM_NAME: &str = ".container .col-md-12 h1 > b";
const SELECTOR_TEAM_TAG: &str = ".container .col-md-12 h1 > span";
@ -257,12 +259,28 @@ impl Parser for TeamParser {
)?;
let role = role.trim().to_string();
let since = since.trim();
let since = PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
let since = if since.starts_with('(') {
let part = since
.split_once('-')
.unwrap_or_default()
.0
.trim()
.trim_start_matches('(');
let date = Date::parse(part, MEMBER_DATE_ALT_FORMAT).map_err(|_| {
ParseError::InvalidDate {
role: "member join date (alternate format)",
date: since.to_string(),
}
})?;
PrimitiveDateTime::new(date, Time::MIDNIGHT).assume_offset(UtcOffset::UTC)
} else {
PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
.map_err(|_| ParseError::InvalidDate {
role: "member join date",
date: since.to_string(),
})?
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap());
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap())
};
Ok(Membership {
name,

2562
tests/data/team_4105.html Normal file

File diff suppressed because it is too large Load diff

View file

@ -29,6 +29,14 @@ fn test_parse_team_html() {
assert_json_snapshot!(parsed);
}
#[test]
fn test_parse_older_team_html() {
    // Fixture: saved HTML of team 4105, whose member rows presumably use the
    // older membership date layout this test is named after.
    let html = read_to_string("tests/data/team_4105.html").unwrap();
    let team_parser = TeamParser::new();
    // Any parse failure should abort the test immediately.
    let parsed = team_parser.parse(&html).unwrap();
    // Compare the parsed result with the stored JSON snapshot.
    assert_json_snapshot!(parsed);
}
#[test]
fn test_parse_team_changed_name_html() {
let body = read_to_string("tests/data/team_6929.html").unwrap();

View file

@ -0,0 +1,73 @@
---
source: tests/snapshot.rs
expression: parsed
---
{
"name": "Melting Pot",
"tag": "Melting Pot",
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f7/f75809d7774c917be9883370d772d3099bfe457d_full.jpg",
"format": "TF2 Highlander",
"timezone": "West-Euro",
"division": "Euro Platinum",
"description": "",
"titles": [],
"members": [
{
"name": "uncle spoodg[A]",
"steam_id": 76561198014928031,
"role": "Leader",
"since": "+002012-01-29T00:00:00.000000000Z"
},
{
"name": "CratZ crayonnaise",
"steam_id": 76561198021136779,
"role": "Member",
"since": "+002012-01-30T00:00:00.000000000Z"
},
{
"name": "calm``",
"steam_id": 76561197968127159,
"role": "Member",
"since": "+002012-01-30T00:00:00.000000000Z"
},
{
"name": "Plysse",
"steam_id": 76561198028175255,
"role": "Member",
"since": "+002012-05-30T00:00:00.000000000Z"
},
{
"name": "calm``",
"steam_id": 76561197968127159,
"role": "Member",
"since": "+002012-05-30T00:00:00.000000000Z"
},
{
"name": "pwalt",
"steam_id": 76561197988404333,
"role": "Member",
"since": "+002012-09-14T00:00:00.000000000Z"
}
],
"results": [
{
"season": 8,
"division": "Euro Platinum",
"wins": 1,
"losses": 4
},
{
"season": 7,
"division": "*West European",
"wins": 1,
"losses": 2
},
{
"season": 6,
"division": "*West European",
"wins": 6,
"losses": 3
}
],
"name_changes": []
}