mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
handle older(?) team membership format
This commit is contained in:
parent
4b11b636c7
commit
dc400747fd
7 changed files with 2673 additions and 12 deletions
2
api-server/Cargo.lock
generated
2
api-server/Cargo.lock
generated
|
|
@ -1743,8 +1743,6 @@ dependencies = [
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "ugc-scraper"
|
name = "ugc-scraper"
|
||||||
version = "0.2.1"
|
version = "0.2.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "11be22a2de36995a81b56450a26132bb8718296f60616e32487f59cd2cd49e12"
|
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"scraper",
|
"scraper",
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,8 @@ edition = "2021"
|
||||||
[dependencies]
|
[dependencies]
|
||||||
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
|
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
|
||||||
main_error = "0.1.2"
|
main_error = "0.1.2"
|
||||||
#ugc-scraper = { path = "../", version = "0.2.1" }
|
ugc-scraper = { path = "../", version = "0.2.1" }
|
||||||
ugc-scraper = "0.2.1"
|
#ugc-scraper = "0.2.1"
|
||||||
axum = "0.6.20"
|
axum = "0.6.20"
|
||||||
steamid-ng = "1.0.0"
|
steamid-ng = "1.0.0"
|
||||||
thiserror = "1.0.50"
|
thiserror = "1.0.50"
|
||||||
|
|
|
||||||
|
|
@ -71,6 +71,8 @@ const DATE_FORMAT: &[FormatItem<'static>] =
|
||||||
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
|
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
|
||||||
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
|
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
|
||||||
);
|
);
|
||||||
|
const MEMBER_DATE_ALT_FORMAT: &[FormatItem<'static>] =
|
||||||
|
format_description!("[month repr:short] [day padding:none], [year]");
|
||||||
const ROSTER_HISTORY_DATE_FORMAT: &[FormatItem<'static>] =
|
const ROSTER_HISTORY_DATE_FORMAT: &[FormatItem<'static>] =
|
||||||
format_description!("[month repr:short] [day padding:none], [year]");
|
format_description!("[month repr:short] [day padding:none], [year]");
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,11 @@
|
||||||
use super::{ElementExt, Parser};
|
use super::{ElementExt, Parser};
|
||||||
use crate::data::{Membership, NameChange, Record, Team};
|
use crate::data::{Membership, NameChange, Record, Team};
|
||||||
use crate::parser::{select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_FORMAT};
|
use crate::parser::{
|
||||||
|
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
|
||||||
|
};
|
||||||
use crate::{ParseError, Result, ScrapeError};
|
use crate::{ParseError, Result, ScrapeError};
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
use time::{Date, PrimitiveDateTime, UtcOffset};
|
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
|
||||||
|
|
||||||
const SELECTOR_TEAM_NAME: &str = ".container .col-md-12 h1 > b";
|
const SELECTOR_TEAM_NAME: &str = ".container .col-md-12 h1 > b";
|
||||||
const SELECTOR_TEAM_TAG: &str = ".container .col-md-12 h1 > span";
|
const SELECTOR_TEAM_TAG: &str = ".container .col-md-12 h1 > span";
|
||||||
|
|
@ -257,12 +259,28 @@ impl Parser for TeamParser {
|
||||||
)?;
|
)?;
|
||||||
let role = role.trim().to_string();
|
let role = role.trim().to_string();
|
||||||
let since = since.trim();
|
let since = since.trim();
|
||||||
let since = PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
let since = if since.starts_with('(') {
|
||||||
|
let part = since
|
||||||
|
.split_once('-')
|
||||||
|
.unwrap_or_default()
|
||||||
|
.0
|
||||||
|
.trim()
|
||||||
|
.trim_start_matches('(');
|
||||||
|
let date = Date::parse(part, MEMBER_DATE_ALT_FORMAT).map_err(|_| {
|
||||||
|
ParseError::InvalidDate {
|
||||||
|
role: "member join date (alternate format)",
|
||||||
|
date: since.to_string(),
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
PrimitiveDateTime::new(date, Time::MIDNIGHT).assume_offset(UtcOffset::UTC)
|
||||||
|
} else {
|
||||||
|
PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||||
.map_err(|_| ParseError::InvalidDate {
|
.map_err(|_| ParseError::InvalidDate {
|
||||||
role: "member join date",
|
role: "member join date",
|
||||||
date: since.to_string(),
|
date: since.to_string(),
|
||||||
})?
|
})?
|
||||||
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap());
|
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap())
|
||||||
|
};
|
||||||
|
|
||||||
Ok(Membership {
|
Ok(Membership {
|
||||||
name,
|
name,
|
||||||
|
|
|
||||||
2562
tests/data/team_4105.html
Normal file
2562
tests/data/team_4105.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -29,6 +29,14 @@ fn test_parse_team_html() {
|
||||||
assert_json_snapshot!(parsed);
|
assert_json_snapshot!(parsed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_parse_older_team_html() {
|
||||||
|
let body = read_to_string("tests/data/team_4105.html").unwrap();
|
||||||
|
let parser = TeamParser::new();
|
||||||
|
let parsed = parser.parse(&body).unwrap();
|
||||||
|
assert_json_snapshot!(parsed);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_parse_team_changed_name_html() {
|
fn test_parse_team_changed_name_html() {
|
||||||
let body = read_to_string("tests/data/team_6929.html").unwrap();
|
let body = read_to_string("tests/data/team_6929.html").unwrap();
|
||||||
|
|
|
||||||
73
tests/snapshots/snapshot__parse_older_team_html.snap
Normal file
73
tests/snapshots/snapshot__parse_older_team_html.snap
Normal file
|
|
@ -0,0 +1,73 @@
|
||||||
|
---
|
||||||
|
source: tests/snapshot.rs
|
||||||
|
expression: parsed
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"name": "Melting Pot",
|
||||||
|
"tag": "Melting Pot",
|
||||||
|
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f7/f75809d7774c917be9883370d772d3099bfe457d_full.jpg",
|
||||||
|
"format": "TF2 Highlander",
|
||||||
|
"timezone": "West-Euro",
|
||||||
|
"division": "Euro Platinum",
|
||||||
|
"description": "",
|
||||||
|
"titles": [],
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"name": "uncle spoodg[A]",
|
||||||
|
"steam_id": 76561198014928031,
|
||||||
|
"role": "Leader",
|
||||||
|
"since": "+002012-01-29T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "CratZ crayonnaise",
|
||||||
|
"steam_id": 76561198021136779,
|
||||||
|
"role": "Member",
|
||||||
|
"since": "+002012-01-30T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "calm``",
|
||||||
|
"steam_id": 76561197968127159,
|
||||||
|
"role": "Member",
|
||||||
|
"since": "+002012-01-30T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "Plysse",
|
||||||
|
"steam_id": 76561198028175255,
|
||||||
|
"role": "Member",
|
||||||
|
"since": "+002012-05-30T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "calm``",
|
||||||
|
"steam_id": 76561197968127159,
|
||||||
|
"role": "Member",
|
||||||
|
"since": "+002012-05-30T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "pwalt",
|
||||||
|
"steam_id": 76561197988404333,
|
||||||
|
"role": "Member",
|
||||||
|
"since": "+002012-09-14T00:00:00.000000000Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"season": 8,
|
||||||
|
"division": "Euro Platinum",
|
||||||
|
"wins": 1,
|
||||||
|
"losses": 4
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"season": 7,
|
||||||
|
"division": "*West European",
|
||||||
|
"wins": 1,
|
||||||
|
"losses": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"season": 6,
|
||||||
|
"division": "*West European",
|
||||||
|
"wins": 6,
|
||||||
|
"losses": 3
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"name_changes": []
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue