mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 18:24:10 +02:00
handle older(?) team membership format
This commit is contained in:
parent
4b11b636c7
commit
dc400747fd
7 changed files with 2673 additions and 12 deletions
2
api-server/Cargo.lock
generated
2
api-server/Cargo.lock
generated
|
|
@ -1743,8 +1743,6 @@ dependencies = [
|
|||
[[package]]
|
||||
name = "ugc-scraper"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "11be22a2de36995a81b56450a26132bb8718296f60616e32487f59cd2cd49e12"
|
||||
dependencies = [
|
||||
"reqwest",
|
||||
"scraper",
|
||||
|
|
|
|||
|
|
@ -6,8 +6,8 @@ edition = "2021"
|
|||
[dependencies]
|
||||
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
|
||||
main_error = "0.1.2"
|
||||
#ugc-scraper = { path = "../", version = "0.2.1" }
|
||||
ugc-scraper = "0.2.1"
|
||||
ugc-scraper = { path = "../", version = "0.2.1" }
|
||||
#ugc-scraper = "0.2.1"
|
||||
axum = "0.6.20"
|
||||
steamid-ng = "1.0.0"
|
||||
thiserror = "1.0.50"
|
||||
|
|
|
|||
|
|
@ -71,6 +71,8 @@ const DATE_FORMAT: &[FormatItem<'static>] =
|
|||
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
|
||||
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
|
||||
);
|
||||
const MEMBER_DATE_ALT_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month repr:short] [day padding:none], [year]");
|
||||
const ROSTER_HISTORY_DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month repr:short] [day padding:none], [year]");
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
use super::{ElementExt, Parser};
|
||||
use crate::data::{Membership, NameChange, Record, Team};
|
||||
use crate::parser::{select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_FORMAT};
|
||||
use crate::parser::{
|
||||
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
|
||||
};
|
||||
use crate::{ParseError, Result, ScrapeError};
|
||||
use scraper::{Html, Selector};
|
||||
use time::{Date, PrimitiveDateTime, UtcOffset};
|
||||
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
|
||||
|
||||
const SELECTOR_TEAM_NAME: &str = ".container .col-md-12 h1 > b";
|
||||
const SELECTOR_TEAM_TAG: &str = ".container .col-md-12 h1 > span";
|
||||
|
|
@ -257,12 +259,28 @@ impl Parser for TeamParser {
|
|||
)?;
|
||||
let role = role.trim().to_string();
|
||||
let since = since.trim();
|
||||
let since = PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||
.map_err(|_| ParseError::InvalidDate {
|
||||
role: "member join date",
|
||||
date: since.to_string(),
|
||||
})?
|
||||
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap());
|
||||
let since = if since.starts_with('(') {
|
||||
let part = since
|
||||
.split_once('-')
|
||||
.unwrap_or_default()
|
||||
.0
|
||||
.trim()
|
||||
.trim_start_matches('(');
|
||||
let date = Date::parse(part, MEMBER_DATE_ALT_FORMAT).map_err(|_| {
|
||||
ParseError::InvalidDate {
|
||||
role: "member join date (alternate format)",
|
||||
date: since.to_string(),
|
||||
}
|
||||
})?;
|
||||
PrimitiveDateTime::new(date, Time::MIDNIGHT).assume_offset(UtcOffset::UTC)
|
||||
} else {
|
||||
PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||
.map_err(|_| ParseError::InvalidDate {
|
||||
role: "member join date",
|
||||
date: since.to_string(),
|
||||
})?
|
||||
.assume_offset(UtcOffset::from_hms(-5, 0, 0).unwrap())
|
||||
};
|
||||
|
||||
Ok(Membership {
|
||||
name,
|
||||
|
|
|
|||
2562
tests/data/team_4105.html
Normal file
2562
tests/data/team_4105.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -29,6 +29,14 @@ fn test_parse_team_html() {
|
|||
assert_json_snapshot!(parsed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_older_team_html() {
|
||||
let body = read_to_string("tests/data/team_4105.html").unwrap();
|
||||
let parser = TeamParser::new();
|
||||
let parsed = parser.parse(&body).unwrap();
|
||||
assert_json_snapshot!(parsed);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_parse_team_changed_name_html() {
|
||||
let body = read_to_string("tests/data/team_6929.html").unwrap();
|
||||
|
|
|
|||
73
tests/snapshots/snapshot__parse_older_team_html.snap
Normal file
73
tests/snapshots/snapshot__parse_older_team_html.snap
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
---
|
||||
source: tests/snapshot.rs
|
||||
expression: parsed
|
||||
---
|
||||
{
|
||||
"name": "Melting Pot",
|
||||
"tag": "Melting Pot",
|
||||
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f7/f75809d7774c917be9883370d772d3099bfe457d_full.jpg",
|
||||
"format": "TF2 Highlander",
|
||||
"timezone": "West-Euro",
|
||||
"division": "Euro Platinum",
|
||||
"description": "",
|
||||
"titles": [],
|
||||
"members": [
|
||||
{
|
||||
"name": "uncle spoodg[A]",
|
||||
"steam_id": 76561198014928031,
|
||||
"role": "Leader",
|
||||
"since": "+002012-01-29T00:00:00.000000000Z"
|
||||
},
|
||||
{
|
||||
"name": "CratZ crayonnaise",
|
||||
"steam_id": 76561198021136779,
|
||||
"role": "Member",
|
||||
"since": "+002012-01-30T00:00:00.000000000Z"
|
||||
},
|
||||
{
|
||||
"name": "calm``",
|
||||
"steam_id": 76561197968127159,
|
||||
"role": "Member",
|
||||
"since": "+002012-01-30T00:00:00.000000000Z"
|
||||
},
|
||||
{
|
||||
"name": "Plysse",
|
||||
"steam_id": 76561198028175255,
|
||||
"role": "Member",
|
||||
"since": "+002012-05-30T00:00:00.000000000Z"
|
||||
},
|
||||
{
|
||||
"name": "calm``",
|
||||
"steam_id": 76561197968127159,
|
||||
"role": "Member",
|
||||
"since": "+002012-05-30T00:00:00.000000000Z"
|
||||
},
|
||||
{
|
||||
"name": "pwalt",
|
||||
"steam_id": 76561197988404333,
|
||||
"role": "Member",
|
||||
"since": "+002012-09-14T00:00:00.000000000Z"
|
||||
}
|
||||
],
|
||||
"results": [
|
||||
{
|
||||
"season": 8,
|
||||
"division": "Euro Platinum",
|
||||
"wins": 1,
|
||||
"losses": 4
|
||||
},
|
||||
{
|
||||
"season": 7,
|
||||
"division": "*West European",
|
||||
"wins": 1,
|
||||
"losses": 2
|
||||
},
|
||||
{
|
||||
"season": 6,
|
||||
"division": "*West European",
|
||||
"wins": 6,
|
||||
"losses": 3
|
||||
}
|
||||
],
|
||||
"name_changes": []
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue