handle empty player name

This commit is contained in:
Robin Appelman 2025-04-15 00:29:20 +02:00
commit abd11dbe45
6 changed files with 6781 additions and 11 deletions

View file

@ -25,6 +25,7 @@ const SELECTOR_TEAM_TITLES: &str = ".container .col-md-3 .white-row-small p > .t
// CSS selector for a single team-member row on the team page. The parser
// iterates over every match and builds one member entry per row.
const SELECTOR_TEAM_MEMBER_ROW: &str =
".container .white-row-small > .row-fluid > .col-md-12 > .white-row-light-small";
// Anchor (inside a `b`) whose href starts with "players_page". This is the
// first choice for a member's link; its first text node is used as the name
// and its href is read afterwards.
const SELECTOR_TEAM_MEMBER_LINK: &str = "b > a[href^=\"players_page\"]";
// Anchor (inside a `div`) whose href contains "profiles/". Tried only when
// SELECTOR_TEAM_MEMBER_LINK matches nothing in the row, so a member whose
// name is empty can still be parsed from this link.
const SELECTOR_TEAM_MEMBER_STEAM_LINK: &str = "div > a[href*=\"profiles/\"]";
// Element with class "tinytext" inside a member row; its text is read as the
// member's role.
const SELECTOR_TEAM_MEMBER_ROLE: &str = ".tinytext";
// `em` directly inside the "tinytext" element.
// NOTE(review): presumably holds the member's join date ("since") — confirm
// against the parsing code, which is not visible here.
const SELECTOR_TEAM_MEMBER_SINCE: &str = ".tinytext > em";
@ -58,6 +59,7 @@ pub struct TeamParser {
selector_team_member_row: Selector,
selector_team_member_link: Selector,
selector_team_member_steam_link: Selector,
selector_team_member_role: Selector,
selector_team_member_since: Selector,
@ -96,6 +98,8 @@ impl TeamParser {
selector_team_member_row: Selector::parse(SELECTOR_TEAM_MEMBER_ROW).unwrap(),
selector_team_member_link: Selector::parse(SELECTOR_TEAM_MEMBER_LINK).unwrap(),
selector_team_member_steam_link: Selector::parse(SELECTOR_TEAM_MEMBER_STEAM_LINK)
.unwrap(),
selector_team_member_role: Selector::parse(SELECTOR_TEAM_MEMBER_ROLE).unwrap(),
selector_team_member_since: Selector::parse(SELECTOR_TEAM_MEMBER_SINCE).unwrap(),
@ -247,19 +251,15 @@ impl Parser for TeamParser {
let members = document
.select(&self.selector_team_member_row)
.map(|row| {
let link = row.select(&self.selector_team_member_link).next().ok_or(
ParseError::ElementNotFound {
let link = row
.select(&self.selector_team_member_link)
.next()
.or_else(|| row.select(&self.selector_team_member_steam_link).next())
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_TEAM_MEMBER_LINK,
role: "team member link",
},
)?;
let name = link
.first_text()
.ok_or(ParseError::EmptyText {
selector: SELECTOR_TEAM_MEMBER_LINK,
role: "team member link",
})?
.to_string();
})?;
let name = link.first_text().unwrap_or_default().to_string();
let link = link.attr("href").unwrap_or_default();
let role = select_text(row, &self.selector_team_member_role)

File diff suppressed because it is too large Load diff

3520
tests/data/team_3975.html Normal file

File diff suppressed because it is too large Load diff

View file

@ -12,6 +12,7 @@ use ugc_scraper::parser::{
#[test_case("player_76561198024494988.html", "player")]
#[test_case("player_76561198049312442.html", "player_classes")]
#[test_case("player_76561197967332647.html", "player_empty")]
#[cfg(feature = "serde")]
fn test_parse_player_html(input: &str, name: &str) {
let body = read_to_string(format!("tests/data/{input}")).unwrap();
@ -36,6 +37,7 @@ fn test_parse_player_details_html(input: &str, name: &str) {
#[test_case("team_32437.html", "team_empty_name_change")]
#[test_case("team_29228.html", "team_newlines_join_date")]
#[test_case("team_10763.html", "team_na_4v4")]
#[test_case("team_3975.html", "team_empty_player")]
#[cfg(feature = "serde")]
fn test_parse_team_html(input: &str, name: &str) {
let body = read_to_string(format!("tests/data/{input}")).unwrap();

View file

@ -0,0 +1,18 @@
---
source: tests/snapshot.rs
expression: parsed
---
{
"name": "",
"avatar": "",
"steam_id": "76561197967332647",
"honors": [
{
"format": "TF2 Highlander",
"season": "Season 6 NA Steel",
"team": "Penguin Doom Squad"
}
],
"teams": [],
"favorite_classes": []
}

View file

@ -0,0 +1,45 @@
---
source: tests/snapshot.rs
expression: parsed
---
{
"name": "Penguin Doom Squad",
"tag": "{pDs}",
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/7d/7d23af88d8b08b555db373ab5b65ce973b02f35c.jpg",
"format": "9v9",
"region": "north-america",
"timezone": "West",
"steam_group": "http://steamcommunity.com/groups/pds_team",
"division": "NA Steel",
"description": "",
"titles": [],
"members": [
{
"name": "Muffalopadus",
"steam_id": "76561198007659326",
"role": "member",
"since": "+002012-01-23T00:00:00.000000000Z"
},
{
"name": "pDs DaLton",
"steam_id": "76561197960632490",
"role": "member",
"since": "+002012-01-23T00:00:00.000000000Z"
},
{
"name": "",
"steam_id": "76561197967332647",
"role": "member",
"since": "+002012-01-23T00:00:00.000000000Z"
}
],
"results": [
{
"season": 6,
"division": "NA Steel",
"wins": 8,
"losses": 5
}
],
"name_changes": []
}