mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
handle empty player name
This commit is contained in:
parent
b784854c1d
commit
abd11dbe45
6 changed files with 6781 additions and 11 deletions
|
|
@ -25,6 +25,7 @@ const SELECTOR_TEAM_TITLES: &str = ".container .col-md-3 .white-row-small p > .t
|
||||||
const SELECTOR_TEAM_MEMBER_ROW: &str =
|
const SELECTOR_TEAM_MEMBER_ROW: &str =
|
||||||
".container .white-row-small > .row-fluid > .col-md-12 > .white-row-light-small";
|
".container .white-row-small > .row-fluid > .col-md-12 > .white-row-light-small";
|
||||||
const SELECTOR_TEAM_MEMBER_LINK: &str = "b > a[href^=\"players_page\"]";
|
const SELECTOR_TEAM_MEMBER_LINK: &str = "b > a[href^=\"players_page\"]";
|
||||||
|
const SELECTOR_TEAM_MEMBER_STEAM_LINK: &str = "div > a[href*=\"profiles/\"]";
|
||||||
const SELECTOR_TEAM_MEMBER_ROLE: &str = ".tinytext";
|
const SELECTOR_TEAM_MEMBER_ROLE: &str = ".tinytext";
|
||||||
const SELECTOR_TEAM_MEMBER_SINCE: &str = ".tinytext > em";
|
const SELECTOR_TEAM_MEMBER_SINCE: &str = ".tinytext > em";
|
||||||
|
|
||||||
|
|
@ -58,6 +59,7 @@ pub struct TeamParser {
|
||||||
|
|
||||||
selector_team_member_row: Selector,
|
selector_team_member_row: Selector,
|
||||||
selector_team_member_link: Selector,
|
selector_team_member_link: Selector,
|
||||||
|
selector_team_member_steam_link: Selector,
|
||||||
selector_team_member_role: Selector,
|
selector_team_member_role: Selector,
|
||||||
selector_team_member_since: Selector,
|
selector_team_member_since: Selector,
|
||||||
|
|
||||||
|
|
@ -96,6 +98,8 @@ impl TeamParser {
|
||||||
|
|
||||||
selector_team_member_row: Selector::parse(SELECTOR_TEAM_MEMBER_ROW).unwrap(),
|
selector_team_member_row: Selector::parse(SELECTOR_TEAM_MEMBER_ROW).unwrap(),
|
||||||
selector_team_member_link: Selector::parse(SELECTOR_TEAM_MEMBER_LINK).unwrap(),
|
selector_team_member_link: Selector::parse(SELECTOR_TEAM_MEMBER_LINK).unwrap(),
|
||||||
|
selector_team_member_steam_link: Selector::parse(SELECTOR_TEAM_MEMBER_STEAM_LINK)
|
||||||
|
.unwrap(),
|
||||||
selector_team_member_role: Selector::parse(SELECTOR_TEAM_MEMBER_ROLE).unwrap(),
|
selector_team_member_role: Selector::parse(SELECTOR_TEAM_MEMBER_ROLE).unwrap(),
|
||||||
selector_team_member_since: Selector::parse(SELECTOR_TEAM_MEMBER_SINCE).unwrap(),
|
selector_team_member_since: Selector::parse(SELECTOR_TEAM_MEMBER_SINCE).unwrap(),
|
||||||
|
|
||||||
|
|
@ -247,19 +251,15 @@ impl Parser for TeamParser {
|
||||||
let members = document
|
let members = document
|
||||||
.select(&self.selector_team_member_row)
|
.select(&self.selector_team_member_row)
|
||||||
.map(|row| {
|
.map(|row| {
|
||||||
let link = row.select(&self.selector_team_member_link).next().ok_or(
|
let link = row
|
||||||
ParseError::ElementNotFound {
|
.select(&self.selector_team_member_link)
|
||||||
|
.next()
|
||||||
|
.or_else(|| row.select(&self.selector_team_member_steam_link).next())
|
||||||
|
.ok_or(ParseError::ElementNotFound {
|
||||||
selector: SELECTOR_TEAM_MEMBER_LINK,
|
selector: SELECTOR_TEAM_MEMBER_LINK,
|
||||||
role: "team member link",
|
role: "team member link",
|
||||||
},
|
})?;
|
||||||
)?;
|
let name = link.first_text().unwrap_or_default().to_string();
|
||||||
let name = link
|
|
||||||
.first_text()
|
|
||||||
.ok_or(ParseError::EmptyText {
|
|
||||||
selector: SELECTOR_TEAM_MEMBER_LINK,
|
|
||||||
role: "team member link",
|
|
||||||
})?
|
|
||||||
.to_string();
|
|
||||||
let link = link.attr("href").unwrap_or_default();
|
let link = link.attr("href").unwrap_or_default();
|
||||||
|
|
||||||
let role = select_text(row, &self.selector_team_member_role)
|
let role = select_text(row, &self.selector_team_member_role)
|
||||||
|
|
|
||||||
3185
tests/data/player_76561197967332647.html
Normal file
3185
tests/data/player_76561197967332647.html
Normal file
File diff suppressed because it is too large
Load diff
3520
tests/data/team_3975.html
Normal file
3520
tests/data/team_3975.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -12,6 +12,7 @@ use ugc_scraper::parser::{
|
||||||
|
|
||||||
#[test_case("player_76561198024494988.html", "player")]
|
#[test_case("player_76561198024494988.html", "player")]
|
||||||
#[test_case("player_76561198049312442.html", "player_classes")]
|
#[test_case("player_76561198049312442.html", "player_classes")]
|
||||||
|
#[test_case("player_76561197967332647.html", "player_empty")]
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
fn test_parse_player_html(input: &str, name: &str) {
|
fn test_parse_player_html(input: &str, name: &str) {
|
||||||
let body = read_to_string(format!("tests/data/{input}")).unwrap();
|
let body = read_to_string(format!("tests/data/{input}")).unwrap();
|
||||||
|
|
@ -36,6 +37,7 @@ fn test_parse_player_details_html(input: &str, name: &str) {
|
||||||
#[test_case("team_32437.html", "team_empty_name_change")]
|
#[test_case("team_32437.html", "team_empty_name_change")]
|
||||||
#[test_case("team_29228.html", "team_newlines_join_date")]
|
#[test_case("team_29228.html", "team_newlines_join_date")]
|
||||||
#[test_case("team_10763.html", "team_na_4v4")]
|
#[test_case("team_10763.html", "team_na_4v4")]
|
||||||
|
#[test_case("team_3975.html", "team_empty_player")]
|
||||||
#[cfg(feature = "serde")]
|
#[cfg(feature = "serde")]
|
||||||
fn test_parse_team_html(input: &str, name: &str) {
|
fn test_parse_team_html(input: &str, name: &str) {
|
||||||
let body = read_to_string(format!("tests/data/{input}")).unwrap();
|
let body = read_to_string(format!("tests/data/{input}")).unwrap();
|
||||||
|
|
|
||||||
18
tests/snapshots/snapshot__parse_player_empty_html.snap
Normal file
18
tests/snapshots/snapshot__parse_player_empty_html.snap
Normal file
|
|
@ -0,0 +1,18 @@
|
||||||
|
---
|
||||||
|
source: tests/snapshot.rs
|
||||||
|
expression: parsed
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"name": "",
|
||||||
|
"avatar": "",
|
||||||
|
"steam_id": "76561197967332647",
|
||||||
|
"honors": [
|
||||||
|
{
|
||||||
|
"format": "TF2 Highlander",
|
||||||
|
"season": "Season 6 NA Steel",
|
||||||
|
"team": "Penguin Doom Squad"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"teams": [],
|
||||||
|
"favorite_classes": []
|
||||||
|
}
|
||||||
45
tests/snapshots/snapshot__parse_team_empty_player_html.snap
Normal file
45
tests/snapshots/snapshot__parse_team_empty_player_html.snap
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
---
|
||||||
|
source: tests/snapshot.rs
|
||||||
|
expression: parsed
|
||||||
|
---
|
||||||
|
{
|
||||||
|
"name": "Penguin Doom Squad",
|
||||||
|
"tag": "{pDs}",
|
||||||
|
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/7d/7d23af88d8b08b555db373ab5b65ce973b02f35c.jpg",
|
||||||
|
"format": "9v9",
|
||||||
|
"region": "north-america",
|
||||||
|
"timezone": "West",
|
||||||
|
"steam_group": "http://steamcommunity.com/groups/pds_team",
|
||||||
|
"division": "NA Steel",
|
||||||
|
"description": "",
|
||||||
|
"titles": [],
|
||||||
|
"members": [
|
||||||
|
{
|
||||||
|
"name": "Muffalopadus",
|
||||||
|
"steam_id": "76561198007659326",
|
||||||
|
"role": "member",
|
||||||
|
"since": "+002012-01-23T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "pDs DaLton",
|
||||||
|
"steam_id": "76561197960632490",
|
||||||
|
"role": "member",
|
||||||
|
"since": "+002012-01-23T00:00:00.000000000Z"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "",
|
||||||
|
"steam_id": "76561197967332647",
|
||||||
|
"role": "member",
|
||||||
|
"since": "+002012-01-23T00:00:00.000000000Z"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"results": [
|
||||||
|
{
|
||||||
|
"season": 6,
|
||||||
|
"division": "NA Steel",
|
||||||
|
"wins": 8,
|
||||||
|
"losses": 5
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"name_changes": []
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue