mirror of
https://codeberg.org/icewind/ugc-scaper.git
synced 2026-06-03 10:14:11 +02:00
team parse fixes
This commit is contained in:
parent
8fd98b708f
commit
ddbac7dc79
16 changed files with 4505 additions and 295 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
|
@ -1884,6 +1884,7 @@ version = "0.5.0"
|
|||
dependencies = [
|
||||
"insta",
|
||||
"main_error",
|
||||
"regex",
|
||||
"reqwest",
|
||||
"scraper",
|
||||
"steamid-ng",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ thiserror = "2.0.3"
|
|||
time = { version = "0.3.41", features = ["parsing", "macros"] }
|
||||
steamid-ng = "1.0.0"
|
||||
ugc-scraper-types = { version = "0.2.0", path = "./types" }
|
||||
regex = "1.11.1"
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread", "rt"] }
|
||||
|
|
|
|||
583
api-server/Cargo.lock
generated
583
api-server/Cargo.lock
generated
File diff suppressed because it is too large
Load diff
|
|
@ -7,7 +7,8 @@ edition = "2021"
|
|||
tokio = { version = "1.44.2", features = ["macros", "rt-multi-thread", "rt", "signal"] }
|
||||
main_error = "0.1.2"
|
||||
#ugc-scraper = { version = "*", path = ".." }
|
||||
ugc-scraper = "0.4.4"
|
||||
ugc-scraper = { version = "0.5.0", git = "https://github.com/icewind1991/ugc-scaper" }
|
||||
#ugc-scraper = "0.5.0"
|
||||
axum = "0.8.3"
|
||||
steamid-ng = "1.0.0"
|
||||
thiserror = "2.0.12"
|
||||
|
|
|
|||
|
|
@ -62,15 +62,15 @@ async fn main() -> MainResult {
|
|||
// build our application with a route
|
||||
let app = Router::new()
|
||||
.route("/", get(handler))
|
||||
.route("/player/:id", get(player))
|
||||
.route("/player/:id/history", get(player_history))
|
||||
.route("/teams/:format", get(teams))
|
||||
.route("/transactions/:format", get(transactions))
|
||||
.route("/team/:id", get(team))
|
||||
.route("/team/:id/roster", get(team_roster))
|
||||
.route("/team/:id/matches", get(team_matches))
|
||||
.route("/match/:id", get(match_page))
|
||||
.route("/maps/:format", get(map_history))
|
||||
.route("/player/{id}", get(player))
|
||||
.route("/player/{id}/history", get(player_history))
|
||||
.route("/teams/{format}", get(teams))
|
||||
.route("/transactions/{format}", get(transactions))
|
||||
.route("/team/{id}", get(team))
|
||||
.route("/team/{id}/roster", get(team_roster))
|
||||
.route("/team/{id}/matches", get(team_matches))
|
||||
.route("/match/{id}", get(match_page))
|
||||
.route("/maps/{format}", get(map_history))
|
||||
.with_state(AppState::default());
|
||||
|
||||
let listener = TcpListener::bind((Ipv4Addr::new(127, 0, 0, 1), port)).await?;
|
||||
|
|
|
|||
|
|
@ -29,5 +29,8 @@ rustPlatform.buildRustPackage rec {
|
|||
|
||||
cargoLock = {
|
||||
lockFile = ./api-server/Cargo.lock;
|
||||
outputHashes = {
|
||||
"ugc-scraper-0.5.0" = "sha256-xuvuhNLKCgI/wPhMXPxBlgZGdkn6qnpxCV17TCNg/xM=";
|
||||
};
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ fn select_last_text<'a>(el: ElementRef<'a>, selector: &Selector) -> Option<&'a s
|
|||
const DATE_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month padding:none]/[day padding:none]/[year]");
|
||||
const MEMBER_DATE_FORMAT: &[FormatItem<'static>] = format_description!(
|
||||
"[month repr:short] [day padding:none], [year]\n/\n[hour padding:none]:[minute] [period]\n(ET)"
|
||||
"[month repr:short] [day padding:none], [year] / [hour padding:none]:[minute] [period] (ET)"
|
||||
);
|
||||
const MEMBER_DATE_ALT_FORMAT: &[FormatItem<'static>] =
|
||||
format_description!("[month repr:short] [day padding:none], [year]");
|
||||
|
|
|
|||
|
|
@ -4,8 +4,10 @@ use crate::parser::{
|
|||
select_text, steam_id_from_link, DATE_FORMAT, MEMBER_DATE_ALT_FORMAT, MEMBER_DATE_FORMAT,
|
||||
};
|
||||
use crate::{ParseError, Result, ScrapeError};
|
||||
use regex::Regex;
|
||||
use scraper::{Html, Selector};
|
||||
use std::str::FromStr;
|
||||
use std::sync::OnceLock;
|
||||
use time::{Date, PrimitiveDateTime, Time, UtcOffset};
|
||||
use ugc_scraper_types::{GameMode, Region};
|
||||
|
||||
|
|
@ -112,10 +114,14 @@ impl TeamParser {
|
|||
}
|
||||
}
|
||||
|
||||
static WHITESPACE_REGEX: OnceLock<Regex> = OnceLock::new();
|
||||
|
||||
impl Parser for TeamParser {
|
||||
type Output = Team;
|
||||
|
||||
fn parse(&self, document: &str) -> Result<Self::Output> {
|
||||
let whitespace_regex = WHITESPACE_REGEX.get_or_init(|| Regex::new("[\n\t ]+").unwrap());
|
||||
|
||||
let document = Html::parse_document(document);
|
||||
let root = document.root_element();
|
||||
let mut name = select_text(root, &self.selector_name)
|
||||
|
|
@ -175,6 +181,7 @@ impl Parser for TeamParser {
|
|||
let region = division
|
||||
.split(' ')
|
||||
.find_map(|part| Region::from_str(part).ok())
|
||||
.or_else(|| Region::from_str(&division).ok())
|
||||
.ok_or_else(|| ParseError::InvalidText {
|
||||
text: division.clone(),
|
||||
role: "team region",
|
||||
|
|
@ -279,7 +286,7 @@ impl Parser for TeamParser {
|
|||
},
|
||||
)?;
|
||||
let role = role.trim().to_string();
|
||||
let since = since.trim();
|
||||
let since = whitespace_regex.replace_all(since.trim(), " ");
|
||||
let since = if since.starts_with('(') {
|
||||
let part = since
|
||||
.split_once('-')
|
||||
|
|
@ -295,7 +302,7 @@ impl Parser for TeamParser {
|
|||
})?;
|
||||
PrimitiveDateTime::new(date, Time::MIDNIGHT).assume_offset(UtcOffset::UTC)
|
||||
} else {
|
||||
PrimitiveDateTime::parse(since, MEMBER_DATE_FORMAT)
|
||||
PrimitiveDateTime::parse(&since, MEMBER_DATE_FORMAT)
|
||||
.map_err(|_| ParseError::InvalidDate {
|
||||
role: "member join date",
|
||||
date: since.to_string(),
|
||||
|
|
|
|||
4080
tests/data/team_29228.html
Normal file
4080
tests/data/team_29228.html
Normal file
File diff suppressed because it is too large
Load diff
|
|
@ -34,6 +34,7 @@ fn test_parse_player_details_html(input: &str, name: &str) {
|
|||
#[test_case("team_8157.html", "team_no_tz")]
|
||||
#[test_case("team_6929.html", "team_changed_name")]
|
||||
#[test_case("team_32437.html", "team_empty_name_change")]
|
||||
#[test_case("team_29228.html", "team_newlines_join_date")]
|
||||
#[cfg(feature = "serde")]
|
||||
fn test_parse_team_html(input: &str, name: &str) {
|
||||
let body = read_to_string(format!("tests/data/{input}")).unwrap();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ expression: parsed
|
|||
"tag": "Melting Pot",
|
||||
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/f7/f75809d7774c917be9883370d772d3099bfe457d_full.jpg",
|
||||
"format": "9v9",
|
||||
"region": "Euro",
|
||||
"region": "Europe",
|
||||
"timezone": "West-Euro",
|
||||
"steam_group": "http://steamcommunity.com/groups/Melintongpotsss",
|
||||
"division": "Euro Platinum",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ expression: parsed
|
|||
"tag": "Europe",
|
||||
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/09/096a30b1025c586f9d41c686077129f6e86998d0_full.jpg",
|
||||
"format": "6v6",
|
||||
"region": "Europe",
|
||||
"timezone": "West-Euro",
|
||||
"steam_group": null,
|
||||
"division": "Europe",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ expression: parsed
|
|||
"tag": "PNKTSU",
|
||||
"image": "clan_avatars/32437_thumbnail.jpg",
|
||||
"format": "9v9",
|
||||
"region": "NorthAmerica",
|
||||
"timezone": null,
|
||||
"steam_group": "https://steamcommunity.com/groups/xyxxx-",
|
||||
"division": "North America",
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ expression: parsed
|
|||
"tag": "-Xe-",
|
||||
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/db/dbabbd8bab7ccf6d27a9d4ca2e73a76e085bb201_full.jpg",
|
||||
"format": "9v9",
|
||||
"region": "Euro",
|
||||
"region": "Europe",
|
||||
"timezone": "West-Euro",
|
||||
"steam_group": "https://steamcommunity.com/groups/XenonxTF2",
|
||||
"division": "Euro Platinum",
|
||||
|
|
|
|||
|
|
@ -0,0 +1,87 @@
|
|||
---
|
||||
source: tests/snapshot.rs
|
||||
expression: parsed
|
||||
---
|
||||
{
|
||||
"name": "#1 Intercollegiate TF2 Team",
|
||||
"tag": "#1TF2Team",
|
||||
"image": "https://steamcdn-a.akamaihd.net/steamcommunity/public/images/avatars/b9/b9cc1e4949f5e6d0364f9ee59a13cc931fd3aab1_full.jpg",
|
||||
"format": "9v9",
|
||||
"region": "NorthAmerica",
|
||||
"timezone": "East",
|
||||
"steam_group": "http://steamcommunity.com/groups/NumberOneTF2Team",
|
||||
"division": "Main NA",
|
||||
"description": "We're uncontested #1. (pls contact copycat for any team stuff)",
|
||||
"titles": [],
|
||||
"members": [
|
||||
{
|
||||
"name": "copycat",
|
||||
"steam_id": "76561198072397106",
|
||||
"role": "Leader",
|
||||
"since": "+002019-01-22T11:04:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "Mei",
|
||||
"steam_id": "76561198134574357",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T11:07:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "melstrom",
|
||||
"steam_id": "76561198068890768",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T11:07:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "Otter Speaking Eng",
|
||||
"steam_id": "76561198073466450",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T11:13:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "DarkSlayer415",
|
||||
"steam_id": "76561198053913751",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T05:54:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "corn face",
|
||||
"steam_id": "76561198066113821",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T06:59:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "erin",
|
||||
"steam_id": "76561198307572837",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-22T07:17:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "Naps",
|
||||
"steam_id": "76561198061524698",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-23T04:16:00.000000000-05:00"
|
||||
},
|
||||
{
|
||||
"name": "java",
|
||||
"steam_id": "76561198158291013",
|
||||
"role": "Member",
|
||||
"since": "+002019-01-23T08:52:00.000000000-05:00"
|
||||
}
|
||||
],
|
||||
"results": [
|
||||
{
|
||||
"season": 43,
|
||||
"division": "Main NA",
|
||||
"wins": 0,
|
||||
"losses": 0
|
||||
},
|
||||
{
|
||||
"season": 27,
|
||||
"division": "Main NA",
|
||||
"wins": 2,
|
||||
"losses": 4
|
||||
}
|
||||
],
|
||||
"name_changes": []
|
||||
}
|
||||
|
|
@ -359,7 +359,7 @@ pub struct InvalidRegion {
|
|||
|
||||
#[derive(Serialize, Deserialize, Copy, Clone, Debug)]
|
||||
pub enum Region {
|
||||
Euro,
|
||||
Europe,
|
||||
NorthAmerica,
|
||||
SouthAmerica,
|
||||
Asia,
|
||||
|
|
@ -371,11 +371,13 @@ impl FromStr for Region {
|
|||
|
||||
fn from_str(s: &str) -> Result<Self, Self::Err> {
|
||||
match s {
|
||||
"Euro" => Ok(Region::Euro),
|
||||
"EU" => Ok(Region::Euro),
|
||||
"Euro" => Ok(Region::Europe),
|
||||
"Europe" => Ok(Region::Europe),
|
||||
"EU" => Ok(Region::Europe),
|
||||
"Asia" => Ok(Region::Asia),
|
||||
"ASIA" => Ok(Region::Asia),
|
||||
"NA" => Ok(Region::NorthAmerica),
|
||||
"North America" => Ok(Region::NorthAmerica),
|
||||
"South American" => Ok(Region::SouthAmerica),
|
||||
"SA" => Ok(Region::SouthAmerica),
|
||||
"AUS" => Ok(Region::Australia),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue