add match page

This commit is contained in:
Robin Appelman 2023-11-19 00:16:50 +01:00
commit 668edc434a
9 changed files with 3614 additions and 7 deletions

2
api-server/Cargo.lock generated
View file

@ -1743,8 +1743,6 @@ dependencies = [
[[package]] [[package]]
name = "ugc-scraper" name = "ugc-scraper"
version = "0.1.0" version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a65a605e079609b2f105c2f8930a0fb6786766fae5ef8776692cbb8a85b2b56"
dependencies = [ dependencies = [
"reqwest", "reqwest",
"scraper", "scraper",

View file

@ -64,6 +64,7 @@ async fn main() -> MainResult {
.route("/team/:id", get(team)) .route("/team/:id", get(team))
.route("/team/:id/roster", get(team_roster)) .route("/team/:id/roster", get(team_roster))
.route("/team/:id/matches", get(team_matches)) .route("/team/:id/matches", get(team_matches))
.route("/match/:id", get(match_page))
.with_state(AppState::default()); .with_state(AppState::default());
// run it // run it
@ -150,3 +151,13 @@ async fn team_matches(
let response = state.client.team_matches(id).await?; let response = state.client.team_matches(id).await?;
Ok(Json(response)) Ok(Json(response))
} }
/// Handler for `GET /match/:id`.
///
/// Requests the match info for the match id taken from the path through the
/// client stored in the application state and returns it serialized as JSON.
/// Any error from the client is propagated with `?` and returned as an
/// [`ApiError`].
#[instrument(skip(state))]
async fn match_page(
    Path(id): Path<u32>,
    State(state): State<AppState>,
) -> Result<impl IntoResponse, ApiError> {
    // `id` is a match id here, so it is logged under a match-specific field
    // name rather than `team`, which would mislabel it in structured logs.
    debug!(match_id = id, "requesting match");
    let response = state.client.match_info(id).await?;
    Ok(Json(response))
}

View file

@ -189,3 +189,14 @@ pub struct Season {
pub id: String, pub id: String,
pub name: String, pub name: String,
} }
/// Information extracted from a single match page.
///
/// Serializable with serde when the `serde` feature is enabled.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct MatchInfo {
/// Text of the match comment, `None` when no comment was found on the page
pub comment: Option<String>,
/// Author label of the match comment, `None` when none was found on the page
pub comment_author: Option<String>,
/// Name and id of the home team
pub team_home: TeamRef,
/// Name and id of the away team
pub team_away: TeamRef,
/// Score of the home team
pub score_home: u8,
/// Score of the away team
pub score_away: u8,
}

View file

@ -3,10 +3,12 @@ mod error;
#[doc(hidden)] #[doc(hidden)]
pub mod parser; pub mod parser;
use crate::data::{MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason}; use crate::data::{
MatchInfo, MembershipHistory, Player, RosterHistory, Seasons, Team, TeamRef, TeamSeason,
};
use crate::parser::{ use crate::parser::{
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser, MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
TeamParser, TeamRosterHistoryParser, TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
}; };
pub use error::*; pub use error::*;
use reqwest::redirect::Policy; use reqwest::redirect::Policy;
@ -25,6 +27,7 @@ pub struct UgcClient {
team_matches_parser: TeamMatchesParser, team_matches_parser: TeamMatchesParser,
seasons_parser: SeasonsParser, seasons_parser: SeasonsParser,
team_lookup_parser: TeamLookupParser, team_lookup_parser: TeamLookupParser,
match_page_parser: MatchPageParser,
} }
/// "API client" for ugc by scraping the website /// "API client" for ugc by scraping the website
@ -39,6 +42,7 @@ impl UgcClient {
team_matches_parser: TeamMatchesParser::new(), team_matches_parser: TeamMatchesParser::new(),
seasons_parser: SeasonsParser::new(), seasons_parser: SeasonsParser::new(),
team_lookup_parser: TeamLookupParser::new(), team_lookup_parser: TeamLookupParser::new(),
match_page_parser: MatchPageParser::new(),
} }
} }
@ -161,4 +165,19 @@ impl UgcClient {
self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm") self.teams("https://www.ugcleague.com/team_lookup_tf22.cfm")
.await .await
} }
/// Fetch the match page for the match with the given id and parse it into a [`MatchInfo`]
///
/// The page is requested from the `matchpage_tf2h.cfm` endpoint; request errors
/// and parse errors are both returned through the crate's `Result`.
pub async fn match_info(&self, id: u32) -> Result<MatchInfo> {
    let url = format!(
        "https://www.ugcleague.com/matchpage_tf2h.cfm?mid={}",
        id
    );
    let response = self.client.get(&url).send().await?;
    let html = response.text().await?;
    self.match_page_parser.parse(&html)
}
} }

138
src/parser/match_page.rs Normal file
View file

@ -0,0 +1,138 @@
use super::Parser;
use crate::data::{MatchInfo, TeamRef};
use crate::parser::{select_last_text, select_text, team_id_from_link, ElementExt};
use crate::{ParseError, Result};
use scraper::{Html, Selector};
// CSS selectors used by `MatchPageParser`; each one is compiled once in `MatchPageParser::new`.

/// Selects the element holding the author label of the match comment.
const SELECTOR_MATCH_COMMENT_AUTHOR: &str = ".row-fluid .col-md-12 span.text-success";
/// Selects the paragraphs of the match comment; the parser takes its comment text via `select_last_text`.
const SELECTOR_MATCH_COMMENT: &str = ".row-fluid .col-md-12 > .white-row-light-small > p";
/// Selects links to team pages, excluding `.btn-large` links; the first two matches are
/// read as the home and away team, in that order.
const SELECTOR_MATCH_TEAM_LINK: &str = "a[href^=\"team_page\"]:not(.btn-large)";
/// Selects the first cell of the second row of each results table; the parser reads the
/// team names from the first two matches (home, then away).
const SELECTOR_MATCH_RESULT_TEAM: &str =
".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(1)";
/// Selects the second cell of the second row of each results table; the parser reads the
/// team scores from the first two matches (home, then away).
const SELECTOR_MATCH_RESULT_SCORE: &str =
".table.table-condensed.table-bordered tr:nth-child(2) td:nth-child(2)";
/// Parser for match pages.
///
/// Holds the compiled form of every selector needed to read a match page, so
/// the selectors are only parsed once per parser instance.
pub struct MatchPageParser {
    selector_author: Selector,
    selector_comment: Selector,
    selector_team_link: Selector,
    selector_result_team: Selector,
    selector_result_score: Selector,
}

impl Default for MatchPageParser {
    /// Same as [`MatchPageParser::new`].
    fn default() -> Self {
        MatchPageParser::new()
    }
}

impl MatchPageParser {
    /// Create a parser by compiling all match page selectors.
    ///
    /// # Panics
    ///
    /// Panics if any of the selector constants fails to parse as a selector.
    pub fn new() -> Self {
        let selector_author = Selector::parse(SELECTOR_MATCH_COMMENT_AUTHOR).unwrap();
        let selector_comment = Selector::parse(SELECTOR_MATCH_COMMENT).unwrap();
        let selector_team_link = Selector::parse(SELECTOR_MATCH_TEAM_LINK).unwrap();
        let selector_result_team = Selector::parse(SELECTOR_MATCH_RESULT_TEAM).unwrap();
        let selector_result_score = Selector::parse(SELECTOR_MATCH_RESULT_SCORE).unwrap();

        Self {
            selector_author,
            selector_comment,
            selector_team_link,
            selector_result_team,
            selector_result_score,
        }
    }
}
impl Parser for MatchPageParser {
type Output = MatchInfo;
fn parse(&self, document: &str) -> Result<Self::Output> {
let document = Html::parse_document(document);
let author = select_text(document.root_element(), &self.selector_author);
let comment = select_last_text(document.root_element(), &self.selector_comment);
let mut team_links = document.select(&self.selector_team_link);
let team_link_home = team_links.next().ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_TEAM_LINK,
role: "home team link",
})?;
let team_link_away = team_links.next().ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_TEAM_LINK,
role: "away team link",
})?;
let home_team_id = team_id_from_link(team_link_home.attr("href").unwrap_or_default())?;
let away_team_id = team_id_from_link(team_link_away.attr("href").unwrap_or_default())?;
let mut team_names = document.select(&self.selector_result_team);
let team_name_home = team_names
.next()
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_RESULT_TEAM,
role: "home team link",
})?
.first_text()
.ok_or(ParseError::EmptyText {
role: "home team name",
selector: SELECTOR_MATCH_RESULT_TEAM,
})?
.to_string();
let team_name_away = team_names
.next()
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_RESULT_TEAM,
role: "away team link",
})?
.first_text()
.ok_or(ParseError::EmptyText {
role: "away team name",
selector: SELECTOR_MATCH_RESULT_TEAM,
})?
.to_string();
let mut team_scores = document.select(&self.selector_result_score);
let team_score_home = team_scores
.next()
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_RESULT_SCORE,
role: "home team score",
})?
.first_text()
.ok_or(ParseError::EmptyText {
role: "home team score",
selector: SELECTOR_MATCH_RESULT_SCORE,
})?
.parse()
.map_err(|_| ParseError::InvalidText {
role: "away team score",
text: "dont have this".to_string(),
})?;
let team_score_away = team_scores
.next()
.ok_or(ParseError::ElementNotFound {
selector: SELECTOR_MATCH_RESULT_SCORE,
role: "away team link",
})?
.first_text()
.ok_or(ParseError::EmptyText {
role: "away team name",
selector: SELECTOR_MATCH_RESULT_SCORE,
})?
.parse()
.map_err(|_| ParseError::InvalidText {
role: "home team score",
text: "dont have this".to_string(),
})?;
Ok(MatchInfo {
comment_author: author.map(String::from),
comment: comment.map(String::from),
score_away: team_score_away,
score_home: team_score_home,
team_home: TeamRef {
name: team_name_home.to_string(),
id: home_team_id,
},
team_away: TeamRef {
name: team_name_away.to_string(),
id: away_team_id,
},
})
}
}

View file

@ -4,6 +4,7 @@ use steamid_ng::SteamID;
use time::format_description::FormatItem; use time::format_description::FormatItem;
use time::macros::format_description; use time::macros::format_description;
mod match_page;
mod player; mod player;
mod player_details; mod player_details;
mod seasons; mod seasons;
@ -12,6 +13,7 @@ mod team_lookup;
mod team_matches; mod team_matches;
mod team_roster_history; mod team_roster_history;
pub use match_page::*;
pub use player::*; pub use player::*;
pub use player_details::*; pub use player_details::*;
pub use seasons::*; pub use seasons::*;

3402
tests/data/match_116246.html Normal file

File diff suppressed because it is too large Load diff

View file

@ -1,8 +1,8 @@
use insta::assert_json_snapshot; use insta::assert_json_snapshot;
use std::fs::read_to_string; use std::fs::read_to_string;
use ugc_scraper::parser::{ use ugc_scraper::parser::{
Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser, TeamMatchesParser, MatchPageParser, Parser, PlayerDetailsParser, PlayerParser, SeasonsParser, TeamLookupParser,
TeamParser, TeamRosterHistoryParser, TeamMatchesParser, TeamParser, TeamRosterHistoryParser,
}; };
#[test] #[test]
@ -92,3 +92,11 @@ fn test_parse_seasons_2_html() {
let parsed = parser.parse(&body).unwrap(); let parsed = parser.parse(&body).unwrap();
assert_json_snapshot!(parsed); assert_json_snapshot!(parsed);
} }
/// Snapshot test: parse the stored match page fixture and compare the result
/// with the recorded JSON snapshot.
#[test]
fn test_parse_match_html() {
    let html = read_to_string("tests/data/match_116246.html").unwrap();
    let parsed = MatchPageParser::new().parse(&html).unwrap();
    assert_json_snapshot!(parsed);
}

View file

@ -0,0 +1,18 @@
---
source: tests/snapshot.rs
expression: parsed
---
{
"comment": "https://logs.tf/3509421#76561198288857894\r\nhttps://logs.tf/3509435#76561198288857894",
"comment_author": "Vkid E-sports:",
"team_home": {
"name": "Vkid E-Sports",
"id": 32033
},
"team_away": {
"name": "Xenon",
"id": 7861
},
"score_home": 4,
"score_away": 0
}