This commit is contained in:
Robin Appelman 2023-11-15 21:05:11 +01:00
commit 0c58410f6a
11 changed files with 1902 additions and 0 deletions

1
.envrc Normal file
View file

@ -0,0 +1 @@
use flake

3
.gitignore vendored Normal file
View file

@ -0,0 +1,3 @@
/target
result*
.direnv

1603
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

13
Cargo.toml Normal file
View file

@ -0,0 +1,13 @@
# Manifest for the `ugc-scraper` binary crate.
[package]
name = "ugc-scraper"
version = "0.1.0"
edition = "2021"
# Minimum supported Rust version; flake.nix reads this field to select the MSRV toolchain.
rust-version = "1.67"
[dependencies]
# Async runtime behind `#[tokio::main]` in src/main.rs.
tokio = { version = "1.34.0", features = ["macros", "rt-multi-thread", "rt"] }
# HTTP client used to download the page that gets scraped.
reqwest = "0.11.22"
# HTML parsing and CSS selector matching for the parsers.
scraper = "0.18.1"
# `Diagnostic` derive for the error types in src/error.rs.
miette = "5.10.0"
# `Error` derive for the error types in src/error.rs.
thiserror = "1.0.50"
# Provides the `MainResult` return type of `main`.
main_error = "0.1.2"

106
flake.lock generated Normal file
View file

@ -0,0 +1,106 @@
{
"nodes": {
"naersk": {
"inputs": {
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1686242667,
"narHash": "sha256-I7Kwp06WX/9E+rEND1i1wjdKQQm3XiDxYOyNK9fuJu0=",
"owner": "icewind1991",
"repo": "naersk",
"rev": "6d245a3bbb2ee31ec726bb57b9a8b206302e7110",
"type": "github"
},
"original": {
"owner": "icewind1991",
"repo": "naersk",
"rev": "6d245a3bbb2ee31ec726bb57b9a8b206302e7110",
"type": "github"
}
},
"nixpkgs": {
"locked": {
"lastModified": 1699994397,
"narHash": "sha256-xxNeIcMNMXH2EA9IAX6Cny+50mvY22LhIBiGZV363gc=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "d4b5a67bbe9ef750bd2fdffd4cad400dd5553af8",
"type": "github"
},
"original": {
"id": "nixpkgs",
"ref": "nixos-23.05",
"type": "indirect"
}
},
"root": {
"inputs": {
"naersk": "naersk",
"nixpkgs": "nixpkgs",
"rust-overlay": "rust-overlay",
"utils": "utils"
}
},
"rust-overlay": {
"inputs": {
"flake-utils": [
"utils"
],
"nixpkgs": [
"nixpkgs"
]
},
"locked": {
"lastModified": 1700014539,
"narHash": "sha256-YF+AoCicGzsrlNDrXZYCymfVyYwrMBox+6vbLBsrEQM=",
"owner": "oxalica",
"repo": "rust-overlay",
"rev": "ed9fd98b28da90b1b28340f3230d35b2061b9752",
"type": "github"
},
"original": {
"owner": "oxalica",
"repo": "rust-overlay",
"type": "github"
}
},
"systems": {
"locked": {
"lastModified": 1681028828,
"narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
"owner": "nix-systems",
"repo": "default",
"rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
"type": "github"
},
"original": {
"owner": "nix-systems",
"repo": "default",
"type": "github"
}
},
"utils": {
"inputs": {
"systems": "systems"
},
"locked": {
"lastModified": 1694529238,
"narHash": "sha256-zsNZZGTGnMOf9YpHKJqMSsa0dXbfmxeoJ7xHlrt+xmY=",
"owner": "numtide",
"repo": "flake-utils",
"rev": "ff7b65b44d01cf9ba6a71320833626af21126384",
"type": "github"
},
"original": {
"owner": "numtide",
"repo": "flake-utils",
"type": "github"
}
}
},
"root": "root",
"version": 7
}

89
flake.nix Normal file
View file

@ -0,0 +1,89 @@
# Nix flake for the crate: CI-style packages (check / clippy / test / msrv)
# built with naersk, plus development shells for the latest stable and the
# MSRV toolchain.
{
  inputs = {
    nixpkgs.url = "nixpkgs/nixos-23.05";
    utils.url = "github:numtide/flake-utils";
    naersk.url = "github:icewind1991/naersk?rev=6d245a3bbb2ee31ec726bb57b9a8b206302e7110";
    naersk.inputs.nixpkgs.follows = "nixpkgs";
    rust-overlay.url = "github:oxalica/rust-overlay";
    rust-overlay.inputs.nixpkgs.follows = "nixpkgs";
    rust-overlay.inputs.flake-utils.follows = "utils";
  };

  outputs = {
    self,
    nixpkgs,
    utils,
    naersk,
    rust-overlay,
  }:
    utils.lib.eachDefaultSystem (system: let
      overlays = [(import rust-overlay)];
      pkgs = (import nixpkgs) {
        inherit system overlays;
      };
      inherit (pkgs) lib callPackage rust-bin mkShell;
      inherit (lib.sources) sourceByRegex;
      inherit (builtins) fromTOML readFile;

      # Cargo.toml is the single source of truth for the package name and MSRV.
      cargoToml = fromTOML (readFile ./Cargo.toml);
      msrv = cargoToml.package.rust-version;

      toolchain = rust-bin.stable.latest.default;
      msrvToolchain = rust-bin.stable."${msrv}".default;

      naersk' = callPackage naersk {
        rustc = toolchain;
        cargo = toolchain;
      };
      msrvNaersk = callPackage naersk {
        rustc = msrvToolchain;
        cargo = msrvToolchain;
      };

      # Only the cargo manifests and source directories are part of the build
      # input, so unrelated file changes do not trigger rebuilds.
      src = sourceByRegex ./. ["Cargo.*" "(src|derive|benches|tests|examples)(/.*)?"];

      # Options shared by every naersk build below.
      naerskOpt = {
        # Taken from Cargo.toml so the derivation name cannot drift from the crate name.
        pname = cargoToml.package.name;
        root = src;
        # Same native dependencies the dev shells provide: with reqwest's default
        # TLS backend the build needs OpenSSL, which is located through pkg-config.
        nativeBuildInputs = with pkgs; [pkg-config];
        buildInputs = with pkgs; [openssl];
      };
    in rec {
      packages = {
        check = naersk'.buildPackage (naerskOpt
          // {
            mode = "check";
          });
        clippy = naersk'.buildPackage (naerskOpt
          // {
            mode = "clippy";
          });
        test = naersk'.buildPackage (naerskOpt
          // {
            release = false;
            mode = "test";
          });
        # Same as `check`, but compiled with the toolchain matching `rust-version`.
        msrv = msrvNaersk.buildPackage (naerskOpt
          // {
            mode = "check";
          });
      };

      devShells = let
        tools = with pkgs; [
          bacon
          cargo-edit
          cargo-outdated
          cargo-audit
          cargo-msrv
          cargo-semver-checks
        ];
        deps = with pkgs; [
          pkg-config
          openssl
        ];
      in {
        default = mkShell {
          nativeBuildInputs = [toolchain] ++ tools ++ deps;
        };
        msrv = mkShell {
          nativeBuildInputs = [msrvToolchain] ++ tools ++ deps;
        };
      };
    });
}

4
src/data.rs Normal file
View file

@ -0,0 +1,4 @@
/// A player as produced by the scraper.
#[derive(Debug)]
pub struct Player {
    /// Name of the player.
    pub name: String,
}

20
src/error.rs Normal file
View file

@ -0,0 +1,20 @@
use thiserror::Error;
use miette::Diagnostic;
/// Top-level error of the scraper: either fetching or parsing a page failed.
#[derive(Debug, Error, Diagnostic)]
pub enum ScrapeError {
    /// Wraps a [`reqwest::Error`] raised while requesting data.
    #[error("Failed to request data: {0:#}")]
    Request(#[from] reqwest::Error),

    /// Wraps a [`ParseError`]; both the message and the diagnostic are
    /// forwarded unchanged from the inner error.
    #[error(transparent)]
    #[diagnostic(transparent)]
    Parse(#[from] ParseError),
}
/// Errors raised while extracting data from a document.
#[derive(Debug, Error, Diagnostic)]
pub enum ParseError {
    /// A selector that was expected to match did not match any element.
    #[error("Couldn't find expected element '{selector}' for {role}")]
    ElementNotFound {
        /// The selector string that produced no match.
        selector: &'static str,
        /// Human-readable description of what the element was needed for.
        role: &'static str,
    },
}

18
src/main.rs Normal file
View file

@ -0,0 +1,18 @@
mod error;
mod parser;
pub mod data;
use main_error::MainResult;
use reqwest::get;
pub use error::*;
use crate::parser::{PlayerParser, Parser};
/// Crate-wide result alias whose error type defaults to [`ScrapeError`].
pub type Result<T, E = ScrapeError> = std::result::Result<T, E>;
/// Downloads a single hard-coded player page, parses it and prints the
/// resulting player to stdout.
///
/// Request and parse failures are propagated to the caller through
/// [`MainResult`].
#[tokio::main]
async fn main() -> MainResult {
    // The one page this binary currently scrapes.
    const PLAYER_URL: &str =
        "https://www.ugcleague.com/players_page.cfm?player_id=76561198024494988";

    let body = get(PLAYER_URL).await?.text().await?;
    let player = PlayerParser::new().parse(&body)?;
    // Pretty-printed Debug output is the program's actual result, so it goes
    // to stdout rather than through the `dbg!` debugging macro.
    println!("{player:#?}");
    Ok(())
}

10
src/parser/mod.rs Normal file
View file

@ -0,0 +1,10 @@
use crate::Result;
mod player;
pub use player::*;
/// Common interface for types that extract structured data from a document
/// passed in as a string.
pub trait Parser {
    /// The value produced by a successful parse.
    type Output;

    /// Parses `document` into [`Self::Output`], or returns the crate's error
    /// type on failure.
    fn parse(&self, document: &str) -> Result<Self::Output>;
}

35
src/parser/player.rs Normal file
View file

@ -0,0 +1,35 @@
use scraper::{Html, Selector};
use super::Parser;
use crate::{ParseError, Result};
use crate::data::Player;
/// CSS selector used to locate the element holding the player name.
const SELECTOR_PLAYER_NAME: &str = ".col-md-4 > h3 > b";
/// Parser that extracts a [`Player`] from a document.
///
/// The selector is compiled once when the parser is created and reused for
/// every call to `parse`.
pub struct PlayerParser {
    /// Compiled form of `SELECTOR_PLAYER_NAME`.
    selector_name: Selector,
}
impl PlayerParser {
    /// Creates a parser with its selector pre-compiled.
    ///
    /// # Panics
    ///
    /// Panics if `SELECTOR_PLAYER_NAME` is not a valid selector. The selector
    /// is a compile-time constant, so that would be a bug in this crate.
    pub fn new() -> Self {
        PlayerParser {
            selector_name: Selector::parse(SELECTOR_PLAYER_NAME)
                .expect("SELECTOR_PLAYER_NAME must be a valid CSS selector"),
        }
    }
}

impl Default for PlayerParser {
    /// Equivalent to [`PlayerParser::new`].
    fn default() -> Self {
        Self::new()
    }
}
impl Parser for PlayerParser {
    type Output = Player;

    /// Parses `document` as HTML and extracts the player from it.
    ///
    /// # Errors
    ///
    /// Returns [`ParseError::ElementNotFound`] (converted into the crate's
    /// error type) when no element matches `SELECTOR_PLAYER_NAME`.
    fn parse(&self, document: &str) -> Result<Self::Output> {
        let html = Html::parse_document(document);

        let name_element = html
            .select(&self.selector_name)
            .next()
            .ok_or(ParseError::ElementNotFound {
                selector: SELECTOR_PLAYER_NAME,
                role: "player name",
            })?;

        // Only the first text node of the element is used; an element without
        // any text yields an empty name rather than an error.
        let name = name_element.text().next().unwrap_or_default().to_string();

        Ok(Player { name })
    }
}