diff --git a/src/episode.cpp b/src/episode.cpp
index eb3b086..ca71d56 100644
--- a/src/episode.cpp
+++ b/src/episode.cpp
@@ -166,51 +166,61 @@ namespace dropout_dl {
return size * nmemb;
}
+ std::string episode::get_meta_data_json(const std::string& html_data) {
+     // Returns the text between the braces of the page's "window.Page = {...}" object, or "ERROR".
+     const std::string data_start("window.Page = {");
+     const std::size_t marker = html_data.find(data_start);
+     if (marker == std::string::npos) {
+         return "ERROR";
+     }
+     const std::size_t begin = marker + data_start.size();
+     // The current grouping depth. 1 because the marker already consumed the opening brace.
+     int grouping_depth = 1;
+     // Braces inside quoted strings are data, not structure, so they must not change the depth.
+     bool in_string = false;
+     for (std::size_t pos = begin; pos < html_data.size(); pos++) {
+         const char current_char = html_data[pos];
+         if (in_string) {
+             if (current_char == '\\') pos++; // skip the escaped character (it may be a quote)
+             else if (current_char == '"') in_string = false;
+         }
+         else if (current_char == '"') in_string = true;
+         else if (current_char == '{') grouping_depth++;
+         else if (current_char == '}' && --grouping_depth == 0) {
+             // The matching close brace: return what lies between the outer braces.
+             return html_data.substr(begin, pos - begin);
+         }
+     }
+     return "ERROR";
+ }
+
// episode statics
- std::string episode::get_series_name(const std::string& html_data) {
- std::string series_title("series-title");
- std::string open_a_tag("");
- std::string close_a("");
-
- for (int i = 0; i < html_data.size(); i++) {
- if (substr_is(html_data, i, series_title)) {
- for (int j = i + series_title.size(); j < html_data.size(); j++) {
- if (html_data[j] == '\n' || html_data[j] == ' ' || html_data[j] == '\t') continue;
- if (substr_is(html_data, j, open_a_tag)) {
- for (int k = j + open_a_tag.size(); k < html_data.size(); k++) {
- if (substr_is(html_data, k, close_tag)) {
- k++;
- for (int l = 0; l < html_data.size() - k; l++) {
- if (substr_is(html_data, k + l, close_a)) {
- return format_name_string(html_data.substr(k, l));
- }
- }
- }
- }
- }
- }
- }
- }
- return "ERROR";
- }
-
- std::string episode::get_episode_name(const std::string& html_data) {
+ std::string episode::get_series_name(const std::string& meta_data) {
int title_start = -1;
- std::string video_title("video-title");
- std::string open_strong("");
- std::string close_strong("");
- for (int i = 0; i < html_data.size(); i++) {
- if (substr_is(html_data, i, video_title)) {
- for (int j = i; j < html_data.size(); j++) {
- if (substr_is(html_data, j, open_strong)) {
- title_start = j + open_strong.size();
- break;
- }
- }
- for (int j = 0; j < html_data.size() - title_start; j++) {
- if (substr_is(html_data, title_start + j, close_strong)) {
- return format_name_string(html_data.substr(title_start, j));
+ std::string parent_title("\"parent\"");
+ std::string series_title_title("\"name\"");
+ for (int i = 0; i < meta_data.size(); i++) {
+ if (substr_is(meta_data, i, parent_title)) {
+ // Skip "parent" and the two characters after it (presumably the colon and the opening brace of the parent object).
+ i += parent_title.size() + 2;
+
+
+ int j;
+ for (j = 0; meta_data[i + j] != '}' && i + j < meta_data.size(); j++);
+
+ std::string series_data = meta_data.substr(i, j);
+
+ std::cout << "series_data: " << series_data << '\n';
+
+ for (j = 0; j < series_data.size(); j++) {
+ if (substr_is(series_data, j, series_title_title)) {
+ // Skip "name", the following colon, and the opening quotation mark.
+ j += series_title_title.size() + 2;
+
+ int k;
+ for (k = 0; j + k < series_data.size() && series_data[j + k] != '"'; k++);
+
+ return series_data.substr(j, k);
}
}
}
@@ -218,22 +228,38 @@ namespace dropout_dl {
return "ERROR";
}
- std::string episode::get_episode_number(const std::string& html_data) {
- std::string episode("Episode");
- std::string close_a("");
- std::string episode_num;
- for (int i = 0; i < html_data.size(); i++) {
- if (substr_is(html_data, i, episode)) {
- for (int j = i + 8; j < html_data.size(); j++) {
- if (html_data[j] == '\n' || html_data[j] == ' ' || html_data[j] == '\t') continue;
- if (substr_is(html_data, j, close_a)) {
- return episode_num;
- }
- episode_num += html_data[j];
- }
+ std::string episode::get_season_name(const std::string& meta_data) {
+     // Returns the value stored under "COLLECTION_TITLE" in the metadata, or "ERROR" if it is absent.
+     const std::string season_title_title("\"COLLECTION_TITLE\"");
+     const std::size_t key_pos = meta_data.find(season_title_title);
+     // Skip "COLLECTION_TITLE", the following colon, and the opening quotation mark.
+     const std::size_t value_start = key_pos + season_title_title.size() + 2;
+     if (key_pos != std::string::npos && value_start <= meta_data.size()) {
+         std::size_t value_end = value_start;
+         // Stop at the closing quote, stepping over backslash escapes such as \" inside the value.
+         for (; value_end < meta_data.size() && meta_data[value_end] != '"'; value_end++) {
+             if (meta_data[value_end] == '\\') value_end++;
+         }
+         return meta_data.substr(value_start, value_end - value_start);
+     }
- return "-1";
+     return "ERROR";
+ }
+
+ std::string episode::get_episode_name(const std::string& meta_data) {
+     // Returns the value stored under "VIDEO_TITLE" in the metadata, or "ERROR" if it is absent.
+     const std::string video_title_title("\"VIDEO_TITLE\"");
+     const std::size_t key_pos = meta_data.find(video_title_title);
+     // Skip "VIDEO_TITLE", the following colon, and the opening quotation mark.
+     const std::size_t value_start = key_pos + video_title_title.size() + 2;
+     if (key_pos != std::string::npos && value_start <= meta_data.size()) {
+         std::size_t value_end = value_start;
+         // Stop at the closing quote, stepping over backslash escapes such as \" inside the value.
+         for (; value_end < meta_data.size() && meta_data[value_end] != '"'; value_end++) {
+             if (meta_data[value_end] == '\\') value_end++;
+         }
+         return meta_data.substr(value_start, value_end - value_start);
+     }
+     return "ERROR";
}
std::string episode::get_embed_url(const std::string& html_data) {
@@ -459,8 +485,6 @@ namespace dropout_dl {
if(curl) {
std::string out;
-
-
curl_easy_setopt(curl, CURLOPT_URL, get_video_url(quality).c_str());
curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, dropout_dl::WriteCallback);
curl_easy_setopt(curl, CURLOPT_WRITEDATA, &out);
@@ -478,10 +502,7 @@ namespace dropout_dl {
void episode::download(const std::string& quality, const std::string& series_directory, std::string filename) {
if (filename.empty()) {
- filename = "E" + (this->episode_number.size() < 2 ? "0" + this->episode_number : this->episode_number) + this->name +
- ".mp4";
-
- filename = format_filename(filename);
+ filename = this->series + " - " + this->season + " - " + this->name + ".mp4";
}
if (quality == "all") {
diff --git a/src/episode.h b/src/episode.h
index 0c6b8fe..b18d9b2 100644
--- a/src/episode.h
+++ b/src/episode.h
@@ -256,10 +256,12 @@ namespace dropout_dl {
std::string series;
/// The directory for the series
std::string series_directory;
+ /// The name of the season that the episode belongs to
+ std::string season;
+ /// The json metadata of the episode
+ std::string metadata;
/// The name of the episode
std::string name;
- /// The number of the episode in the season. This can be a number or a string
- std::string episode_number;
/// The url for the main episode page
std::string episode_url;
/// The data of the main episode page
@@ -292,33 +294,43 @@ namespace dropout_dl {
*/
static std::string get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose = false);
+ /**
+ *
+ * @param html_data - Episode page data
+ * @return The json data for the episode, or "ERROR" if it could not be found in the page data
+ *
+ * Gets the json metadata of the episode: the contents of the braces that follow "window.Page = " in the page data
+ */
+ static std::string get_meta_data_json(const std::string& html_data);
+
// Parsing
/**
*
- * @param html_data - Episode page data
+ * @param meta_data - Episode metadata in json format
* @return The name of the series
*
- * Get the name of the series from the episode page
+ * Get the name of the series from the metadata (the "name" entry that follows the "parent" key)
*/
- static std::string get_series_name(const std::string& html_data);
+ static std::string get_series_name(const std::string& meta_data);
+
/**
*
- * @param html_data - Episode page data
+ * @param meta_data - Episode metadata in json format
+ * @return The name of the season, or "ERROR" if the metadata has no "COLLECTION_TITLE" entry
+ *
+ * Get the name of the season from the "COLLECTION_TITLE" entry of the metadata
+ */
+ static std::string get_season_name(const std::string& meta_data);
+
+ /**
+ *
+ * @param meta_data - Episode metadata in json format
* @return The name of the episode
*
- * Get the name of the episode from the episode page
+ * Get the name of the episode from the "VIDEO_TITLE" entry of the metadata; yields "ERROR" when that entry is missing
*/
- static std::string get_episode_name(const std::string& html_data);
-
- /**
- *
- * @param html_data - Episode page data
- * @return The number of the episode
- *
- * Get the number of the episode from the episode page
- */
- static std::string get_episode_number(const std::string& html_data);
+ static std::string get_episode_name(const std::string& meta_data);
/**
*
@@ -393,7 +405,17 @@ namespace dropout_dl {
episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value);
- name = get_episode_name(episode_data);
+ if (verbose) {
+ std::cout << "Got page data\n";
+ }
+
+ metadata = get_meta_data_json(episode_data);
+
+ if (verbose) {
+ std::cout << "Got episode metadata: " << metadata << '\n';
+ }
+
+ name = get_episode_name(metadata);
if (verbose) {
std::cout << "Got name: " << name << '\n';
@@ -404,18 +426,18 @@ namespace dropout_dl {
exit(6);
}
- this->episode_number = get_episode_number(episode_data);
-
- if (verbose) {
- std::cout << "Got episode: " << this->episode_number << '\n';
- }
-
- this->series = get_series_name(episode_data);
+ this->series = get_series_name(metadata);
if (verbose) {
std::cout << "Got series: " << this->series << '\n';
}
+ this->season = get_season_name(metadata);
+
+ if (verbose) {
+ std::cout << "Got season: " << this->season << '\n';
+ }
+
this->series_directory = format_filename(this->series);
if (verbose) {
diff --git a/src/main.cpp b/src/main.cpp
index 3f21464..1ddd88b 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -289,7 +289,7 @@ std::vector get_cookies_from_chrome(const std::filesystem::p
*/
std::vector get_cookies(bool verbose = false) {
- std::filesystem::path firefox_profile("_firefox_profile");
+ std::filesystem::path firefox_profile("firefox_profile");
std::filesystem::path chrome_profile("chrome_profile");
if (std::filesystem::exists(firefox_profile)) {
@@ -369,10 +369,6 @@ int main(int argc, char** argv) {
}
}
- if (options.filename.empty()) {
- options.filename = dropout_dl::format_filename(ep.name + ".mp4");
- }
-
ep.download(options.quality, options.output_directory, options.filename);
}
diff --git a/src/season.cpp b/src/season.cpp
index 7aa13aa..6a5f074 100644
--- a/src/season.cpp
+++ b/src/season.cpp
@@ -45,7 +45,7 @@ namespace dropout_dl {
if (e.episode_url.empty()) {
continue;
}
- std::cout << '\t' << e.episode_number << ": " << e.name << '\n';
+ std::cout << '\t' << e.name << '\n';
out.push_back(e);
}
}