From 7c185809eb30c2df72561ad2b4339c7258882380 Mon Sep 17 00:00:00 2001 From: Moss Date: Wed, 8 Feb 2023 10:51:53 -0800 Subject: [PATCH] Cookies: Remove __cf_bm Cookies As it is Entirely Optional Turns out you can get everything you need with just the session token and cf_bm must be some kind of tracking token... After looking up "cf_bm" it is clear that it is a bot management token from Cloudflare... Cloudflare Bot Management... cf_bm :0. Damn it I spent so much time not understanding what this is and I could have just looked it up. Anyway, it's no longer required for this program. --- src/cookie.h | 10 ++++ src/episode.cpp | 4 +- src/episode.h | 11 ++--- src/login.cpp | 3 +- src/login.h | 2 +- src/main.cpp | 128 ++++++++++++++++++++---------------------------- src/season.cpp | 8 +-- src/season.h | 12 ++--- src/series.cpp | 8 +-- src/series.h | 14 +++--- src/util.cpp | 56 --------------------- src/util.h | 12 ----- 12 files changed, 93 insertions(+), 175 deletions(-) diff --git a/src/cookie.h b/src/cookie.h index 2881be5..a279a7d 100644 --- a/src/cookie.h +++ b/src/cookie.h @@ -43,6 +43,16 @@ namespace dropout_dl { int len; + /** + * + * Create a cookie with no name, value, or length. 
+ */ + cookie() { + this->name = ""; + this->value = ""; + this->len = 0; + } + /** * * @param name - Name of the value from the sqlite database diff --git a/src/episode.cpp b/src/episode.cpp index 1b8bb22..c7129d7 100644 --- a/src/episode.cpp +++ b/src/episode.cpp @@ -154,7 +154,7 @@ namespace dropout_dl { return ""; } - std::string episode::get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose) { + std::string episode::get_episode_page(const std::string& url, const std::string& session_cookie, bool verbose) { CURLcode ret; CURL *hnd; struct curl_slist *slist1; @@ -167,7 +167,7 @@ namespace dropout_dl { slist1 = curl_slist_append(slist1, "Accept-Encoding: utf-8"); slist1 = curl_slist_append(slist1, "DNT: 1"); slist1 = curl_slist_append(slist1, "Connection: keep-alive"); - slist1 = curl_slist_append(slist1, ("Cookie: locale_det=en; _session=" + session_cookie + "; __cf_bm=" + auth_cookie).c_str()); + slist1 = curl_slist_append(slist1, ("Cookie: locale_det=en; _session=" + session_cookie + ";").c_str()); slist1 = curl_slist_append(slist1, "Upgrade-Insecure-Requests: 1"); slist1 = curl_slist_append(slist1, "Sec-Fetch-Dest: document"); slist1 = curl_slist_append(slist1, "Sec-Fetch-Mode: navigate"); diff --git a/src/episode.h b/src/episode.h index 16274fe..2ec3125 100644 --- a/src/episode.h +++ b/src/episode.h @@ -64,12 +64,11 @@ namespace dropout_dl { /** * * @param url - The url of the episode page - * @param auth_cookie - The authentication cookie with name "__cf_bm". * @param session_cookie - The session cookie with name "_session". 
* @param verbose - Whether or not to be verbose (not recommended) * @return The episode page data */ - static std::string get_episode_page(const std::string& url, const std::string& auth_cookie, const std::string& session_cookie, bool verbose = false); + static std::string get_episode_page(const std::string& url, const std::string& session_cookie, bool verbose = false); /** * @@ -202,11 +201,11 @@ namespace dropout_dl { * Create an episode object from the link using to cookies to get all the necessary information. * This constructor initializes all the object data. */ - episode(const std::string& episode_url, std::vector cookies, const std::string& series, const std::string& season, int episode_number, int season_number, bool verbose = false, bool download_captions = false) { + episode(const std::string& episode_url, cookie session_cookie, const std::string& series, const std::string& season, int episode_number, int season_number, bool verbose = false, bool download_captions = false) { this->episode_url = episode_url; this->verbose = verbose; - episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value); + episode_data = get_episode_page(episode_url, session_cookie.value); if (verbose) { std::cout << "Got page data\n"; @@ -287,11 +286,11 @@ namespace dropout_dl { * Create an episode object from the link using to cookies to get all the necessary information. * This constructor initializes all the object data. 
*/ - episode(const std::string& episode_url, std::vector cookies, bool verbose = false, bool download_captions = false) { + episode(const std::string& episode_url, const cookie& session_cookie, bool verbose = false, bool download_captions = false) { this->episode_url = episode_url; this->verbose = verbose; - episode_data = get_episode_page(episode_url, cookies[0].value, cookies[1].value); + episode_data = get_episode_page(episode_url, session_cookie.value); if (verbose) { std::cout << "Got page data\n"; diff --git a/src/login.cpp b/src/login.cpp index e5a3215..ab0aa2a 100644 --- a/src/login.cpp +++ b/src/login.cpp @@ -1,7 +1,7 @@ #include "login.h" -void dropout_dl::login::get_cookies(std::string& session, std::string& cf_bm) { +void dropout_dl::login::get_cookies(std::string& session) { std::string email; std::string password; @@ -11,6 +11,7 @@ void dropout_dl::login::get_cookies(std::string& session, std::string& cf_bm) { /// Needed to login properly std::string authentication; + std::string cf_bm; get_login_tokens(session, cf_bm, authentication); if (!login_with_tokens(email, password, session, cf_bm, authentication)) { diff --git a/src/login.h b/src/login.h index c0872b8..3d50db7 100644 --- a/src/login.h +++ b/src/login.h @@ -13,7 +13,7 @@ namespace dropout_dl { namespace login { - void get_cookies(std::string& session, std::string& cf_bm); + void get_cookies(std::string& session); void get_login_info_from_file(const std::string& filename, std::string& email, std::string& password); diff --git a/src/main.cpp b/src/main.cpp index 4df2573..49baa51 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -28,7 +28,7 @@ namespace dropout_dl { std::string filename; std::string output_directory; std::string episode; - std::vector cookies; + cookie session_cookie; /** * @@ -81,8 +81,7 @@ namespace dropout_dl { std::cerr << "ARGUMENT PARSE ERROR: --force-cookies used with too few following arguments\n"; exit(8); } - cookies.emplace_back(args[++i]); - 
cookies.emplace_back(args[++i]); + session_cookie = cookie(args[++i]); force_cookies = true; } else if (arg == "output") { @@ -122,7 +121,7 @@ namespace dropout_dl { "\t--output-directory Set the directory where files are output\n" "\t--verbose Display debug information while running\n" "\t--browser-cookies Use cookies from the browser placed in 'firefox_profile' or 'chrome_profile'\n" - "\t--force-cookies Interpret the next to arguments as authentication cookie and session cookie\n" + "\t--force-cookies Interpret the next to argument as the session cookie\n" "\t--series Interpret the url as a link to a series and download all episodes from all seasons\n" "\t--season Interpret the url as a link to a season and download all episodes from all seasons\n" "\t--captions Download the captions along with the episode\n" @@ -178,71 +177,62 @@ namespace dropout_dl { * * Gets the needed cookies from the firefox sqlite database associated with the path provided. */ -std::vector get_cookies_from_firefox(const std::filesystem::path& firefox_profile_path, bool verbose = false) { +dropout_dl::cookie get_cookies_from_firefox(const std::filesystem::path& firefox_profile_path, bool verbose = false) { std::fstream firefox_profile_file(firefox_profile_path); std::string firefox_profile; - dropout_dl::cookie auth("__cf_bm"); dropout_dl::cookie session("_session"); std::vector out; firefox_profile_file >> firefox_profile; - if (std::filesystem::is_directory(firefox_profile)) { - - sqlite3 *db; - - if (verbose) { - std::cout << "Getting firefox cookies from firefox sqlite db\n"; - } - - if (!std::filesystem::is_directory("tmp")) - std::filesystem::create_directories("tmp"); - std::filesystem::remove("tmp/firefox_cookies.sqlite"); - std::filesystem::copy_file(firefox_profile + "/cookies.sqlite", "tmp/firefox_cookies.sqlite"); - - int rc = sqlite3_open("tmp/firefox_cookies.sqlite", &db); - if (rc) { - std::cerr << "Can't open database: " << sqlite3_errmsg(db) << '\n'; - exit(1); - } else { 
- if (verbose) { - std::cout << "Firefox database opened successfully\n"; - } - } - - std::string len; - - auth.get_value_from_db(db, "FROM moz_cookies WHERE host LIKE '%dropout.tv%'", "value"); - - session.get_value_from_db(db, "FROM moz_cookies WHERE host LIKE '%dropout.tv%'", "value"); - - sqlite3_close(db); - - std::filesystem::remove("tmp/firefox_cookies.sqlite"); - - if (std::filesystem::is_empty("tmp")) { - std::filesystem::remove("tmp/"); - } - } - else { + if (!std::filesystem::is_directory(firefox_profile)) { std::cerr << "FIREFOX COOKIE ERROR: Attempted to get cookies from firefox without profile." << std::endl; exit(4); } - if (verbose) { - std::cout << auth.name << ": " << auth.len << ": " << auth.value << '\n'; + sqlite3 *db; + + if (verbose) { + std::cout << "Getting firefox cookies from firefox sqlite db\n"; + } + + /// Firefox locks the database so we have to make a copy. + if (!std::filesystem::is_directory("tmp")) + std::filesystem::create_directories("tmp"); + std::filesystem::remove("tmp/firefox_cookies.sqlite"); + std::filesystem::copy_file(firefox_profile + "/cookies.sqlite", "tmp/firefox_cookies.sqlite"); + + int rc = sqlite3_open("tmp/firefox_cookies.sqlite", &db); + if (rc) { + std::cerr << "Can't open database: " << sqlite3_errmsg(db) << '\n'; + exit(1); + } else { + if (verbose) { + std::cout << "Firefox database opened successfully\n"; + } + } + + std::string len; + + session.get_value_from_db(db, "FROM moz_cookies WHERE host LIKE '%dropout.tv%'", "value"); + + sqlite3_close(db); + + std::filesystem::remove("tmp/firefox_cookies.sqlite"); + + if (std::filesystem::is_empty("tmp")) { + std::filesystem::remove("tmp/"); + } + + if (verbose) { std::cout << session.name << ": " << session.len << ": " << session.value << '\n'; } - out.push_back(auth); - out.push_back(session); - - - return out; + return session; } #ifdef DROPOUT_DL_GCRYPT @@ -256,12 +246,11 @@ std::vector get_cookies_from_firefox(const std::filesystem:: * This function does not 
work for windows and must be modified slightly for mac os. * For mac os the calls to cookie::chrome_decrypt must be passed the parameters detailed in it's documentation. */ -std::vector get_cookies_from_chrome(const std::filesystem::path& chrome_profile_path, bool verbose = false) { +dropout_dl::cookie get_cookies_from_chrome(const std::filesystem::path& chrome_profile_path, bool verbose = false) { std::fstream chrome_profile_file(chrome_profile_path); std::string chrome_profile; - dropout_dl::cookie auth("__cf_bm"); dropout_dl::cookie session("_session"); std::vector out; @@ -288,8 +277,6 @@ std::vector get_cookies_from_chrome(const std::filesystem::p std::string len; - auth.get_value_from_db(db, "FROM cookies WHERE host_key LIKE '%dropout.tv%'", "encrypted_value"); - session.get_value_from_db(db, "FROM cookies WHERE host_key LIKE '%dropout.tv%'", "encrypted_value"); sqlite3_close(db); @@ -300,20 +287,13 @@ std::vector get_cookies_from_chrome(const std::filesystem::p exit(4); } - auth.chrome_decrypt(); - session.chrome_decrypt(); if (verbose) { - std::cout << auth.name << ": " << auth.len << ": " << auth.value << '\n'; - std::cout << session.name << ": " << session.len << ": " << session.value << '\n'; } - out.push_back(auth); - out.push_back(session); - - return out; + return session; } #endif #endif @@ -326,7 +306,7 @@ std::vector get_cookies_from_chrome(const std::filesystem::p * Determines whether to get cookies from firefox or chrome. This function should not be run if cookies are forced using the `--force-cookies` option. * This function checks firefox first so if both firefox and chrome profiles are provided it will use firefox. 
*/ -std::vector get_cookies_from_browser(bool verbose = false) { +dropout_dl::cookie get_cookie_from_browser(bool verbose = false) { std::filesystem::path firefox_profile("firefox_profile"); std::filesystem::path chrome_profile("chrome_profile"); @@ -347,10 +327,8 @@ std::vector get_cookies_from_browser(bool verbose = false) { #endif } - { - std::cerr << "ERROR: dropout.tv cookies could not be found" << std::endl; - exit(7); - } + std::cerr << "ERROR: dropout.tv cookies could not be found" << std::endl; + exit(7); } @@ -377,20 +355,20 @@ int main(int argc, char** argv) { } if (options.browser_cookies) { - options.cookies = get_cookies_from_browser(options.verbose); + options.session_cookie = get_cookie_from_browser(options.verbose); } else if (!options.force_cookies) { - std::string session, cf_bm; - dropout_dl::login::get_cookies(session, cf_bm); + std::string session; + dropout_dl::login::get_cookies(session); - options.cookies = {{"__cf_bm", cf_bm}, {"_session", session}}; + options.session_cookie = dropout_dl::cookie("_session", session); } if (options.is_series) { if (options.verbose) { std::cout << "Getting series\n"; } - dropout_dl::series series(options.url, options.cookies, options.download_captions); + dropout_dl::series series(options.url, options.session_cookie, options.download_captions); series.download(options.quality, options.output_directory); } @@ -398,7 +376,7 @@ int main(int argc, char** argv) { if (options.verbose) { std::cout << "Getting season\n"; } - dropout_dl::season season = dropout_dl::series::get_season(options.url, options.cookies, options.download_captions); + dropout_dl::season season = dropout_dl::series::get_season(options.url, options.session_cookie, options.download_captions); season.download(options.quality, options.output_directory + "/" + season.series_name); } @@ -406,7 +384,7 @@ int main(int argc, char** argv) { if (options.verbose) { std::cout << "Getting episode\n"; } - dropout_dl::episode ep(options.url, options.cookies, 
options.verbose, options.download_captions); + dropout_dl::episode ep(options.url, options.session_cookie, options.verbose, options.download_captions); if (options.verbose) { std::cout << "filename: " << options.filename << '\n'; diff --git a/src/season.cpp b/src/season.cpp index 89b231d..db6fa6f 100644 --- a/src/season.cpp +++ b/src/season.cpp @@ -5,7 +5,7 @@ #include "season.h" namespace dropout_dl { - episode season::get_episode(const std::string& html_data, int& start_point, const std::vector& cookies) { + episode season::get_episode(const std::string& html_data, int& start_point, const cookie& session_cookie) { int link_start = 0; for (int i = start_point; i > 0; i--) { if (substr_is(html_data, i, "series_name, this->name, episode_number, this->season_number, false, this->download_captions); + return episode(html_data.substr(i, j), session_cookie, this->series_name, this->name, episode_number, this->season_number, false, this->download_captions); } } } @@ -38,14 +38,14 @@ namespace dropout_dl { exit(8); } - std::vector season::get_episodes(const std::vector& cookies) { + std::vector season::get_episodes(const cookie& session_cookie) { std::vector out; std::string site_video(R"(class="browse-item-link" data-track-event="site_video")"); for (int i = 0; i < this->page_data.size(); i++) { if (substr_is(this->page_data, i, site_video)) { - episode e = get_episode(this->page_data, i, cookies); + episode e = get_episode(this->page_data, i, session_cookie); if (e.episode_url.empty()) { continue; } diff --git a/src/season.h b/src/season.h index 2f4a448..7d4b651 100644 --- a/src/season.h +++ b/src/season.h @@ -27,17 +27,17 @@ namespace dropout_dl { /// Whether or not to download captions bool download_captions; - episode get_episode(const std::string& html_data, int& start_point, const std::vector& cookies); + episode get_episode(const std::string& html_data, int& start_point, const cookie& session_cookie); /** * * @param html_data - The season page data - * @param 
cookies - The browser cookies + * @param session_cookie - The cookie used to authenticate * @return A vector of all episodes in the season * * Gets all the episodes of the season and returns in a vector */ - std::vector get_episodes(const std::vector& cookies); + std::vector get_episodes(const cookie& session_cookie); /** * @@ -61,12 +61,12 @@ namespace dropout_dl { * * @param url - The url to the webpage of the season * @param name - The name of the season - * @param cookies - The browser cookies + * @param session_cookie - The cookie used to authenticate * @param series_name - The name of the series * * Creates a season object and populates the needed information. */ - season(const std::string& url, const std::string& name, const std::vector& cookies, const std::string& series_name = "", bool download_captions = false) { + season(const std::string& url, const std::string& name, const cookie& session_cookie, const std::string& series_name = "", bool download_captions = false) { this->url = url; this->download_captions = download_captions; this->season_number = get_season_number(this->url); @@ -74,7 +74,7 @@ namespace dropout_dl { this->series_name = series_name; std::cout << series_name << ": " << name << ": " << "\n"; this->page_data = get_generic_page(url); - this->episodes = get_episodes(cookies); + this->episodes = get_episodes(session_cookie); } }; diff --git a/src/series.cpp b/src/series.cpp index 8cc32cf..385ea30 100644 --- a/src/series.cpp +++ b/src/series.cpp @@ -28,7 +28,7 @@ namespace dropout_dl { return "ERROR"; } - std::vector series::get_seasons(const std::vector& cookies) { + std::vector series::get_seasons() { std::vector out; std::string search_class("js-switch-season"); @@ -91,7 +91,7 @@ namespace dropout_dl { } season_name = season_name.substr(name_start, season_name.size() - name_start - name_end); - out.emplace_back(season_url, season_name, cookies, this->name, this->download_captions); + out.emplace_back(season_url, season_name, 
this->session_cookie, this->name, this->download_captions); std::cout << out.back().name << ": " << out.back().url << '\n'; @@ -115,7 +115,7 @@ namespace dropout_dl { } - season series::get_season(const std::string &url, const std::vector& cookies, bool download_captions) { + season series::get_season(const std::string &url, const cookie& session_cookie, bool download_captions) { std::string html_data = get_generic_page(url); std::string search_class("js-switch-season"); @@ -184,7 +184,7 @@ namespace dropout_dl { season_name = season_name.substr(name_start, season_name.size() - name_start - name_end); - return {season_url, season_name, cookies, get_series_name(html_data), download_captions}; + return {season_url, season_name, session_cookie, get_series_name(html_data), download_captions}; } season_url.clear(); diff --git a/src/series.h b/src/series.h index e38145b..e7c6824 100644 --- a/src/series.h +++ b/src/series.h @@ -25,7 +25,7 @@ namespace dropout_dl { /// A vector containing all the season that this series include std::vector seasons; /// A vector containing the cookies needed to download episodes - std::vector cookies; + dropout_dl::cookie session_cookie; /// Whether or not to download captions bool download_captions; @@ -39,14 +39,12 @@ namespace dropout_dl { static std::string get_series_name(const std::string& html_data); /** - * - * @param cookies - The cookies from a browser * * Scrapes the series page for the names and link of all the season. Creates season objects for each of these. * These season object contain all the episodes of the season as episode objects. * The cookies this function takes are passed to the episode objects. 
*/ - std::vector get_seasons(const std::vector& cookies); + std::vector get_seasons(); /** * @@ -56,7 +54,7 @@ namespace dropout_dl { * * Gets the season page, which is really just a series page, and creates a season object with all the episodes of the season */ - static season get_season(const std::string& url, const std::vector& cookies, bool download_captions); + static season get_season(const std::string& url, const cookie& session_cookie, bool download_captions); /** * @@ -74,12 +72,12 @@ namespace dropout_dl { * * Creates a series object and populates the needed variables */ - series(const std::string& url, const std::vector& cookies, bool download_captions = false) { + series(const std::string& url, const dropout_dl::cookie& session_cookie, bool download_captions = false) { this->url = url; this->download_captions = download_captions; this->page_data = get_generic_page(url); this->name = get_series_name(page_data); - this->cookies = cookies; + this->session_cookie = session_cookie; if (name == "ERROR") { std::cerr << "SERIES PARSE ERROR: Could not parse series name\n"; exit(10); @@ -87,7 +85,7 @@ namespace dropout_dl { this->series_directory = format_filename(name); - this->seasons = this->get_seasons(cookies); + this->seasons = this->get_seasons(); } }; diff --git a/src/util.cpp b/src/util.cpp index 304c0c8..2ed4ab6 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -225,62 +225,6 @@ namespace dropout_dl { } - std::string get_generic_page_with_cookies(const std::string& url, std::string& session, std::string& cf_bm) { - CURL *hnd; - struct curl_slist *slist1; - - std::string page_data; - - slist1 = nullptr; - std::string cookies = "Cookie: _session=" + session + "; __cf_bm=" + cf_bm; - slist1 = curl_slist_append(slist1, "Accept: text/html"); - slist1 = curl_slist_append(slist1, "Accept-Language: en-US,en"); - slist1 = curl_slist_append(slist1, "Accept-Encoding: utf-8"); - slist1 = curl_slist_append(slist1, "DNT: 1"); - slist1 = curl_slist_append(slist1, 
"Connection: keep-alive"); - slist1 = curl_slist_append(slist1, "Referer: https://www.dropout.tv/"); - slist1 = curl_slist_append(slist1, "Upgrade-Insecure-Requests: 1"); - slist1 = curl_slist_append(slist1, "Sec-Fetch-Mode: navigate"); - slist1 = curl_slist_append(slist1, "Sec-Fetch-Site: cross-site"); - slist1 = curl_slist_append(slist1, cookies.c_str()); - slist1 = curl_slist_append(slist1, "Sec-GPC: 1"); - - hnd = curl_easy_init(); - curl_easy_setopt(hnd, CURLOPT_BUFFERSIZE, 102400L); - curl_easy_setopt(hnd, CURLOPT_URL, url.c_str()); - curl_easy_setopt(hnd, CURLOPT_NOPROGRESS, 1L); - curl_easy_setopt(hnd, CURLOPT_HTTPHEADER, slist1); - curl_easy_setopt(hnd, CURLOPT_MAXREDIRS, 50L); - curl_easy_setopt(hnd, CURLOPT_HTTP_VERSION, (long)CURL_HTTP_VERSION_2TLS); - curl_easy_setopt(hnd, CURLOPT_FTP_SKIP_PASV_IP, 1L); - curl_easy_setopt(hnd, CURLOPT_TCP_KEEPALIVE, 1L); - - curl_easy_setopt(hnd, CURLOPT_WRITEFUNCTION, WriteCallback); - curl_easy_setopt(hnd, CURLOPT_WRITEDATA, &page_data); - - std::string header_string; - curl_easy_setopt(hnd, CURLOPT_HEADERFUNCTION, WriteCallback); - curl_easy_setopt(hnd, CURLOPT_HEADERDATA, &header_string); - - curl_easy_perform(hnd); - - - if (header_string.find("set-cookie: _session=")) { - std::cout << "updated session " << session << "->"; - session = get_substring_in(header_string, "set-cookie: _session=", ";"); - std::cout << session << "\n"; - } - - curl_easy_cleanup(hnd); - hnd = nullptr; - curl_slist_free_all(slist1); - slist1 = nullptr; - - return page_data; - } - - - std::string get_substring_in(const std::string& string, const std::string& begin, const std::string& end, int starting_index) { size_t substring_start = string.find(begin, starting_index); diff --git a/src/util.h b/src/util.h index bb31400..4679d03 100644 --- a/src/util.h +++ b/src/util.h @@ -121,18 +121,6 @@ namespace dropout_dl { */ std::string get_generic_page(const std::string& url, bool verbose = false, std::string* header_string = nullptr); - /** - * - * 
@param url - Url which is being downloaded - * @param session - _session cookie. this is updated if possible - * @param cf_bm - __cf_bm cookie. this is updated if possible - * @return The page data as a string - * - * This function downloads the provided url and returns it as a string. Does not use cookies. This was ripped directly from a firefox network request for an episode page and modified minimally. - */ - std::string get_generic_page_with_cookies(const std::string& url, std::string& session, std::string& cf_bm); - - /** * * @param string - the string that is searched