libzypp 17.37.17
RepoMirrorList.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include <iostream>
14#include <fstream>
15#include <utility>
16#include <vector>
17#include <time.h>
19#include <zypp-curl/parser/MetaLinkParser>
20#include <zypp/MediaSetAccess.h>
21#include <zypp/base/LogTools.h>
22#include <zypp/ZConfig.h>
23#include <zypp/PathInfo.h>
25
32
34#include <zypp/media/MediaNetworkCommonHandler.h> // for the authentication workflow
35
37
38
40namespace zypp
41{
43 namespace repo
44 {
45
47 namespace
48 {
56 struct RepoMirrorListTempProvider
57 {
58 RepoMirrorListTempProvider()
59 {}
60
61 RepoMirrorListTempProvider( Pathname localfile_r )
62 : _localfile(std::move( localfile_r ))
63 {}
64
65 RepoMirrorListTempProvider( const Url & url_r )
66 {
67 if ( url_r.schemeIsDownloading()
69 && url_r.getQueryStringMap().count("mirrorlist") > 0 ) {
70
71 // Auth will probably never be triggered, but add it for completeness
72 const auto &authCb = [&]( const zypp::Url &, media::TransferSettings &settings, const std::string & availAuthTypes, bool firstTry, bool &canContinue ) {
73 media::CredentialManager cm(media::CredManagerOptions(ZConfig::instance().repoManagerRoot()));
74 if ( media::MediaNetworkCommonHandler::authenticate( url_r, cm, settings, availAuthTypes, firstTry ) ) {
75 canContinue = true;
76 return;
77 }
78 canContinue = false;
79 };
80
81 internal::MediaNetworkRequestExecutor executor;
82 executor.sigAuthRequired ().connect(authCb);
83
84 _tmpfile = filesystem::TmpFile();
85 _localfile = _tmpfile->path();
86
87 // prepare Url and Settings
88 auto url = url_r;
89 auto tSettings = media::TransferSettings();
90 ::internal::prepareSettingsAndUrl( url, tSettings );
91
92 auto req = std::make_shared<zyppng::NetworkRequest>( url_r, _localfile );
93 req->transferSettings () = tSettings;
94 executor.executeRequest ( req, nullptr );
95
96 // apply umask
97 if ( ::chmod( _localfile.c_str(), filesystem::applyUmaskTo( 0644 ) ) )
98 {
99 ERR << "Failed to chmod file " << _localfile << endl;
100 }
101
102 return;
103 }
104
105 // this will handle traditional media including URL resolver plugins
106 Url abs_url( url_r );
107 abs_url.setPathName( "/" );
108 _access.reset( new MediaSetAccess( zypp::MirroredOrigin{abs_url} ) );
109 _localfile = _access->provideFile( url_r.getPathName() );
110
111 }
112
113 const Pathname & localfile() const
114 { return _localfile; }
115 private:
116 shared_ptr<MediaSetAccess> _access;
117 Pathname _localfile;
118 std::optional<filesystem::TmpFile> _tmpfile;
119 };
120
/// Result of sniffing the content of a mirrorlist file.
enum class RepoMirrorListFormat {
  Error          = 0, ///< the format could not be determined
  Empty          = 1, ///< there is nothing to parse
  MirrorListTxt  = 2, ///< plain text mirrorlist
  MirrorListJson = 3, ///< JSON mirrorlist
  MetaLink       = 4  ///< metalink (XML) file
};
128
129 static RepoMirrorListFormat detectRepoMirrorListFormat( const Pathname &localfile ) {
130 // a file starting with < is most likely a metalink file,
131 // a file starting with [ is most likely a json file,
132 // else we go for txt
133 MIL << "Detecting RepoMirrorlist Format based on file content" << std::endl;
134
135 if ( localfile.empty () )
136 return RepoMirrorListFormat::Empty;
137
138 InputStream tmpfstream (localfile);
139 auto &str = tmpfstream.stream();
140 auto c = str.get ();
141
142 // skip preceding whitespaces
143 while ( !str.eof () && !str.bad() && ( c == ' ' || c == '\t' || c == '\n' || c == '\r') )
144 c = str.get ();
145
146 if ( str.eof() ) {
147 ERR << "Failed to read RepoMirrorList file, stream hit EOF early." << std::endl;
148 return RepoMirrorListFormat::Empty;
149 }
150
151 if ( str.bad() ) {
152 ERR << "Failed to read RepoMirrorList file, stream became bad." << std::endl;
153 return RepoMirrorListFormat::Error;
154 }
155
156 switch ( c ) {
157 case '<': {
158 MIL << "Detected Metalink, file starts with <" << std::endl;
159 return RepoMirrorListFormat::MetaLink;
160 }
161 case '[': {
162 MIL << "Detected JSON, file starts with [" << std::endl;
163 return RepoMirrorListFormat::MirrorListJson;
164 }
165 default: {
166 MIL << "Detected TXT, file starts with " << c << std::endl;
167 return RepoMirrorListFormat::MirrorListTxt;
168 }
169 }
170 }
171
172 inline std::vector<Url> RepoMirrorListParseXML( const Pathname &tmpfile )
173 {
174 try {
175 media::MetaLinkParser metalink;
176 metalink.parse(tmpfile);
177 return metalink.getUrls();
178 } catch (...) {
179 ZYPP_CAUGHT( std::current_exception() );
180 zypp::parser::ParseException ex("Invalid repo metalink format.");
181 ex.remember ( std::current_exception () );
182 ZYPP_THROW(ex);
183 }
184 }
185
186 inline std::vector<Url> RepoMirrorListParseJSON( const Pathname &tmpfile )
187 {
188 InputStream tmpfstream (tmpfile);
189
190 try {
191 using namespace zyppng::operators;
192 using zyppng::operators::operator|;
193
194 json::Parser parser;
195 auto res = parser.parse ( tmpfstream )
196 | and_then([&]( json::Value data ) {
197
198 std::vector<Url> urls;
199 if ( data.isNull () ) {
200 MIL << "Empty mirrorlist received, no mirrors available." << std::endl;
202 }
203
204 if ( data.type() != json::Value::ArrayType ) {
205 MIL << "Unexpected JSON format, top level element must be an array." << std::endl;
206 return zyppng::expected<std::vector<Url>>::error( ZYPP_EXCPT_PTR( zypp::Exception("Unexpected JSON format, top level element must be an array.") ));
207 }
208 const auto &topArray = data.asArray ();
209 for ( const auto &val : topArray ) {
210 if ( val.type () != json::Value::ObjectType ) {
211 MIL << "Unexpected JSON element, array must contain only objects. Ignoring current element" << std::endl;
212 continue;
213 }
214
215 const auto &obj = val.asObject();
216 for ( const auto &key : obj ) {
217 if ( key.first == "url" ) {
218 const auto &elemValue = key.second;
219 if ( elemValue.type() != json::Value::StringType ) {
220 MIL << "Unexpected JSON element, element \"url\" must contain a string. Ignoring current element" << std::endl;
221 break;
222 }
223 try {
224 MIL << "Trying to parse URL: " << std::string(elemValue.asString()) << std::endl;
225 urls.push_back ( Url( elemValue.asString() ) );
226 } catch ( const url::UrlException &e ) {
227 ZYPP_CAUGHT(e);
228 MIL << "Invalid URL in mirrors file: "<< elemValue.asString() << ", ignoring" << std::endl;
229 }
230 }
231 }
232 }
234 });
235
236 if ( !res ) {
237 using zypp::operator<<;
238 MIL << "Error while parsing mirrorlist: (" << res.error() << "), no mirrors available" << std::endl;
239 ZYPP_RETHROW( res.error () );
240 }
241
242 return *res;
243
244 } catch (...) {
245 ZYPP_CAUGHT( std::current_exception() );
246 MIL << "Caught exception while parsing json" << std::endl;
247
248 zypp::parser::ParseException ex("Invalid repo mirror list format, valid JSON was expected.");
249 ex.remember ( std::current_exception () );
250 ZYPP_THROW(ex);
251 }
252 return {};
253 }
254
255 inline std::vector<Url> RepoMirrorListParseTXT( const Pathname &tmpfile )
256 {
257 InputStream tmpfstream (tmpfile);
258 std::vector<Url> my_urls;
259 std::string tmpurl;
260 while (getline(tmpfstream.stream(), tmpurl))
261 {
262 if ( tmpurl[0] == '#' )
263 continue;
264 try {
265 Url mirrUrl( tmpurl );
266 if ( !mirrUrl.schemeIsDownloading( ) ) {
267 MIL << "Ignoring non downloading URL " << tmpurl << std::endl;
268 }
269 my_urls.push_back(Url(tmpurl));
270 }
271 catch (...)
272 {
273 ZYPP_CAUGHT( std::current_exception() );
274
275 // fail on invalid URLs
276 ERR << "Invalid URL in mirrorlist file." << std::endl;
277
278 zypp::parser::ParseException ex("Invalid repo mirror list format, all Urls must be valid in a mirrorlist txt file.");
279 ex.remember ( std::current_exception () );
280 ZYPP_THROW(ex);
281 }
282 }
283 return my_urls;
284 }
285
287 inline std::vector<Url> RepoMirrorListParse( const Url & url_r, const Pathname & listfile_r )
288 {
289 MIL << "Parsing mirrorlist file: " << listfile_r << " originally received from " << url_r << endl;
290
291 std::vector<Url> mirrorurls;
292 switch( detectRepoMirrorListFormat (listfile_r) ) {
293 case RepoMirrorListFormat::Error:
294 // should not happen, except when the instr goes bad
295 ZYPP_THROW( zypp::parser::ParseException( str::Format("Unable to detect metalink file format for: %1%") % listfile_r ));
296 case RepoMirrorListFormat::Empty:
297 mirrorurls = {};
298 break;
299 case RepoMirrorListFormat::MetaLink:
300 mirrorurls = RepoMirrorListParseXML( listfile_r );
301 break;
302 case RepoMirrorListFormat::MirrorListJson:
303 mirrorurls = RepoMirrorListParseJSON( listfile_r );
304 break;
305 case RepoMirrorListFormat::MirrorListTxt:
306 mirrorurls = RepoMirrorListParseTXT( listfile_r );
307 break;
308 }
309
310 std::vector<Url> ret;
311 for ( auto & murl : mirrorurls )
312 {
313 if ( murl.getScheme() != "rsync" )
314 {
315 std::string pName = murl.getPathName();
316 size_t delpos = pName.find("repodata/repomd.xml");
317 if( delpos != std::string::npos )
318 {
319 murl.setPathName( pName.erase(delpos) );
320 }
321 ret.push_back( murl );
322 }
323 }
324 return ret;
325 }
326
327 } // namespace
329
330 RepoMirrorList::RepoMirrorList( const Url & url_r, const Pathname & metadatapath_r )
331 {
332 PathInfo metaPathInfo( metadatapath_r);
333 std::exception_ptr errors; // we collect errors here
334 try {
335 if ( url_r.getScheme() == "file" )
336 {
337 // never cache for local mirrorlist
338 _urls = RepoMirrorListParse( url_r, url_r.getPathName() );
339 }
340 else if ( !metaPathInfo.isDir() )
341 {
342 // no cachedir or no access
343 RepoMirrorListTempProvider provider( url_r ); // RAII: lifetime of any downloaded files
344 _urls = RepoMirrorListParse( url_r, provider.localfile() );
345 }
346 else
347 {
348 // have cachedir
349 const Pathname cachefile = metadatapath_r / cacheFileName();
350 const Pathname cookiefile = metadatapath_r / cookieFileName();
351 zypp::filesystem::PathInfo cacheinfo( cachefile );
352
353 bool needRefresh = ( !cacheinfo.isFile()
354 // force a update on a old cache ONLY if the user can write the cache, otherwise we use an already existing cachefile
355 // it makes no sense to continously download the mirrors file if we can't store it
356 || ( cacheinfo.mtime() < time(NULL) - (long) ZConfig::instance().repo_refresh_delay() * 60 && metaPathInfo.userMayRWX () ) )
357 || ( makeCookie( url_r ) != readCookieFile( cookiefile ) );
358
359 // up to date: try to parse and use the URLs if sucessful
360 // otherwise fetch the URL again
361 if ( !needRefresh ) {
362 MIL << "Mirror cachefile cookie valid and cache is not too old, skipping download (" << cachefile << ")" << std::endl;
363 try {
364 _urls = RepoMirrorListParse( url_r, cachefile );
365 return;
366 } catch ( const zypp::Exception & e ) {
367 ZYPP_CAUGHT(e);
368 auto ex = e;
369 if ( errors )
370 ex.remember(errors);
371 errors = std::make_exception_ptr(ex);
372 MIL << "Invalid mirrorlist cachefile, deleting it and trying to fetch a new one" << std::endl;
373 }
374 }
375
376 // remove the old cache and its cookie, it's either broken, empty or outdated
377 if( cacheinfo.isFile() ) {
378 filesystem::unlink(cachefile);
379 }
380
381 if ( zypp::filesystem::PathInfo(cookiefile).isFile() ) {
382 filesystem::unlink(cookiefile);
383 }
384
385 MIL << "Getting MirrorList from URL: " << url_r << endl;
386 RepoMirrorListTempProvider provider( url_r ); // RAII: lifetime of downloaded file
387 _urls = RepoMirrorListParse( url_r, provider.localfile() );
388
389 // removed the && !_urls.empty() condition , we need to remember "no URLs" as well
390 // otherwise RepoInfo keeps spamming the server with requests
391 if ( metaPathInfo.userMayRWX() ) {
392 // Create directory, if not existing
393 DBG << "Copy MirrorList file to " << cachefile << endl;
394 zypp::filesystem::assert_dir( metadatapath_r );
395 if( zypp::filesystem::hardlinkCopy( provider.localfile(), cachefile ) != 0 ) {
396 // remember empty file
398 }
399 saveToCookieFile ( cookiefile, url_r );
400 // NOTE: Now we copied the mirrorlist into the metadata directory, but
401 // in case of refresh going on, new metadata are prepared in a sibling
402 // temp dir. Upon success RefreshContext<>::saveToRawCache() exchanges
403 // temp and metadata dirs. There we move an existing mirrorlist file into
404 // the new metadata dir.
405 }
406 }
407 } catch ( const zypp::Exception &e ) {
408 // Make a more user readable exception
409 ZYPP_CAUGHT(e);
410 parser::ParseException ex( str::Format("Failed to parse/receive mirror information for URL: %1%") % url_r );
411 ex.remember(e);
412 if ( errors ) ex.remember(errors);
413 ZYPP_THROW(ex);
414 }
415 }
416
418 {
419 static const std::vector<std::string> hosts{
420 "download.opensuse.org",
421 "cdn.opensuse.org"
422 };
423 return ( std::find( hosts.begin(), hosts.end(), str::toLower( url.getHost() )) != hosts.end() );
424 }
425
426 std::string RepoMirrorList::readCookieFile(const Pathname &path_r)
427 {
428 std::ifstream file( path_r.c_str() );
429 if ( not file ) {
430 WAR << "No cookie file " << path_r << endl;
431 return {};
432 }
433
434 return str::getline( file );
435 }
436
440 std::string RepoMirrorList::makeCookie( const Url &url_r )
441 {
443 }
444
445 void RepoMirrorList::saveToCookieFile(const Pathname &path_r, const Url &url_r )
446 {
447 std::ofstream file(path_r.c_str());
448 if (!file) {
449 ERR << str::Str() << "Can't open " << path_r.asString() << std::endl;
450 return;
451 }
452 MIL << "Saving mirrorlist cookie file " << path_r << std::endl;
453 file << makeCookie(url_r);
454 file.close();
455 }
456
458 } // namespace repo
461} // namespace zypp
std::string checksum() const
Definition CheckSum.cc:170
static CheckSum sha256FromString(const std::string &input_r)
Definition CheckSum.h:107
Base class for Exception.
Definition Exception.h:153
void remember(const Exception &old_r)
Store an other Exception as history.
Definition Exception.cc:154
Url manipulation class.
Definition Url.h:93
std::string getScheme() const
Returns the scheme name of the URL.
Definition Url.cc:551
std::string asCompleteString() const
Returns a complete string representation of the Url object.
Definition Url.cc:523
std::string getPathName(EEncoding eflag=zypp::url::E_DECODED) const
Returns the path name from the URL.
Definition Url.cc:622
static ZConfig & instance()
Singleton ctor.
Definition ZConfig.cc:940
Wrapper class for stat/lstat.
Definition PathInfo.h:226
const char * c_str() const
String representation.
Definition Pathname.h:112
const std::string & asString() const
String representation.
Definition Pathname.h:93
zyppng::expected< Value > parse(const InputStream &input_r)
Parse the stream.
Definition json.cc:37
bool isNull() const
Definition JsonValue.h:277
Type type() const
Definition JsonValue.h:281
const Array & asArray() const
Definition JsonValue.h:269
bool authenticate(const Url &url, TransferSettings &settings, const std::string &availAuthTypes, bool firstTry)
static std::string makeCookie(const zypp::Url &url_r)
Generates the cookie value, currently this is only derived from the Url.
static std::string readCookieFile(const Pathname &path_r)
RepoMirrorList(const Url &url_r, const Pathname &metadatapath_r)
static void saveToCookieFile(const Pathname &path_r, const zypp::Url &url_r)
static bool urlSupportsMirrorLink(const zypp::Url &url)
static constexpr const char * cookieFileName()
static constexpr const char * cacheFileName()
std::vector< Url > _urls
void prepareSettingsAndUrl(zypp::Url &url_r, zypp::media::TransferSettings &s)
int assert_file(const Pathname &path, unsigned mode)
Create an empty file if it does not yet exist.
Definition PathInfo.cc:1191
mode_t applyUmaskTo(mode_t mode_r)
Modify mode_r according to the current umask ( mode_r & ~getUmask() ).
Definition PathInfo.h:805
int hardlinkCopy(const Pathname &oldpath, const Pathname &newpath)
Create newpath as hardlink or copy of oldpath.
Definition PathInfo.cc:888
int unlink(const Pathname &path)
Like 'unlink'.
Definition PathInfo.cc:705
int assert_dir(const Pathname &path, unsigned mode)
Like 'mkdir -p'.
Definition PathInfo.cc:324
@ Error
Definition IOTools.h:74
std::string getline(std::istream &str)
Read one line from stream.
Definition IOStream.cc:33
std::string toLower(const std::string &s)
Return lowercase version of s.
Definition String.cc:180
std::string getline(std::istream &str, const Trim trim_r)
Return stream content up to (but not returning) the next newline.
Definition String.cc:481
Url details namespace.
Definition UrlBase.cc:58
Easy-to use interface to the ZYPP dependency resolver.
static expected< std::decay_t< Type >, Err > make_expected_success(Type &&t)
Definition expected.h:397
ResultType and_then(const expected< T, E > &exp, Function &&f)
Definition expected.h:423
zypp::Url Url
Definition url.h:15
Convenient building of std::string with boost::format.
Definition String.h:254
Convenient building of std::string via std::ostringstream Basically a std::ostringstream autoconverti...
Definition String.h:213
#define ZYPP_RETHROW(EXCPT)
Drops a logline and rethrows, updating the CodeLocation.
Definition Exception.h:479
#define ZYPP_CAUGHT(EXCPT)
Drops a logline telling the Exception was caught (in order to handle it).
Definition Exception.h:475
#define ZYPP_EXCPT_PTR(EXCPT)
Drops a logline and returns Exception as a std::exception_ptr.
Definition Exception.h:463
#define ZYPP_THROW(EXCPT)
Drops a logline and throws the Exception.
Definition Exception.h:459
#define DBG
Definition Logger.h:99
#define MIL
Definition Logger.h:100
#define ERR
Definition Logger.h:102
#define WAR
Definition Logger.h:101