GNU libstdc++ custom std::codecvt linker error

I developed several custom converters and want to comment on the archived topic http://www.cplusplus.com/forum/general/18596/ (MinGW custom codecvt facet VTABLE error). While using my UTF-32 converter with a type based on 'unsigned long' (because MSVC's wchar_t is 16-bit) I stumbled over the well-known linker error:

./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x20): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_out(__mbstate_t&, unsigned long const*, unsigned long const*, unsigned long const*&, char*, char*, char*&) const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x28): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_unshift(__mbstate_t&, char*, char*, char*&) const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x30): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_in(__mbstate_t&, char const*, char const*, char const*&, unsigned long*, unsigned long*, unsigned long*&) const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x38): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_encoding() const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x40): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_always_noconv() const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x48): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_length(__mbstate_t&, char const*, char const*, unsigned long) const'
./codecvt_test.o:(.rodata._ZTVSt7codecvtImc11__mbstate_tE[_ZTVSt7codecvtImc11__mbstate_tE]+0x50): undefined reference to `std::codecvt<unsigned long, char, __mbstate_t>::do_max_length() const'


In one of the comments a workaround is proposed where the codecvt is derived from the internal base class std::__codecvt_abstract_base. But this breaks std::basic_filebuf, because it uses std::has_facet<std::codecvt<char_type, char, state_type>>, and since the custom codecvt is not derived from std::codecvt the facet is not found (has_facet uses a dynamic_cast).

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
#include "codecvt_custom.hpp"
#include <cstdlib>
#include <fstream>
#include <locale>

int
main(int /*argc*/, char* /*argv*/[])
{
  std::basic_ifstream<char_custom> ifstream_custom;
    ifstream_custom.imbue(std::locale(ifstream_custom.getloc(),
      new codecvt_custom<char_custom, char, std::mbstate_t>));
  bool const has_std =
    std::has_facet<std::codecvt<char_custom, char, std::mbstate_t> >(
      ifstream_custom.getloc());
  bool const has_custom =
    std::has_facet<codecvt_custom<char_custom, char, std::mbstate_t> >(
      ifstream_custom.getloc());
  if (has_std && has_custom)
    return (EXIT_SUCCESS);
  return (EXIT_FAILURE);
}


After experimenting with this problem, I ended up adding a locale_glibcxx.cpp file to the project, in which the missing std::codecvt member definitions are supplied for every custom type:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#include <locale>
#include <stdexcept>

//
//TODO: Check if the GNU libstdc++ std::codecvt template is fixed.
//
// The GNU libstdc++ std::codecvt template is broken for custom types
// but we have to derive from std::codecvt because std::basic_filebuf
// uses has_facet<codecvt<InternT>> and this is using a dynamic_cast.
// Therefore we have to add all custom types at the end of this file.
//

#if defined(__GLIBCXX__) && !defined(CODECVT_NO_GLIBCXX_HACK)

// Exception specification applied to the non-throwing codecvt members that
// are defined in this file. Prefer the library's own macro so the definitions
// match its declarations exactly; otherwise use `noexcept` on C++11 and later
// (the dynamic specification `throw()` was removed in C++20) and fall back to
// `throw()` only for C++03 compilers.
#if defined(_GLIBCXX_NOTHROW)
#define CODECVT_NOTHROW _GLIBCXX_NOTHROW
#elif defined(__cplusplus) && __cplusplus >= 201103L
#define CODECVT_NOTHROW noexcept
#else
#define CODECVT_NOTHROW throw()
#endif

// CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, Method)
//
// Expands to the string literal reported when one of the placeholder
// definitions generated by CODECVT_DEFINE_PURE is invoked, e.g.
//   "pure virtual method called: std::codecvt<unsigned long, char,
//    std::mbstate_t>::do_in"
// The ": " separator keeps the fixed prefix from running into the
// qualified member name.
#define CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, Method)                 \
  "pure virtual method called: "                                               \
  "std::codecvt<" #InternT ", " #ExternT ", " #StateT ">::" #Method

// CODECVT_DEFINE_PURE(InternT, ExternT, StateT)
//
// Emits explicit specializations of the seven protected virtual members of
// std::codecvt<InternT, ExternT, StateT> so that the vtable of the primary
// template can be linked for a custom internal character type. Classes that
// derive from this std::codecvt instantiation are expected to override all
// of these members; the placeholders only exist to satisfy the linker and
// throw std::logic_error if they are ever reached.
//
// Note: do_always_noconv, do_encoding and do_max_length carry
// CODECVT_NOTHROW, so an exception thrown from them cannot propagate to the
// caller; the program is terminated instead.
//
// Requires <locale> and <stdexcept>, and CODECVT_NOTHROW to be defined.
// Must be invoked at global namespace scope.
#define CODECVT_DEFINE_PURE(InternT, ExternT, StateT)                          \
namespace std {                                                                \
template<>                                                                     \
bool                                                                           \
codecvt<InternT, ExternT, StateT>::do_always_noconv(void) const CODECVT_NOTHROW\
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_always_noconv));         \
}                                                                              \
template<>                                                                     \
int                                                                            \
codecvt<InternT, ExternT, StateT>::do_encoding(void) const CODECVT_NOTHROW     \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_encoding));              \
}                                                                              \
template<>                                                                     \
codecvt_base::result                                                           \
codecvt<InternT, ExternT, StateT>::do_in(state_type&,                          \
  extern_type const*, extern_type const*, extern_type const*&,                 \
  intern_type*, intern_type*, intern_type*&) const                             \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_in));                    \
}                                                                              \
template<>                                                                     \
int                                                                            \
codecvt<InternT, ExternT, StateT>::do_length(state_type&,                      \
  extern_type const*, extern_type const*, size_t) const                        \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_length));                \
}                                                                              \
template<>                                                                     \
int                                                                            \
codecvt<InternT, ExternT, StateT>::do_max_length(void) const CODECVT_NOTHROW   \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_max_length));            \
}                                                                              \
template<>                                                                     \
codecvt_base::result                                                           \
codecvt<InternT, ExternT, StateT>::do_out(state_type&,                         \
  intern_type const*, intern_type const*, intern_type const*&,                 \
  extern_type*, extern_type*, extern_type*&) const                             \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_out));                   \
}                                                                              \
template<>                                                                     \
codecvt_base::result                                                           \
codecvt<InternT, ExternT, StateT>::do_unshift(state_type&,                     \
  extern_type*, extern_type*, extern_type*&) const                             \
{                                                                              \
  throw logic_error(                                                           \
    CODECVT_PURE_MESSAGE(InternT, ExternT, StateT, do_unshift));               \
}                                                                              \
} /* namespace std */

// Supply the placeholder member definitions for each custom internal
// character type used with std::codecvt in this project. Add one line per
// additional (InternT, ExternT, StateT) combination.
CODECVT_DEFINE_PURE(unsigned char, char, std::mbstate_t)
CODECVT_DEFINE_PURE(unsigned long, char, std::mbstate_t)

#endif /* __GLIBCXX__ && !CODECVT_NO_GLIBCXX_HACK */ 


If somebody knows a better solution, please drop a comment.

Best regards,
Nico Bendlin
Last edited on
With g++ 5.1, custom codecvt facets appear to work as required by the standard.

However, the libstdc++ standard narrow stream objects assume that do_always_noconv() is guaranteed to return true while the standard C++ streams are synchronized with the standard C streams. As a result, they never call the code conversion functions of the custom facet unless std::ios_base::sync_with_stdio(false); is called right at the beginning.

http://coliru.stacked-crooked.com/a/4e3c7838997ebc52
http://rextester.com/KZVQVG41648
Many thanks for the comments. But your example code derives from the existing explicit template specialization std::codecvt<char, char, std::mbstate_t>. My problem happens when introducing new internal types.

I'm working on Debian Stretch (gcc 4.9.3, libstdc++ 4.9.1, Eclipse 3.8.1).

Well, in the meantime I did a "Release" build, and the linker error does not occur there.
So it seems to happen only when optimizations are turned off...

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
#include <algorithm>
#include <fstream>
#include <iostream>
#include <locale>

// Pass-through facet derived from std::codecvt<InternT, ExternT, StateT>.
//
// Every conversion member reports std::codecvt_base::noconv and leaves the
// "next" pointers at the start of the respective ranges, so no characters
// are ever written by the facet itself. It exists to check that a facet
// derived from std::codecvt for a custom internal type can be linked and
// looked up in a locale.
template<typename InternT, typename ExternT, typename StateT>
class codecvt_identity : public std::codecvt<InternT, ExternT, StateT> {
	typedef std::codecvt<InternT, ExternT, StateT> base_type;

	// Non-copyable: declared private and intentionally left undefined.
	codecvt_identity(codecvt_identity const&);
	codecvt_identity& operator=(codecvt_identity const&);

public:
	typedef typename base_type::result result;
	typedef typename base_type::intern_type intern_type;
	typedef typename base_type::extern_type extern_type;
	typedef typename base_type::state_type state_type;

	// refs is forwarded unchanged to the std::codecvt constructor.
	explicit codecvt_identity(std::size_t refs = 0) : base_type(refs) {}

	virtual ~codecvt_identity(void) {}

protected:
	// Always claims that no conversion is required.
	virtual bool do_always_noconv(void) const throw() { return true; }

	// Reports a fixed width of one external character per internal one.
	virtual int do_encoding(void) const throw() { return 1; }

	// Reports that at most one external character yields an internal one.
	virtual int do_max_length(void) const throw() { return 1; }

	// Consumes nothing and produces nothing; both "next" pointers are reset
	// to the beginning of their ranges.
	virtual result do_in(state_type& /*state*/,
		extern_type const* from, extern_type const* /*from_end*/,
		extern_type const*& from_next,
		intern_type* to, intern_type* /*to_end*/, intern_type*& to_next) const
	{
		from_next = from;
		to_next = to;
		return std::codecvt_base::noconv;
	}

	// Number of external characters in [from, from_end), capped at max.
	virtual int do_length(state_type& /*state*/,
		extern_type const* from, extern_type const* from_end,
		std::size_t max) const
	{
		std::size_t const available = static_cast<std::size_t>(from_end - from);
		std::size_t const counted = (max < available) ? max : available;
		return static_cast<int>(counted);
	}

	// Mirror image of do_in for the output direction.
	virtual result do_out(state_type& /*state*/,
		intern_type const* from, intern_type const* /*from_end*/,
		intern_type const*& from_next,
		extern_type* to, extern_type* /*to_end*/, extern_type*& to_next) const
	{
		from_next = from;
		to_next = to;
		return std::codecvt_base::noconv;
	}

	// No shift state is kept, so there is nothing to write.
	virtual result do_unshift(state_type& /*state*/,
		extern_type* to, extern_type* /*to_end*/, extern_type*& to_next) const
	{
		to_next = to;
		return std::codecvt_base::noconv;
	}
};

int main()
{
	typedef std::codecvt     <unsigned char, char, std::mbstate_t> codecvt_base;
	typedef codecvt_identity <unsigned char, char, std::mbstate_t> codecvt_test;
	std::basic_ifstream<unsigned char> ifs;
	ifs.imbue(std::locale(ifs.getloc(), new codecvt_test));
	std::cout << "base: " << std::boolalpha << std::has_facet<codecvt_base>(ifs.getloc()) << std::endl;
	std::cout << "test: " << std::boolalpha << std::has_facet<codecvt_test>(ifs.getloc()) << std::endl;
}
Topic archived. No new replies allowed.