I'm working on automatically replacing content in a file. re.search() successfully extracts new_content, but it contains special characters, and when I try to use it with re.sub() I get:
error: invalid group reference 3 at position 85 (line 2, column 8)
I also tried using re.escape(); the replacement then succeeds, but the extra escaping makes the inserted new_content unusable.
Here is my code :
import re

# Matches the complete C definition of u8g2_custom_font: from `const` up to
# the first `";` that is followed by a whitespace character.
pattern = r'const\s+uint8_t\s+u8g2_custom_font\[\d+\] U8G2_FONT_SECTION.+?";\s'

# Read the new font definition out of u8g2_custom_font.c.
with open('u8g2_custom_font.c', 'r') as file:
    content = file.read()
# DOTALL lets `.+?` run across the many lines of the definition.
new_content = re.search(pattern, content, flags=re.DOTALL).group(0)

# Read the file whose existing definition should be replaced.
with open('u8g2_fonts.c', 'r') as file:
    content = file.read()

# NOTE: this is the call that raises "invalid group reference". re.sub()
# processes backslash escapes in the replacement string, so sequences such as
# \3 inside the font data are parsed as references to capture groups.
modified_content = re.sub(pattern, new_content, content, flags=re.DOTALL)
and here is the new_content :
const uint8_t u8g2_custom_font[1242] U8G2_FONT_SECTION("u8g2_custom_font") =
";\0\3\2\5\5\3\5\6\20\20\0\376\16\376\13\377\0\0\1#\1\362\12 \20\322\301H\372\235\244"
"s\322i\351 )i\16\245\203\244\244\71\64(\261\316I\347\244\223\222~ \6\0\320\243\0(\14\203"
"\375\241J\242$\352-\312\2)\15\203\371\241\310\242,\352%J\42\0-\7$\230\242\30\2.\7B"
"\34\242\30\2/\14F\25\242-V\303\64\254\246\0\60\21F\25\242\322\242$\324\246D\211\66\61\211\62"
"\11\61\13E\31\242\312\244$\354\323 \62\17F\25\242\31\222PL\63-\254\246\303\0\63\20F\25\242"
"\31\222PL\243\71\25\305dH\0\64\20F\25\242\14\265$\252dI\226\14cZ\1\71\16F\25\242"
"\31\222\320\230\14jc\64\1:\10\342<\242\30\342!C\15F\25\242\31\222\320\332QL\206\4F\14"
"F\25\242\70\244\325AI\273\2H\13F\25\242\10\35\207A\364\30L\11F\25\242H\373\353\60P\15"
"F\25\242\30\224\320\70,iW\0R\20F\25\242\30\224\320\70,Q-\311\222P\14T\12G\25\242"
"\70dq\177\3Y\16G\25\242H\325$\213\262J\32w\3_\7'\364\241\70\4a\16\6\25\242\31"
"\222\60M\206\321\246,\1c\15\6\25\242\31\222P\355\230\14\11\0d\14f\25\242mY\264\321\233\262"
"\4e\17\6\25\242\31\222P\34\6\265\230\14\11\0h\13f\25\242H[\26M\364\61i\13e\31\242"
"\12sD\354\323 j\14\245\325\241\254#b\37\245H\2m\22\7\25\242X\224(\222\42)\222\42)"
"\222\42\251\0n\12\6\25\242H\26M\364\61o\14\6\25\242\31\222\320\307dH\0p\16F\325\241H"
"\26Mt\334\224%M\1r\13\6\25\242H\26MT\273\2s\14\6\25\242\31\222PvL\206\4u"
"\12\6\25\242\10\375\246,\1w\21\7\25\242H\245H\212\244H\212\244H\252X\0\334\15\306\25\242\211"
"\332\341\320\77&C\2\0\0\0\4\377\377 &\12G\24\242\210\42\251\0\60\253\31\252\371\301\314\261\34"
"\12\303A\31\264\260[\330\226FY\22\245:\222\1\60\267\26j\35\302\321\261\234\244cq\216\344H\216"
"\344H\30\311:\0\60\350\26i\35\302\34\216\71\222#\341\20m\71\222#\71\222\16\321\30\60\353\30k"
"\31\302\315\263<\312\243<\312JY\251V\312*Y\250\245!\0N\12\37\357\361\301\316\361\34\317\361\34"
"\317\361\34\37\206\34\311\361\34\317\361\34\317\361\34\317\221\341\3W\30)\15\326\301\370\240\246\312pQS"
"i\30\42\251I\32\206Hj\222\206!R\263D\31.z\246\14\27)\314\264H\33\36\4[P\37\357"
"\321\301\31\336\341\34\316\341\34\325\341|\370\236\343\71\236\343\71\236\343\71\232\344p\216\0[\244\42\357\361"
"\301\316)\71\62<(\71\252\243\331p\320s\70\315\207;V\307rh\70\350P\216\347\303\7e\234."
"\17\322\301\314\343\60J\223\60\11\263J\226\206Q\62H\345\64\211\323$\33\206p\14\207\60\251\206I\24"
"fQ\61\312\362\60\311\323\34\211\0h\177*\17\322\301\313\361l\30\302,\15\263aH\206%\15\263a"
"\310tx\7\223\312\60DI\261\24\347\321p\213\363\70\217\363\70\4i\5/\17\322\301\212\363\70\317\206"
"AK\223p\220\262\64K\303h\70$;\222)\311\20U\262\250%\213\212YT\314\206(\314\241\60O"
"\302\34\311\0j\2-\17\322\301\12\323\254\34e\203\324\222%\321\20e\311\224\15RK\226D\203\222%"
"C\226\14b\236\17_\223:\324\254e\222\232\352\71\0mt*\17\322\301\317\322\260\32\305a\222Ur"
"$\256\244\265\70J+\271\224\15C\264\245a\226\206Y\32fi\230\15C\216\244\21\0v\204%\15\326"
"\301J\343\264\34\16\323\60&\241\230\204V\61\312\206!\213\304,\22c\61\26\343a\210\305,\311\301\10"
"}\332\62\17\322\301Ks$\314\221p\30\262$J\243\322\60$C\226\206\331\60dy\32\245\322\260,"
"J\232%u\250\22%\245\226nQ\22FI\226C)\0\215\357,\17\322\301\207\342A\213\263l\310J"
"YK\343 \346i\22\207Y\232\245J\62\15Z\22f\225\60\253\204Ye\312\242u\320\261\22\0\226}"
"/\356\325\301\30\244a\311\242\60\211\262a\211\262\60)\16K\224cQ\64\334\242\70\213\206AK\242$"
"YJ\25%\214\42\65\312\364\244\71\213\0\226\373(\17\322\301\32\16:\224#\303\203\222\306\322\222,\71"
"\222cK\262\223\207\203\330\70\34\304\306\341\240Cq\36\347\310\60\4\234\345#\15\326\301\315\301\34\33\216"
"y\70\34\363p\70\346\350p\220rt\70hQK\324Rj\321\221:\30\1\0";
I would like to replace the matched text with new_content without making it unusable.
The reason re.escape doesn't work is given in the documentation:
re.escape(pattern)
...
This function must not be used for the replacement string in sub() and subn(), only backslashes should be escaped.
Use:
# Double every backslash in the replacement so re.sub() inserts it literally
# instead of interpreting it as an escape or group reference.
modified_content = re.sub(pattern, new_content.replace('\\', r'\\'), content, flags=re.DOTALL)
#                                             ^^^^^^^^^^^^^^^^^^^^^
Here is a simplified example:
import re

# Same shape as the original pattern, shortened for the demonstration.
pattern = r'const\s+uint8_t\s+font\[\d+\] FONT.+?";\s'

# Replacement text; the raw string keeps the backslashes of the C octal
# escapes as literal characters.
new_content = r'''const uint8_t font[123] FONT("u8g2_custom_font") =
"\1\2\3\4";
'''

# Text in which the existing definition is to be replaced.
content = r'''
double abc;
const uint8_t font[123] FONT("u8g2_custom_font") = "abcd";
int x = 123;
'''

# reproduces OP exception: \1 in the replacement is parsed as a reference to
# capture group 1, which the pattern does not define
try:
    print(re.sub(pattern, new_content, content, flags=re.DOTALL))
except re.error as e:
    print(e)

# incorrect result using re.escape: it escapes more than backslashes, and the
# extra backslashes end up in the output
print(re.sub(pattern, re.escape(new_content), content, flags=re.DOTALL))

# correct result escaping backslashes only
print(re.sub(pattern, new_content.replace('\\', r'\\'), content, flags=re.DOTALL))
Output:
invalid group reference 1 at position 55 (line 2, column 5)
double abc;
const\ uint8_t\ font\[123\]\ FONT\("u8g2_custom_font"\)\ =\
\ \ "\1\2\3\4";\
int x = 123;
double abc;
const uint8_t font[123] FONT("u8g2_custom_font") =
"\1\2\3\4";
int x = 123;