fix two encoding bugs
reencode() takes three arguments in the order (txt, from, to), as opposed to reencode_string(), which, like iconv, takes its arguments with from and to swapped, i.e. (txt, to, from). Fix the call to reencode_string() accordingly (this makes reencode() more intuitive). If the source and destination encodings are equivalent, don't do any re-encoding. If no special encoding parameter is found within the commit, assume UTF-8 and explicitly convert to PAGE_ENCODING. The change to reencode() mentioned above avoids re-encoding a UTF-8 string to UTF-8, for example. Signed-off-by: Julius Plenz <plenz@cis.fu-berlin.de> Signed-off-by: Lars Hjemli <hjemli@gmail.com>
This commit is contained in:
		
							parent
							
								
									5db02854e6
								
							
						
					
					
						commit
						0a799424f6
					
				
							
								
								
									
										24
									
								
								parsing.c
									
									
									
									
									
								
							
							
						
						
									
										24
									
								
								parsing.c
									
									
									
									
									
								
							| @ -106,7 +106,11 @@ const char *reencode(char **txt, const char *src_enc, const char *dst_enc) | |||||||
| 	if (!txt || !*txt || !src_enc || !dst_enc) | 	if (!txt || !*txt || !src_enc || !dst_enc) | ||||||
| 		return *txt; | 		return *txt; | ||||||
| 
 | 
 | ||||||
| 	tmp = reencode_string(*txt, src_enc, dst_enc); | 	/* no encoding needed if src_enc equals dst_enc */ | ||||||
|  | 	if(!strcasecmp(src_enc, dst_enc)) | ||||||
|  | 		return *txt; | ||||||
|  | 
 | ||||||
|  | 	tmp = reencode_string(*txt, dst_enc, src_enc); | ||||||
| 	if (tmp) { | 	if (tmp) { | ||||||
| 		free(*txt); | 		free(*txt); | ||||||
| 		*txt = tmp; | 		*txt = tmp; | ||||||
| @ -160,6 +164,10 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) | |||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
| 
 | 
 | ||||||
|  | 	/* if no special encoding is found, assume UTF-8 */ | ||||||
|  | 	if(!ret->msg_encoding) | ||||||
|  | 		ret->msg_encoding = xstrdup("UTF-8"); | ||||||
|  | 
 | ||||||
| 	// skip unknown header fields
 | 	// skip unknown header fields
 | ||||||
| 	while (p && *p && (*p != '\n')) { | 	while (p && *p && (*p != '\n')) { | ||||||
| 		p = strchr(p, '\n'); | 		p = strchr(p, '\n'); | ||||||
| @ -189,14 +197,12 @@ struct commitinfo *cgit_parse_commit(struct commit *commit) | |||||||
| 	} else | 	} else | ||||||
| 		ret->subject = xstrdup(p); | 		ret->subject = xstrdup(p); | ||||||
| 
 | 
 | ||||||
| 	if (ret->msg_encoding) { | 	reencode(&ret->author, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->author, PAGE_ENCODING, ret->msg_encoding); | 	reencode(&ret->author_email, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->author_email, PAGE_ENCODING, ret->msg_encoding); | 	reencode(&ret->committer, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->committer, PAGE_ENCODING, ret->msg_encoding); | 	reencode(&ret->committer_email, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->committer_email, PAGE_ENCODING, ret->msg_encoding); | 	reencode(&ret->subject, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->subject, PAGE_ENCODING, ret->msg_encoding); | 	reencode(&ret->msg, ret->msg_encoding, PAGE_ENCODING); | ||||||
| 		reencode(&ret->msg, PAGE_ENCODING, ret->msg_encoding); |  | ||||||
| 	} |  | ||||||
| 
 | 
 | ||||||
| 	return ret; | 	return ret; | ||||||
| } | } | ||||||
|  | |||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user