HttpClient上传文件中文名乱码

2022-07-27 12:17:34

HttpClient上传文件中文名乱码

现象

使用HttpClient工具上传文件时,如果文件名是中文,文件名会乱码

文件名乱码的代码:

    private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,File fileToUpload) {
         MultipartEntityBuilder builder = MultipartEntityBuilder.create();
         builder.addTextBody("scenarioId", scenarioId.toString());
         for (String groupId : groupIds) {
             builder.addTextBody("groupIds", groupId);
         }
         builder.addTextBody("extension", extension);
         builder.addPart("fileToUpload", new FileBody(fileToUpload));
         builder.addTextBody("type", AssetFileTypeEnum.CSV.getName());
         builder.addTextBody("isSplit","false");
         builder.addTextBody("isRefresh","false");
         return builder.build();

乱码原因:

HttpClient上传文件时,会调用doWriteTo方法向输出流写入请求体;doWriteTo在写每个part的头部时会调用formatMultipartHeader方法,而该方法在底层主要有3种不同的实现,这3种实现采用的字符集各不相同

HttpClient中的doWriteTo方法:

void doWriteTo(final OutputStreamout,final boolean writeContent) throws IOException {final ByteArrayBuffer boundaryEncoded = encode(this.charset,this.boundary);for (final FormBodyPart part: getBodyParts()) {
          writeBytes(TWO_DASHES,out);
          writeBytes(boundaryEncoded,out);
          writeBytes(CR_LF,out);//此处代码主要有3种不同的实现,不同的mode,实现方式不一样,采用的字符集也不同
          formatMultipartHeader(part,out);
          writeBytes(CR_LF,out);if (writeContent) {
              part.getBody().writeTo(out);
          }
          writeBytes(CR_LF,out);
      }
      writeBytes(TWO_DASHES,out);
      writeBytes(boundaryEncoded,out);
      writeBytes(TWO_DASHES,out);
      writeBytes(CR_LF,out);
  }

其中的formatMultipartHeader方法在不同的模式(mode)下有不同的实现;具体使用哪一种实现,由MultipartEntityBuilder在构建实体时根据mode决定

MultipartEntityBuilder

    MultipartFormEntity buildEntity() {
        String boundaryCopy = boundary;if (boundaryCopy ==null && contentType !=null) {
            boundaryCopy = contentType.getParameter("boundary");
        }if (boundaryCopy ==null) {
            boundaryCopy = generateBoundary();
        }
        Charset charsetCopy = charset;if (charsetCopy ==null && contentType !=null) {
            charsetCopy = contentType.getCharset();
        }final List<NameValuePair> paramsList =new ArrayList<NameValuePair>(2);
        paramsList.add(new BasicNameValuePair("boundary", boundaryCopy));if (charsetCopy !=null) {
            paramsList.add(new BasicNameValuePair("charset", charsetCopy.name()));
        }final NameValuePair[] params = paramsList.toArray(new NameValuePair[paramsList.size()]);final ContentType contentTypeCopy = contentType !=null ?
                contentType.withParameters(params) :
                ContentType.create("multipart/" + DEFAULT_SUBTYPE, params);final List<FormBodyPart> bodyPartsCopy = bodyParts !=null ?new ArrayList<FormBodyPart>(bodyParts) :
                Collections.<FormBodyPart>emptyList();//此处将mode赋值给modeCopyfinal HttpMultipartMode modeCopy = mode !=null ? mode : HttpMultipartMode.STRICT;final AbstractMultipartForm form;//此处根据modeCopy的值不同,构造3种form,每种的字符集都不一样,也是产生乱码的根源switch (modeCopy) {case BROWSER_COMPATIBLE:
                form =new HttpBrowserCompatibleMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);break;case RFC6532:
                form =new HttpRFC6532Multipart(charsetCopy, boundaryCopy, bodyPartsCopy);break;default:
                form =new HttpStrictMultipart(charsetCopy, boundaryCopy, bodyPartsCopy);
        }returnnew MultipartFormEntity(form, contentTypeCopy, form.getTotalLength());
    }public HttpEntitybuild() {return buildEntity();
    }

BROWSER_COMPATIBLE模式中的formatMultipartHeader方法

classHttpBrowserCompatibleMultipartextendsAbstractMultipartForm {privatefinal List<FormBodyPart> parts;
    public HttpBrowserCompatibleMultipart(final Charset charset,final String boundary,final List<FormBodyPart> parts) {super(charset, boundary);this.parts = parts;
    }@Override
    public List<FormBodyPart> getBodyParts() {returnthis.parts;
    }/**
      * Write the multipart header fields; depends on the style.
      */@Overrideprotected void formatMultipartHeader(final FormBodyPart part,final OutputStream out)throws IOException {// For browser-compatible, only write Content-Disposition// Use content charsetfinal Header header = part.getHeader();final MinimalField cd = header.getField(MIME.CONTENT_DISPOSITION);//可以看到此处的字符集采用的是设置的字符集
        writeField(cd,this.charset, out);final String filename = part.getBody().getFilename();if (filename !=null) {final MinimalField ct = header.getField(MIME.CONTENT_TYPE);//可以看到此处的字符集采用的也是设置的字符集
            writeField(ct,this.charset, out);
        }
    }
}

RFC6532模式中的formatMultipartHeader方法

classHttpRFC6532MultipartextendsAbstractMultipartForm {privatefinal List<FormBodyPart> parts;public HttpRFC6532Multipart(final Charset charset,final String boundary,final List<FormBodyPart> parts) {super(charset, boundary);this.parts = parts;
    }

    @Overridepublic List<FormBodyPart> getBodyParts() {returnthis.parts;
    }

    @Overrideprotectedvoid formatMultipartHeader(final FormBodyPart part,final OutputStream out) throws IOException {// For RFC6532, we output all fields with UTF-8 encoding.final Header header = part.getHeader();for (final MinimalField field: header) {//可以看到此处的字符集默认采用UTF8
            writeField(field, MIME.UTF8_CHARSET, out);
        }
    }
}

默认模式(STRICT)中的formatMultipartHeader方法

classHttpStrictMultipartextendsAbstractMultipartForm {privatefinal List<FormBodyPart> parts;public HttpStrictMultipart(final Charset charset,final String boundary,final List<FormBodyPart> parts) {super(charset, boundary);this.parts = parts;
    }

    @Overridepublic List<FormBodyPart> getBodyParts() {returnthis.parts;
    }

    @Overrideprotectedvoid formatMultipartHeader(final FormBodyPart part,final OutputStream out) throws IOException {// For strict, we output all fields with MIME-standard encoding.//从上面注释中可以看到,此处的字符集采用的是默认字符集即ASCII(下面MIME类中可以看到)final Header header = part.getHeader();for (final MinimalField field: header) {
            writeField(field, out);
        }
    }
}

MIME类

publicfinalclassMIME {publicstaticfinal String CONTENT_TYPE          ="Content-Type";publicstaticfinal String CONTENT_TRANSFER_ENC  ="Content-Transfer-Encoding";publicstaticfinal String CONTENT_DISPOSITION   ="Content-Disposition";publicstaticfinal String ENC_8BIT              ="8bit";publicstaticfinal String ENC_BINARY            ="binary";/** The default character set to be used, i.e. "US-ASCII" */publicstaticfinal Charset DEFAULT_CHARSET      = Consts.ASCII;/** UTF-8 is used for RFC6532 */publicstaticfinal Charset UTF8_CHARSET         = Consts.UTF_8;

}

解决方法

知道乱码产生的根源,乱码问题也就好解决了,解决方式有两种

设置mode为:BROWSER_COMPATIBLE,并设置字符集为UTF8

    /**
     * Builds the multipart upload entity in BROWSER_COMPATIBLE mode with a UTF-8 charset,
     * so that a Chinese file name is no longer garbled.
     *
     * @param scenarioId   scenario identifier, sent as the "scenarioId" text part
     * @param groupIds     group identifiers, each sent as its own "groupIds" text part
     * @param extension    value of the "extension" text part
     * @param fileToUpload file sent as the "fileToUpload" part
     * @return the assembled multipart entity
     */
    private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
                                   File fileToUpload) {
        // BROWSER_COMPATIBLE writes the part headers with the configured charset,
        // so the mode has to be paired with an explicit UTF-8 charset.
        final MultipartEntityBuilder multipart = MultipartEntityBuilder.create()
                .setMode(HttpMultipartMode.BROWSER_COMPATIBLE)
                .setCharset(Charset.forName("UTF-8"));

        multipart.addTextBody("scenarioId", scenarioId.toString());
        for (final String id : groupIds) {
            multipart.addTextBody("groupIds", id);
        }

        // Remaining parts are appended in the same order as before.
        return multipart
                .addTextBody("extension", extension)
                .addPart("fileToUpload", new FileBody(fileToUpload))
                .addTextBody("type", AssetFileTypeEnum.CSV.getName())
                .addTextBody("isSplit", "false")
                .addTextBody("isRefresh", "false")
                .build();
    }

设置模式为:RFC6532

    /**
     * Builds the multipart upload entity in RFC6532 mode, which writes the part headers
     * as UTF-8, so that a Chinese file name is no longer garbled.
     *
     * @param scenarioId   scenario identifier, sent as the "scenarioId" text part
     * @param groupIds     group identifiers, each sent as its own "groupIds" text part
     * @param extension    value of the "extension" text part
     * @param fileToUpload file sent as the "fileToUpload" part
     * @return the assembled multipart entity
     */
    private HttpEntity buildEntity(Long scenarioId, List<String> groupIds, String extension,
                                   File fileToUpload) {
        // RFC6532 mode needs no explicit charset: its headers are always written as UTF-8.
        final MultipartEntityBuilder multipart = MultipartEntityBuilder.create()
                .setMode(HttpMultipartMode.RFC6532);

        multipart.addTextBody("scenarioId", scenarioId.toString());
        for (final String id : groupIds) {
            multipart.addTextBody("groupIds", id);
        }

        // Remaining parts are appended in the same order as before.
        return multipart
                .addTextBody("extension", extension)
                .addPart("fileToUpload", new FileBody(fileToUpload))
                .addTextBody("type", AssetFileTypeEnum.CSV.getName())
                .addTextBody("isSplit", "false")
                .addTextBody("isRefresh", "false")
                .build();
    }
  • 作者:风再起时_yhl
  • 原文链接:https://blog.csdn.net/youshounianhua123/article/details/81100778
    更新时间:2022-07-27 12:17:34