首先,本小工具使用 C# WinForms 实现,主要使用了百度智能云OCR文字识别技术,调用其官网接口即可,很简单;同时搭配NPOI Excel操作类库,
并利用Spire.PDF类库,把pdf格式发票转换为png图片格式。程序可以自动识别图片、pdf格式发票,通过高拍仪拍摄、手机拍照、扫描件等方式得到的发票都可以识别。
其他说明:本程序借助百度智能云API作为基础的发票识别技术。
发票识别助手共分5个功能模块,操作相对很简单,第一步点击添加发票按钮,选择要识别的发票信息。注意说明:目前图片格式支持jpg、png、bmp,图片的长和宽要求最短边大于10px,
最长边小于2048px;图像编码后大小必须小于4M,建议不要超过1M;第二步点击识别发票按钮,系统开始识别发票信息,识别完成后,发票信息会自动生成;
介绍一下关键的代码:
一、获取百度云API token,这个是官方给的,直接拿过来用就可以了。
/// <summary>
/// Obtains an OAuth access token for the Baidu AI Cloud APIs using the
/// client-credentials grant. The API Key and Secret Key are read from the
/// application's configuration ("APIKey" and "SecretKey" app settings).
/// </summary>
public static class AccessToken
{
    // API Key of the Baidu Cloud application with the required service enabled
    // (the original author suggests enabling several services when creating the application).
    private static String clientId = ConfigurationManager.AppSettings.Get("APIKey");
    // Secret Key of the same Baidu Cloud application.
    private static String clientSecret = ConfigurationManager.AppSettings.Get("SecretKey");
    // One shared client for all token requests: creating a new HttpClient per call
    // and never disposing it leaks sockets.
    private static readonly HttpClient httpClient = new HttpClient();

    /// <summary>
    /// Posts the client credentials to the token endpoint and returns the
    /// <c>access_token</c> field of the JSON response.
    /// </summary>
    /// <returns>
    /// The access token, or <c>null</c> when the response body contains no
    /// <c>access_token</c> field or cannot be mapped to <see cref="AccessTokenInfo"/>.
    /// </returns>
    public static String getAccessToken()
    {
        String authHost = "https://aip.baidubce.com/oauth/2.0/token";
        List<KeyValuePair<String, String>> paraList = new List<KeyValuePair<string, string>>
        {
            new KeyValuePair<string, string>("grant_type", "client_credentials"),
            new KeyValuePair<string, string>("client_id", clientId),
            new KeyValuePair<string, string>("client_secret", clientSecret)
        };

        // The method is synchronous by contract, so the tasks are awaited with .Result
        // exactly as before (failures still surface as AggregateException).
        using (FormUrlEncodedContent content = new FormUrlEncodedContent(paraList))
        using (HttpResponseMessage response = httpClient.PostAsync(authHost, content).Result)
        {
            String result = response.Content.ReadAsStringAsync().Result;
            AccessTokenInfo tokenInfo = JsonConvert.DeserializeObject<AccessTokenInfo>(result);
            // DeserializeObject yields null for an empty or "null" body; report that as
            // "no token" instead of crashing with a NullReferenceException.
            return tokenInfo == null ? null : tokenInfo.access_token;
        }
    }
}
// Plain data holder that AccessToken.getAccessToken deserializes the token endpoint's
// JSON response into. The property names presumably mirror the JSON keys of that
// response, so they should not be renamed without checking the deserialization.
public class AccessTokenInfo
{
// NOTE(review): presumably the OAuth refresh token; not read by the visible code.
public string refresh_token { get; set; }
// NOTE(review): presumably the token lifetime; unit not established here - confirm against the API docs.
public string expires_in { get; set; }
// Not read by the visible code.
public string session_key { get; set; }
// The value returned by AccessToken.getAccessToken.
public string access_token { get; set; }
// Not read by the visible code.
public string scope { get; set; }
// Not read by the visible code.
public string session_secret { get; set; }
}
二、增值税票识别请求过程和参数传递,也是官方给的例子,自己按照需求修改一下就可以了。
/// <summary>
/// VAT invoice recognition: sends the given image file to the Baidu OCR
/// <c>vat_invoice</c> endpoint and returns the raw response body.
/// </summary>
/// <param name="fileName">Path of the invoice image file to upload.</param>
/// <returns>The response body decoded as UTF-8 text (JSON, as consumed by the callers).</returns>
public static string vatInvoice(string fileName)
{
    string host = "https://aip.baidubce.com/rest/2.0/ocr/v1/vat_invoice?access_token=" + token;
    System.Net.HttpWebRequest request = (HttpWebRequest)WebRequest.Create(host);
    request.Method = "post";
    request.KeepAlive = true;
    // The body is a URL-encoded form; declare it explicitly instead of sending no content type.
    request.ContentType = "application/x-www-form-urlencoded";

    // Base64 encoding of the image, then percent-encoded as the "image" form field.
    string base64 = getFileBase64(fileName);
    String str = "image=" + UrlEncode(base64);
    // The encoded body contains only ASCII characters, so use a fixed encoding rather
    // than the machine-dependent Encoding.Default.
    byte[] buffer = Encoding.ASCII.GetBytes(str);
    request.ContentLength = buffer.Length;

    // Dispose the request stream so the body is completed and the connection is released.
    using (Stream requestStream = request.GetRequestStream())
    {
        requestStream.Write(buffer, 0, buffer.Length);
    }

    // Dispose the response and reader; leaking them exhausts the connection pool
    // when many invoices are recognized in a row.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}
/// <summary>
/// Reads the whole file and returns its content as a Base64 string.
/// </summary>
/// <param name="fileName">Path of the file to read. It is opened read-only and
/// shared for reading and writing with other handles.</param>
/// <returns>Base64 text of the bytes read; an empty string for an empty file.</returns>
public static String getFileBase64(String fileName)
{
    // "using" guarantees the handle is released even when reading throws.
    using (FileStream filestream = new FileStream(fileName, FileMode.Open, System.IO.FileAccess.Read, FileShare.ReadWrite))
    {
        byte[] arr = new byte[filestream.Length];
        int total = 0;
        // Stream.Read may return fewer bytes than requested, so keep reading until
        // the buffer is full or the stream ends.
        while (total < arr.Length)
        {
            int read = filestream.Read(arr, total, arr.Length - total);
            if (read == 0)
            {
                break; // the file became shorter while it was being read
            }
            total += read;
        }
        return Convert.ToBase64String(arr, 0, total);
    }
}
/// <summary>
/// Percent-encodes every byte of the UTF-8 representation of <paramref name="str"/>
/// as <c>%xx</c> with two lowercase hex digits (all bytes are escaped, including
/// letters and digits).
/// </summary>
/// <param name="str">Text to encode.</param>
/// <returns>The percent-encoded text; an empty string for empty input.</returns>
public static string UrlEncode(string str)
{
    // UTF-8 is used on purpose (the original note mentions Encoding.Default as the alternative).
    byte[] bytes = System.Text.Encoding.UTF8.GetBytes(str);
    StringBuilder sb = new StringBuilder(bytes.Length * 3);
    foreach (byte b in bytes)
    {
        // "x2" always yields two digits; Convert.ToString(b, 16) produced a single
        // digit for bytes below 0x10 (e.g. "%a" for a line feed), which is not a valid escape.
        sb.Append('%').Append(b.ToString("x2"));
    }
    return sb.ToString();
}
三、这里的部分是把pdf格式的发票,自动转换为png格式,提供出百度云api需要的文件格式。
/// <summary>
/// Builds an <see cref="ImageList"/> of 80x60 thumbnails from the given image files,
/// keyed by file path.
/// </summary>
/// <param name="files">Paths of the image files; may be null or empty.</param>
/// <returns>The populated image list; empty when <paramref name="files"/> is null or empty.</returns>
private ImageList GetImage(string[] files)
{
    ImageList list = new ImageList();
    // The thumbnail size does not depend on the files, so set it once up front
    // (it was re-assigned on every iteration and never applied for empty input).
    list.ImageSize = new Size(80, 60);

    // GetImages returns null when the user cancels the dialog; treat that as "no images"
    // instead of failing with a NullReferenceException.
    if (files == null)
    {
        return list;
    }

    foreach (string file in files)
    {
        list.Images.Add(file, Image.FromFile(file));
    }
    return list;
}
/// <summary>
/// Lets the user pick one or more invoice files (jpg, jpeg, bmp, png or pdf).
/// Each selected PDF is converted through <c>Common.ConvertPDF2Image</c> and replaced
/// in the result by the path that call returns; a log line is appended to <c>errMsg</c>
/// for every conversion.
/// </summary>
/// <returns>The selected (and possibly converted) file paths, or null when the dialog is cancelled.</returns>
private string[] GetImages()
{
    // The dialog wraps native resources; dispose it once the selection has been read.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Multiselect = true; // allow selecting several files at once
        // Keep the historical default folder when it exists; otherwise start in the
        // user's "My Pictures" folder, as the original TODO asked for.
        string preferredDirectory = @"C:\images\";
        ofd.InitialDirectory = Directory.Exists(preferredDirectory)
            ? preferredDirectory
            : Environment.GetFolderPath(Environment.SpecialFolder.MyPictures);
        ofd.Title = "请选择要识别的发票的图片";
        ofd.Filter = "图片文件(*.jpg *.jpeg *.bmp *.png)|*.jpg;*.jpeg;*.bmp;*.png;*.pdf";

        if (ofd.ShowDialog() != DialogResult.OK || ofd.FileNames == null)
        {
            return null;
        }

        string[] files = ofd.FileNames;
        // Convert PDF files to PNG images.
        for (int i = 0; i < files.Length; i++)
        {
            // Ordinal, case-insensitive comparison: ToUpper() is culture-sensitive and
            // Contains() would also match longer extensions that merely start with ".pdf".
            if (!string.Equals(Path.GetExtension(files[i]), ".pdf", StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            string imageName = Path.GetFileNameWithoutExtension(files[i]);
            files[i] = Common.ConvertPDF2Image(files[i], imageName, 0, 1, ImageFormat.Png);
            errMsg.AppendText(DateTime.Now.ToLongTimeString() + " 已将" + imageName + ".pdf自动转换为png图片格式\r\n");
        }
        return files;
    }
}
/// <summary>
/// Normalizes a date written with Chinese unit characters: the year and month markers
/// ("年", "月") become hyphens and the day marker ("日") is removed.
/// </summary>
/// <param name="date">Date text such as the invoice date returned by the OCR service.</param>
/// <returns>The text with the three markers replaced; other characters are left untouched.</returns>
public string fmartDate(string date)
{
    return date
        .Replace("年", "-")
        .Replace("月", "-")
        .Replace("日", "");
}
四、获取api返回的数据,输出到dataGridView中。
// Click handler of the "recognize invoices" menu item.
// For every entry in listView1 it calls vatInvoice with the entry's first sub-item Name,
// parses the JSON reply into a dynamic object and, when words_result is present, appends
// one row to dataGridView1 with the recognized fields. When words_result is missing and
// the reply carries an error_code, the translated API error is logged instead. An exception
// raised for one entry is logged through addMessage and the loop continues with the next.
private void 识别发票ToolStripMenuItem_Click(object sender, EventArgs e)
{
// Nothing to recognize: tell the user and stop.
if (this.listView1.Items.Count == 0)
{
MessageBox.Show("请先选择要识别的发票!", "消息提示", MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
return;
}
Common.ShowProcessing("", this, (obj) =>
{
// Original author's note: a delegate is used here to work around the thread-hang problem.
// NOTE(review): the whole loop, including the vatInvoice network call, runs inside
// this.Invoke - presumably on the UI thread; confirm that this is intended.
this.Invoke(new Action(delegate
{
foreach (ListViewItem item in this.listView1.Items)
{
try
{
// The first sub-item's Name is passed to vatInvoice as the file path.
var invoiceInfo = JsonConvert.DeserializeObject<dynamic>(vatInvoice(item.SubItems[0].Name));
var items = invoiceInfo.words_result;
if (items != null)
{
// Write the recognized fields into the data grid.
int index = this.dataGridView1.Rows.Add();
this.dataGridView1.Rows[index].Cells[0].Value = items.InvoiceType;
this.dataGridView1.Rows[index].Cells[1].Value = items.InvoiceCode;
this.dataGridView1.Rows[index].Cells[2].Value = items.InvoiceNum;
this.dataGridView1.Rows[index].Cells[3].Value = fmartDate((string)items.InvoiceDate);
this.dataGridView1.Rows[index].Cells[4].Value = items.SellerName;
this.dataGridView1.Rows[index].Cells[5].Value = items.SellerRegisterNum;
this.dataGridView1.Rows[index].Cells[6].Value = items.SellerAddress;
this.dataGridView1.Rows[index].Cells[7].Value = items.SellerBank;
this.dataGridView1.Rows[index].Cells[8].Value = Common.NumberToZero((string)items.TotalAmount);
// Cell 9 defaults to "0" and is overwritten below when a tax rate was recognized.
this.dataGridView1.Rows[index].Cells[9].Value = "0";
if (Common.IsPropertyExist(items, "CommodityTaxRate"))
{
if (!Common.IsNullOrEmpty(items.CommodityTaxRate[0].word))
{
// Only the first CommodityTaxRate entry is used, with the percent sign removed.
this.dataGridView1.Rows[index].Cells[9].Value = Common.NumberToZero((string)items.CommodityTaxRate[0].word.ToString().Replace("%", ""));
}
}
this.dataGridView1.Rows[index].Cells[10].Value = Common.NumberToZero((string)items.TotalTax);
this.dataGridView1.Rows[index].Cells[11].Value = items.AmountInFiguers;
// "是" when the invoice type text contains "电子", otherwise "否".
this.dataGridView1.Rows[index].Cells[12].Value = items.InvoiceType.ToString().Contains("电子") ? "是" : "否";
this.dataGridView1.Rows[index].Cells[13].Value = items.PurchaserName;
// Cell 14 always receives this fixed text.
this.dataGridView1.Rows[index].Cells[14].Value = "一般计税";
// Pump pending window messages between items.
Application.DoEvents();
addMessage(item.SubItems[0].Text + " 识别完成!");
}
else
{
// No words_result: log the API error when the reply carries an error_code.
if (invoiceInfo.error_code != null)
{
addMessage(item.SubItems[0].Text + " -->" + apiErrorMessage((string)invoiceInfo.error_code));
}
}
}
catch (Exception err)
{
// A failure on one invoice is logged and the remaining invoices are still processed.
addMessage(item.SubItems[0].Text + err.Message + " 识别出错,已跳过!");
}
}
}));
// Original author's note: time-consuming work goes here; the processing window
// closes automatically once this code has finished.
}, null);
}
五、导出发票明细到Excel表格中。
/// <summary>
/// Click handler of the "export invoice information" menu item. Asks for a target
/// .xls file and writes the content of dataGridView1 to it: a merged title row,
/// a header row built from the column header texts, then one sheet row per grid row.
/// </summary>
private void 导出发票信息ToolStripMenuItem_Click(object sender, EventArgs e)
{
    if (this.dataGridView1.Rows.Count == 0)
    {
        MessageBox.Show("发票列表信息为空,不能执行导出!", "消息提示", MessageBoxButtons.OKCancel, MessageBoxIcon.Exclamation);
        return;
    }

    // Read the chosen path while the dialog is alive, then dispose the dialog.
    string fileName;
    using (SaveFileDialog sfd = new SaveFileDialog())
    {
        sfd.Filter = "导出发票Excel(*.xls)|*.xls";
        sfd.FileName = "发票明细 - " + DateTime.Now.ToString("yyyyMMddHHmmss");
        if (sfd.ShowDialog() != DialogResult.OK)
        {
            return;
        }
        fileName = sfd.FileName;
    }

    Common.ShowProcessing("正在导出,请稍候...", this, (obj) =>
    {
        HSSFWorkbook wb = new HSSFWorkbook();
        ISheet sheet = wb.CreateSheet("sheet1");
        int columnCount = dataGridView1.ColumnCount; // number of columns
        int rowCount = dataGridView1.Rows.Count;     // number of rows
        for (int i = 0; i < columnCount; i++)
        {
            sheet.SetColumnWidth(i, 15 * 256);
        }

        // Report title: first row, merged across all columns.
        IRow titleRow = sheet.CreateRow(0);
        titleRow.HeightInPoints = 25;
        ICell titleCell = titleRow.CreateCell(0);
        titleCell.SetCellValue("发票信息台账");
        titleCell.CellStyle = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Center, 12, 700);
        sheet.AddMergedRegion(new CellRangeAddress(0, 0, 0, columnCount - 1));

        // Column headers: second row, taken from the grid's header texts.
        IRow headerRow = sheet.CreateRow(1);
        headerRow.HeightInPoints = 20;
        ICellStyle styleHead = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Center, 10, 500);
        for (int i = 0; i < columnCount; i++)
        {
            ICell headerCell = headerRow.CreateCell(i);
            headerCell.SetCellValue(dataGridView1.Columns[i].HeaderText.ToString());
            headerCell.CellStyle = styleHead;
        }

        // Detail rows start at the third sheet row (index 2).
        int rowindex = 2;
        ICellStyle styleBody = CreateBorderedCellStyle(wb, NPOI.SS.UserModel.HorizontalAlignment.Left, 9, 500);
        for (int i = 0; i < rowCount; i++)
        {
            DataGridViewRow gridRow = this.dataGridView1.Rows[i];
            // The grid's "new row" placeholder holds no invoice data; skip it.
            if (gridRow.IsNewRow)
            {
                continue;
            }

            IRow datarow = sheet.CreateRow(rowindex);
            datarow.Height = 300;
            for (int j = 0; j < columnCount; j++)
            {
                ICell datacell = datarow.CreateCell(j);
                // A cell without a value is exported as empty text instead of
                // failing the whole export with a NullReferenceException.
                object value = gridRow.Cells[j].Value;
                datacell.SetCellValue(value == null ? string.Empty : value.ToString());
                datacell.CellStyle = styleBody;
            }
            rowindex += 1;
        }

        // Serialize the workbook to a byte array.
        byte[] buf;
        using (MemoryStream stream = new MemoryStream())
        {
            wb.Write(stream);
            buf = stream.ToArray();
        }

        // Save as an Excel file; the file is closed before the confirmation is shown
        // so it is not held open while the message box is waiting for the user.
        using (FileStream fs = new FileStream(fileName, FileMode.Create, FileAccess.Write))
        {
            fs.Write(buf, 0, buf.Length);
            fs.Flush();
        }
        MessageBox.Show("导出 EXECL 成功!", "消息提示");
        addMessage("导出发票信息到Execl表完成!");
    }, null);
}

// Creates a cell style with thin borders on all four sides, vertical centering, the
// sheet's black/white fill color indexes and a "微软雅黑" font of the given size and weight.
private static ICellStyle CreateBorderedCellStyle(HSSFWorkbook wb, NPOI.SS.UserModel.HorizontalAlignment alignment, short fontHeightInPoints, short boldweight)
{
    ICellStyle style = wb.CreateCellStyle();
    style.Alignment = alignment;
    style.VerticalAlignment = NPOI.SS.UserModel.VerticalAlignment.Center;
    style.BorderTop = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderRight = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderBottom = NPOI.SS.UserModel.BorderStyle.Thin;
    style.BorderLeft = NPOI.SS.UserModel.BorderStyle.Thin;
    style.FillBackgroundColor = HSSFColor.Black.Index;
    style.FillForegroundColor = HSSFColor.White.Index;
    IFont font = wb.CreateFont();
    font.FontName = "微软雅黑";
    font.FontHeightInPoints = fontHeightInPoints;
    font.Boldweight = boldweight;
    style.SetFont(font); // attach the font to the style
    return style;
}
操作说明如下:
Copyright © 广州京杭网络科技有限公司 2005-2024 版权所有 粤ICP备16019765号
广州京杭网络科技有限公司 版权所有