Python 批量复制 Excel 多列（含公式）到多文件

2025-03-15 11:20:35

Excel 多列公式复制与跨文件插入：Python 解决方案

事情是这样的，我需要把一个 Excel 表格里的几列（有些带公式，有些不带）复制出来，然后塞到另一个文件夹下面的一堆 Excel 文件里。我试了试，不带公式的列复制粘贴挺顺利，但带公式的列就卡壳了，老是报错 An error occurred: 'Cell' object has no attribute 'formula'。琢磨了半天，发现了一些坑。

问题出在哪儿？

报错信息 'Cell' object has no attribute 'formula' 的意思是：你访问的单元格对象没有“formula”这个属性。这说明什么呢？问题出在用来复制单元格的代码上。

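原代码直接用了 .formula 属性，而 openpyxl 的 Cell 对象根本没有这个属性——不管单元格里有没有公式，访问 .formula 都会抛出 AttributeError。用默认方式（data_only=False）加载工作簿时，公式文本（比如 "=A1+B1"）就保存在 cell.value 里，此时 cell.data_type 为 'f'；普通单元格（纯数字、文本）的 cell.value 则是它的值。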

另外，openpyxl 在复制单元格时不会自动保留格式，也不会像 Excel 那样自动调整公式里的相对引用，因此需要用比较稳妥的方式来处理这些东西。

解决办法，安排！

下面我把我想到的几个解决办法列出来，一个比一个高级：

1.笨办法：先判断，再复制

最直接的想法，复制前先看看这个单元格有没有公式。有，就复制公式；没有，就复制值。

原理： 用 cell.data_type 判断单元格类型。data_type 是 'f' 就表示这个单元格是公式。
代码示例：

import openpyxl
import os

def copy_columns_with_formulas(source_file, source_columns, root_dir, dest_columns, source_sheet=None, dest_sheet=None):
    """
    Copy columns (plain values and formulas) from one workbook into many workbooks.

    Every ``.xlsx`` file found under ``root_dir`` (recursively), except the
    source file itself, gets the listed source columns written into the listed
    destination columns, row for row, and is then saved in place.

    Formula cells need no special handling: a workbook loaded with the default
    ``data_only=False`` keeps the formula text (for example ``"=A1+B1"``) in
    ``cell.value``, and assigning that string to another cell yields a formula
    cell again. Formulas are copied verbatim; their references are not shifted
    to the new column. Cell styles are not copied.

    Args:
        source_file: Path of the ``.xlsx`` workbook to copy from.
        source_columns: Columns to copy, each either a column letter such as
            ``"A"`` or an integer column number as openpyxl counts them
            (1 = column A).
        root_dir: Folder that is walked recursively for destination workbooks.
        dest_columns: Columns to write to, in the same notation and of the
            same length as ``source_columns``; pairs are matched by position.
        source_sheet: Name of the sheet to read; the active sheet when falsy.
        dest_sheet: Name of the sheet to write; the active sheet when falsy.

    Returns:
        None. Progress and errors are reported with ``print``; a failure in
        one destination file does not stop the remaining files.
    """

    def to_column_number(column):
        # Letters are converted; integers are already openpyxl column numbers.
        if isinstance(column, str):
            return openpyxl.utils.column_index_from_string(column)
        return column

    try:
        source_path = os.fspath(source_file)

        # Cheap argument checks first, before any workbook is opened.
        # openpyxl cannot read the legacy binary .xls format, so only .xlsx is accepted.
        if not source_path.lower().endswith('.xlsx'):
            raise ValueError("源文件必须是 .xlsx 文件（openpyxl 无法读取旧版 .xls 格式）。")
        if len(source_columns) != len(dest_columns):
            raise ValueError("源列和目标列的数量必须相同。")
        # os.walk silently yields nothing for a missing folder, so check explicitly.
        if not os.path.isfile(source_path) or not os.path.isdir(root_dir):
            raise FileNotFoundError(source_path)

        source_wb = openpyxl.load_workbook(source_path)
        source_ws = source_wb[source_sheet] if source_sheet else source_wb.active

        # Read the source columns once instead of once per destination file.
        last_row = source_ws.max_row
        column_plan = []
        for source_col, dest_col in zip(source_columns, dest_columns):
            source_col_index = to_column_number(source_col)
            values = [
                source_ws.cell(row=row, column=source_col_index).value
                for row in range(1, last_row + 1)
            ]
            column_plan.append((to_column_number(dest_col), values))

        # Compare absolute, case-normalised paths so the source is recognised
        # even when root_dir is given as a relative path.
        source_abs = os.path.normcase(os.path.abspath(source_path))

        for root, _, files in os.walk(root_dir):
            for file in files:
                # "~$name.xlsx" files are Excel's lock files, not workbooks.
                if file.startswith('~$') or not file.lower().endswith('.xlsx'):
                    continue
                dest_file_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(dest_file_path)) == source_abs:
                    continue  # never modify the source workbook itself

                try:
                    dest_wb = openpyxl.load_workbook(dest_file_path)
                    dest_ws = dest_wb[dest_sheet] if dest_sheet else dest_wb.active

                    for dest_col_index, values in column_plan:
                        for row, value in enumerate(values, start=1):
                            # A string starting with "=" is stored as a formula.
                            dest_ws.cell(row=row, column=dest_col_index).value = value

                    dest_wb.save(dest_file_path)
                    print(f"已将包含公式的列复制到 '{dest_file_path}'，工作表 '{dest_sheet if dest_sheet else '第一个工作表'}'。")

                except Exception as e:
                    # Best effort: report the broken file and carry on.
                    print(f"处理 '{dest_file_path}' 时出错：{e}")
        print("复制过程完成。")

    except FileNotFoundError:
        print("错误：未找到源文件或根目录。")
    except Exception as e:
        print(f"发生错误：{e}")

# Example usage
source_file = r"C:\Users\xxx\Documents\Source File.xlsx" # path to your source workbook
source_columns = [1, 3]  # source columns to copy: column letters, or integers that copy_columns_with_formulas hands to openpyxl unchanged (so 1 = column A)
root_directory = r"C:\Users\xxx\Documents\Test"  # folder containing the destination workbooks
dest_columns = [5, 6]  # destination columns to write to, same notation as source_columns (so 5 = column E)
source_sheet = "Sheet1"  # source sheet name; when None the workbook's active sheet is used
dest_sheet = "Sheet1"   # destination sheet name; when None each workbook's active sheet is used

copy_columns_with_formulas(source_file, source_columns, root_directory, dest_columns, source_sheet, dest_sheet)

安全建议： 在操作前，最好把目标文件都备份一下，以防万一。

2. 进阶版：直接复制整个单元格对象

上面的方法有个小问题：它只复制了公式和值，格式、样式什么的都没管。要想复制得更彻底，可以尝试复制整个单元格对象。

原理： 通过 dest_ws.cell(...) 取得目标单元格（类型是 openpyxl.cell.cell.Cell），然后把源单元格的属性（值、公式、样式等）逐一赋给它。
注意：值可以直接赋值；字体、边框、填充等样式对象则要先用 copy() 拷贝一份再赋给目标单元格，避免两个工作簿的单元格共用同一个样式对象。

代码：

import openpyxl
import os
from copy import copy

def copy_columns_with_everything(source_file, source_columns, root_dir, dest_columns, source_sheet=None, dest_sheet=None):
    """
    Copy columns, including formulas and cell styles, into many workbooks.

    Every ``.xlsx`` file found under ``root_dir`` (recursively), except the
    source file itself, gets the listed source columns written into the listed
    destination columns, row for row, and is then saved in place.

    The formula text of a formula cell is its ``cell.value`` when the workbook
    is loaded with the default ``data_only=False``, so copying ``value`` covers
    plain values and formulas alike. Formulas are copied verbatim; their
    references are not shifted to the new column.

    Args:
        source_file: Path of the ``.xlsx`` workbook to copy from.
        source_columns: Columns to copy, each either a column letter such as
            ``"A"`` or a 0-based integer index (0 = column A).
        root_dir: Folder that is walked recursively for destination workbooks.
        dest_columns: Columns to write to, in the same notation and of the
            same length as ``source_columns``; pairs are matched by position.
        source_sheet: Name of the sheet to read; the active sheet when falsy.
        dest_sheet: Name of the sheet to write; the active sheet when falsy.

    Returns:
        None. Progress and errors are reported with ``print``; a failure in
        one destination file does not stop the remaining files.
    """

    def to_column_number(column):
        # openpyxl counts columns from 1; integer arguments here count from 0.
        if isinstance(column, str):
            return openpyxl.utils.column_index_from_string(column)
        return column + 1

    try:
        source_path = os.fspath(source_file)

        # Cheap argument checks first, before any workbook is opened.
        # openpyxl cannot read the legacy binary .xls format, so only .xlsx is accepted.
        if not source_path.lower().endswith('.xlsx'):
            raise ValueError("Source file must be an .xlsx workbook (openpyxl cannot read the legacy .xls format).")
        if len(source_columns) != len(dest_columns):
            raise ValueError("The number of source and destination columns should be equal")
        if not os.path.isfile(source_path):
            raise FileNotFoundError(source_path)
        # os.walk silently yields nothing for a missing folder, so check explicitly.
        if not os.path.isdir(root_dir):
            raise NotADirectoryError(f"Destination root directory not found: {root_dir}")

        source_wb = openpyxl.load_workbook(source_path)
        source_ws = source_wb[source_sheet] if source_sheet else source_wb.active

        column_pairs = [
            (to_column_number(source_col), to_column_number(dest_col))
            for source_col, dest_col in zip(source_columns, dest_columns)
        ]
        last_row = source_ws.max_row

        # Compare absolute, case-normalised paths so the source is recognised
        # even when root_dir is given as a relative path.
        source_abs = os.path.normcase(os.path.abspath(source_path))

        for root, _, files in os.walk(root_dir):
            for file in files:
                # "~$name.xlsx" files are Excel's lock files, not workbooks.
                if file.startswith('~$') or not file.lower().endswith('.xlsx'):
                    continue
                dest_file_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(dest_file_path)) == source_abs:
                    continue  # never modify the source workbook itself

                print(f"Processing {dest_file_path}")
                try:
                    dest_wb = openpyxl.load_workbook(dest_file_path)
                    dest_ws = dest_wb[dest_sheet] if dest_sheet else dest_wb.active

                    for source_col_index, dest_col_index in column_pairs:
                        for row in range(1, last_row + 1):
                            source_cell = source_ws.cell(row=row, column=source_col_index)
                            dest_cell = dest_ws.cell(row=row, column=dest_col_index)

                            # Covers formulas too: a "=..." string becomes a formula cell.
                            dest_cell.value = source_cell.value

                            if source_cell.has_style:
                                # Go through the public style properties only. The private
                                # _style array stores indexes into the *source* workbook's
                                # style tables and is not meaningful in another workbook.
                                dest_cell.font = copy(source_cell.font)
                                dest_cell.border = copy(source_cell.border)
                                dest_cell.fill = copy(source_cell.fill)
                                dest_cell.number_format = source_cell.number_format
                                dest_cell.protection = copy(source_cell.protection)
                                dest_cell.alignment = copy(source_cell.alignment)

                    dest_wb.save(dest_file_path)
                    print(f"Columns copied successfully to {dest_file_path}")

                except Exception as e:
                    # Best effort: report the broken file and carry on.
                    print(f"Error occur when processing: {dest_file_path} :  {e}")

        print("All done!")

    except FileNotFoundError:
        print("Error: Source file not found")
    except Exception as e:
        print(f"An error occurred:{e}")

# Example usage
source_file = r"C:\Users\xxx\Documents\Source File.xlsx" # path to your source workbook
source_columns = [1, 3]  # source columns to copy: column letters, or 0-based integer indexes (the function adds 1 for openpyxl, so 1 = column B)
root_directory = r"C:\Users\xxx\Documents\Test"  # folder containing the destination workbooks
dest_columns = [5, 6]  # destination columns to write to, same notation as source_columns (so 5 = column F)
source_sheet = "Sheet1"  # source sheet name; when None the workbook's active sheet is used
dest_sheet = "Sheet1"   # destination sheet name; when None each workbook's active sheet is used

copy_columns_with_everything(source_file,source_columns,root_directory,dest_columns,source_sheet, dest_sheet)

注意事项：
这个方法更强大, 但更复杂。它适用于你需要完整复制单元格所有信息的情况. 因为是复制整个object, 要小心一些内存问题(如果文件过大)。
安全建议: 这个操作更要小心了。如果目标Excel已经有些重要内容，一定一定备份。

3. 终极武器：`pandas` + 公式处理

如果前面两种方法还是不能满足你，或者文件特别多，处理速度慢, 还可以试试 pandas。pandas 在处理表格数据方面非常强大，而且速度快。

原理：
1. 用 pandas 读取源 Excel 文件和目标 Excel 文件。
2. 从源 DataFrame 中提取需要复制的列。
3. 对于每个目标文件：
  - 将提取的列插入到目标 DataFrame 的指定位置。
  - 重要: 如果希望公式里的相对引用随新位置平移，可以使用 apply 方法处理每一列，用正则表达式替换公式中的单元格引用（也可以用 openpyxl.formula.translate.Translator）。注意：下面的示例代码只是把公式文本原样复制过去，并没有实现这一步引用替换。
4. 将修改后的 DataFrame 写回目标 Excel 文件。
代码：

import pandas as pd
import os
import re
import openpyxl

def copy_columns_with_formulas_pandas(source_file, source_columns, root_dir, dest_columns, source_sheet=None, dest_sheet=None):
    """
    Insert source columns (values and formulas) into many workbooks via pandas.

    The source sheet is read with openpyxl so that formula text is available
    (``cell.value`` holds ``"=..."`` when loading with ``data_only=False``).
    Row 1 of the source is treated as the header row; rows 2 onward are data.
    Each ``.xlsx`` file under ``root_dir`` (recursively), except the source
    itself, is read into a DataFrame, the source columns are inserted as new
    columns named after their source headers, and the target sheet is
    rewritten. Formulas are copied verbatim; their references are not shifted.

    Caveats of going through a DataFrame:
        * Columns are inserted (existing columns shift right), not overwritten.
        * The target sheet is rebuilt from the DataFrame, so its cell
          formatting is not kept; other sheets in the workbook are kept.
        * NOTE(review): pandas reads existing destination formulas as their
          cached values, so they come back as static values; confirm this is
          acceptable for your files.

    Args:
        source_file: Path of the ``.xlsx`` workbook to copy from.
        source_columns: Columns to copy, each either a column letter such as
            ``"A"`` or a 1-based integer column number (1 = column A).
        root_dir: Folder that is walked recursively for destination workbooks.
        dest_columns: Insert positions, in the same notation and of the same
            length as ``source_columns``. Insertions happen in order, so each
            position refers to the frame as it is after the previous
            insertions. pandas rejects a position past the last column; that
            file is then reported and skipped.
        source_sheet: Name of the sheet to read; the active sheet when falsy.
        dest_sheet: Name of the sheet to rewrite; the first sheet when falsy.

    Returns:
        None. Progress and errors are reported with ``print``; a failure in
        one destination file does not stop the remaining files.
    """

    def to_column_number(column):
        # Letters are converted; integers are already 1-based column numbers.
        if isinstance(column, str):
            return openpyxl.utils.column_index_from_string(column)
        return column

    try:
        source_path = os.fspath(source_file)

        # Cheap argument checks first, before any workbook is opened.
        # openpyxl cannot read the legacy binary .xls format, so only .xlsx is accepted.
        if not source_path.lower().endswith('.xlsx'):
            raise ValueError("源文件必须是 .xlsx 文件（openpyxl 无法读取旧版 .xls 格式）。")
        if len(source_columns) != len(dest_columns):
            raise ValueError("源列和目标列的数量必须相同。")
        # os.walk silently yields nothing for a missing folder, so check explicitly.
        if not os.path.isfile(source_path) or not os.path.isdir(root_dir):
            raise FileNotFoundError(source_path)

        # data_only=False keeps formula text instead of cached results.
        source_wb = openpyxl.load_workbook(source_path, data_only=False)
        source_ws = source_wb[source_sheet] if source_sheet else source_wb.active

        # Read header and data of every requested column once, up front.
        last_row = source_ws.max_row
        source_row_count = max(last_row - 1, 0)
        columns_to_insert = []
        for source_col, dest_col in zip(source_columns, dest_columns):
            column_number = to_column_number(source_col)
            header = source_ws.cell(row=1, column=column_number).value
            values = [
                source_ws.cell(row=row, column=column_number).value
                for row in range(2, last_row + 1)
            ]
            # DataFrame positions are 0-based, hence the -1.
            columns_to_insert.append((to_column_number(dest_col) - 1, header, values))

        # Compare absolute, case-normalised paths so the source is recognised
        # even when root_dir is given as a relative path.
        source_abs = os.path.normcase(os.path.abspath(source_path))

        for root, _, files in os.walk(root_dir):
            for file in files:
                # "~$name.xlsx" files are Excel's lock files, not workbooks.
                if file.startswith('~$') or not file.lower().endswith('.xlsx'):
                    continue
                dest_file_path = os.path.join(root, file)
                if os.path.normcase(os.path.abspath(dest_file_path)) == source_abs:
                    continue  # never modify the source workbook itself

                try:
                    # Resolve a concrete sheet name: sheet_name=None would make
                    # pandas return a dict of every sheet instead of one frame.
                    with pd.ExcelFile(dest_file_path) as dest_book:
                        target_sheet = dest_sheet if dest_sheet else dest_book.sheet_names[0]
                        dest_df = dest_book.parse(sheet_name=target_sheet)

                    # Grow the frame when the source has more data rows, so
                    # that no source rows are dropped.
                    row_count = max(len(dest_df), source_row_count)
                    if row_count > len(dest_df):
                        dest_df = dest_df.reindex(range(row_count))

                    for position, header, values in columns_to_insert:
                        padded = values + [None] * (row_count - len(values))
                        # The header may already exist in the destination.
                        dest_df.insert(loc=position, column=header, value=padded, allow_duplicates=True)

                    # Append mode with if_sheet_exists="replace" rewrites only
                    # the target sheet and leaves the other sheets in place.
                    with pd.ExcelWriter(dest_file_path, engine="openpyxl", mode="a", if_sheet_exists="replace") as writer:
                        dest_df.to_excel(writer, index=False, sheet_name=target_sheet)
                    print(f"已使用 pandas 将包含公式的列复制到 '{dest_file_path}'，工作表 '{dest_sheet if dest_sheet else '第一个工作表'}'。")

                except Exception as e:
                    # Best effort: report the broken file and carry on.
                    print(f"处理 '{dest_file_path}' 时出错：{e}")
        print("复制过程完成。")

    except FileNotFoundError:
        print("错误：未找到源文件或根目录。")
    except Exception as e:
        print(f"发生错误：{e}")
# Example
source_file = r"C:\Users\xxx\Documents\Source File.xlsx" # path to your source workbook
source_columns = [1, 3]  # source columns to copy: column letters, or 1-based integer column numbers (the function uses them as openpyxl column numbers, so 1 = column A)
root_directory = r"C:\Users\xxx\Documents\Test"  # folder containing the destination workbooks
dest_columns = [5, 6]  # insert positions in the destination: column letters, or 1-based integers (the function subtracts 1 for pandas)
source_sheet = "Sheet1"  # source sheet name; when None the workbook's active sheet is used
dest_sheet = "Sheet1"   # destination sheet name

copy_columns_with_formulas_pandas(source_file, source_columns, root_directory, dest_columns, source_sheet, dest_sheet)