Add SLCP102 packing logic;
source/data_analy.py | 39 (new file)
@@ -0,0 +1,39 @@
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup

# 1. Read the local HTML file
file_path = Path(r'D:\WorkingProject\LightStackAdapter\Log\设备测试数据记录-铁塔主站\南和县牧村\Untitled-1.html')
# explicit UTF-8 avoids locale-dependent decoding on Windows
html_content = file_path.read_text(encoding='utf-8')
# 2. Parse the HTML document
soup = BeautifulSoup(html_content, 'html.parser')
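# Note: 'html.parser' is the stdlib parser and needs no extra dependency;
# if lxml is installed, BeautifulSoup(html_content, 'lxml') is usually
# faster and more tolerant of malformed markup.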
# 3. Locate the table element
table = soup.find('table')  # find_all() returns a list; find() gives the single table this page is assumed to have
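# If the page ever carries more than one table, the single find() above is
# not enough. A minimal sketch of narrowing the search (the 'data-table'
# class name here is a hypothetical example, not taken from the log files):
# table = soup.find('table', class_='data-table')
# or select by position among all tables:
# table = soup.find_all('table')[0]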
# 4. Extract the table data
data = []
headers = []

# Extract the header row
header_row = table.find('thead').find('tr')
for header in header_row.find_all('th'):
    headers.append(header.text.strip())
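# If a table lacks a <thead>, table.find('thead') returns None and the
# .find('tr') call above raises AttributeError. A hedged fallback, assuming
# the headers then sit in the first row:
# first_tr = table.find('tr')
# headers = [cell.text.strip() for cell in first_tr.find_all(['th', 'td'])]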
# Extract the data rows
for row in table.find('tbody').find_all('tr'):
    row_data = []
    for cell in row.find_all(['td', 'th']):
        row_data.append(cell.text.strip())
    data.append(row_data)
# 5. Load the data into a DataFrame
df = pd.DataFrame(data, columns=headers)
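# Caution: pd.DataFrame can raise ValueError when a row holds more cells
# than len(headers), e.g. with stray or colspan'd cells. One defensive sketch:
# data = [row for row in data if len(row) == len(headers)]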
# 6. Save the DataFrame as a CSV file
output_file = 'extracted_table.csv'
df.to_csv(output_file, index=False, encoding='utf-8')

print(f'Table data extracted and saved to {output_file}')
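# Note: pandas can also parse HTML tables directly via pd.read_html, which
# would replace steps 2-5 (assuming lxml or html5lib is installed):
# import io
# df = pd.read_html(io.StringIO(html_content))[0]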