DebugTool/source/data_analysis.py
import requests
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup
API_URL = "https://energy-iot.chinatowercom.cn/api/device/device/historyPerformance"
headers = {
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Connection": "keep-alive",
    # NOTE: requests computes Content-Length from the body it actually sends;
    # a stale value copied from the browser can corrupt the request and is best removed.
    "Content-Length": "211",
    "Cookie": "HWWAFSESTIME=1732173820506; HWWAFSESID=1739685743c73769ff; dc04ed2361044be8a9355f6efb378cf2=WyIzNTI0NjE3OTgzIl0",
    "Host": "energy-iot.chinatowercom.cn",
    "Origin": "https://energy-iot.chinatowercom.cn",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "authorization": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiIl0sInVzZXJfbmFtZSI6IndlYl9tYW5hZ2V8d2FuZ2xlaTQiLCJzY29wZSI6WyJhbGwiXSwiZXhwIjoxNzMyMjc4NDA5LCJ1c2VySWQiOjI0Mjg1LCJqdGkiOiJkODE0YTZhYy05YmJmLTQ0ZjQtYWRhYi0wMzAzNjUzNmNhNWIiLCJjbGllbnRfaWQiOiJ3ZWJfbWFuYWdlIn0.VhJaDKwzjekwOCsw_jOF_jvg7sX45okFcxkLyWtbfFVGWVWANhKNhVqj5Dn0Qb3wUXH3e-w74sDN1RI9QADngMOGP_H7aTwI_nukj6VmjpFA7kEtOBwa6ouvPZQMa1qa3UWl21Ac6GoLu14T4TIf4kQAMTdaYAMFrwDAXAkqvIDmKKjZbnDFUjUIcj-J_Y-LfHCEBjtcz7Rp_wMO-PMA5wII6kbcNoSFiYb0djcFQyeBcIUSUTRPixPcTYBkS-IhNrsOePIWlpNYMHbPxZdrZkV4M65BmBn4A9MUjWYHm7iIut8WVMdCXR4Sxp9m0mJHXR_IPWES4O7aBcuMkOmjyw",
    "content-type": "application/json;charset=UTF-8",
    "sec-ch-ua": "\"Microsoft Edge\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "Windows",
}
body = {
    "startTimestamp": 1732032000000,
    "endTimestamp": 1732291199000,
    "deviceCode": "TTE0102DX2406240497",
    "mid": "0305120001",
    "businessType": "7",
    "pageNum": 2,
    "pageSize": 5,
    "total": 0
}
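# The request setup above is never used below; the script parses a locally saved
# page instead. A minimal sketch of how the call could be issued, assuming the
# endpoint accepts a JSON POST and returns JSON (the bearer token has likely
# expired, so this is left commented out):
# response = requests.post(API_URL, headers=headers, json=body, timeout=30)
# response.raise_for_status()
# history = response.json()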
# 1. Read the local HTML file
file_path = Path(r'D:\WorkingProject\LightStackAdapter\Log\设备测试数据记录-铁塔主站\南和县牧村\Untitled-1.html')
html_content = file_path.read_text(encoding='utf-8')  # adjust if the page was saved in another encoding
# 2. Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')
# 3. Locate the table element (find, not find_all: the code below expects a single
# Tag, and find_all would return a list)
table = soup.find('table')  # assumes the page contains only one table; filter further if there are several
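# If the page does hold several tables, one way to narrow it down (the class
# name here is hypothetical; match it to the actual markup):
# table = soup.find('table', class_='history-performance')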
# 4. Extract the table data
data = []
table_headers = []  # named to avoid shadowing the HTTP `headers` dict above
# Extract the header cells
header_row = table.find('thead').find('tr')
for header in header_row.find_all('th'):
    table_headers.append(header.text.strip())
# Extract the data rows
for row in table.find('tbody').find_all('tr'):
    row_data = []
    for cell in row.find_all(['td', 'th']):
        row_data.append(cell.text.strip())
    data.append(row_data)
# 5. Load the data into a DataFrame
df = pd.DataFrame(data, columns=table_headers)
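# Alternative sketch: pandas can parse HTML tables directly (requires lxml or
# html5lib); read_html returns one DataFrame per table found on the page:
# from io import StringIO
# df = pd.read_html(StringIO(html_content))[0]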
# 6. Save the DataFrame to a CSV file
output_file = 'extracted_table.csv'
df.to_csv(output_file, index=False, encoding='utf-8')
print(f'Table data extracted and saved to {output_file}')