import requests
import pandas as pd
from pathlib import Path
from bs4 import BeautifulSoup

API_URL = "https://energy-iot.chinatowercom.cn/api/device/device/historyPerformance"

# Request headers captured from a browser session. The Cookie and
# authorization (bearer token) values are session-specific and expire.
# The captured "Content-Length: 211" header was dropped: requests computes
# the correct Content-Length itself, and a stale hard-coded value would
# corrupt the request.
headers = {
    "Accept-Encoding": "gzip, deflate, br, zstd",
    "Connection": "keep-alive",
    "Cookie": "HWWAFSESTIME=1732173820506; HWWAFSESID=1739685743c73769ff; dc04ed2361044be8a9355f6efb378cf2=WyIzNTI0NjE3OTgzIl0",
    "Host": "energy-iot.chinatowercom.cn",
    "Origin": "https://energy-iot.chinatowercom.cn",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-origin",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0",
    "accept": "application/json, text/plain, */*",
    "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
    "authorization": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhdWQiOlsiIl0sInVzZXJfbmFtZSI6IndlYl9tYW5hZ2V8d2FuZ2xlaTQiLCJzY29wZSI6WyJhbGwiXSwiZXhwIjoxNzMyMjc4NDA5LCJ1c2VySWQiOjI0Mjg1LCJqdGkiOiJkODE0YTZhYy05YmJmLTQ0ZjQtYWRhYi0wMzAzNjUzNmNhNWIiLCJjbGllbnRfaWQiOiJ3ZWJfbWFuYWdlIn0.VhJaDKwzjekwOCsw_jOF_jvg7sX45okFcxkLyWtbfFVGWVWANhKNhVqj5Dn0Qb3wUXH3e-w74sDN1RI9QADngMOGP_H7aTwI_nukj6VmjpFA7kEtOBwa6ouvPZQMa1qa3UWl21Ac6GoLu14T4TIf4kQAMTdaYAMFrwDAXAkqvIDmKKjZbnDFUjUIcj-J_Y-LfHCEBjtcz7Rp_wMO-PMA5wII6kbcNoSFiYb0djcFQyeBcIUSUTRPixPcTYBkS-IhNrsOePIWlpNYMHbPxZdrZkV4M65BmBn4A9MUjWYHm7iIut8WVMdCXR4Sxp9m0mJHXR_IPWES4O7aBcuMkOmjyw",
    "content-type": "application/json;charset=UTF-8",
    "sec-ch-ua": "\"Microsoft Edge\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": "Windows",
}

# Query body for the historyPerformance endpoint: a millisecond time
# window, the device code, metric id (mid), business type, and paging.
body = {
    "startTimestamp": 1732032000000,
    "endTimestamp": 1732291199000,
    "deviceCode": "TTE0102DX2406240497",
    "mid": "0305120001",
    "businessType": "7",
    "pageNum": 2,
    "pageSize": 5,
    "total": 0
}

# 1. Read the local HTML file (explicit UTF-8: Path.read_text() otherwise
#    uses the locale encoding, which is usually not UTF-8 on Windows)
file_path = Path(r'D:\WorkingProject\LightStackAdapter\Log\设备测试数据记录-铁塔主站\南和县牧村\Untitled-1.html')
html_content = file_path.read_text(encoding='utf-8')

# 2. Parse the HTML
soup = BeautifulSoup(html_content, 'html.parser')

# 3. Locate the table element. Assume the page contains a single table;
#    use find() rather than find_all() so `table` is a Tag, not a
#    ResultSet (a ResultSet has no .find('thead') and would break the
#    code below). If there are several tables, further filtering may
#    be needed.
table = soup.find('table')

# 4. Extract the table data. The header list is named column_headers so
#    it does not shadow the HTTP headers dict defined above.
data = []
column_headers = []

# Extract the header row
header_row = table.find('thead').find('tr')
for header in header_row.find_all('th'):
    column_headers.append(header.text.strip())

# Extract the data rows
for row in table.find('tbody').find_all('tr'):
    row_data = []
    for cell in row.find_all(['td', 'th']):
        row_data.append(cell.text.strip())
    data.append(row_data)

# 5. Build a DataFrame from the extracted rows
df = pd.DataFrame(data, columns=column_headers)

# 6. Save the DataFrame to a CSV file
output_file = 'extracted_table.csv'
df.to_csv(output_file, index=False, encoding='utf-8')
print(f'Table data extracted and saved to {output_file}')
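
# --- Hypothetical usage of the request configuration above ---
# API_URL, headers, and body are defined but never used in the extraction
# flow. A minimal sketch of the intended historyPerformance call follows,
# assuming the endpoint accepts a JSON POST with these headers. The
# response field names used in the usage example ("rows", "total") are
# assumptions, not a confirmed API contract, and the Cookie/bearer token
# above must be refreshed once the session expires.

def fetch_history_performance(page_num: int) -> dict:
    """POST one page of the history-performance query and return its JSON body."""
    payload = dict(body, pageNum=page_num)  # override only the page number
    resp = requests.post(API_URL, headers=headers, json=payload, timeout=30)
    resp.raise_for_status()
    return resp.json()

# Example (left commented out so running the script stays side-effect free):
# page = fetch_history_performance(1)
# print(page.get("total"), len(page.get("rows", [])))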