I have a Python script that extracts users' data from a Telegram group.
Here is the code:
# Telegram group helper script built on Telethon.
#
# Offers three interactive actions:
#   1. list_users_in_group() - dump the members of a chosen group to a CSV file
#   2. add_users_to_group()  - invite users listed in a CSV file to a chosen group
#   3. printCSV()            - print the parsed contents of a CSV file
# For actions 2 and 3 the CSV path is taken from the first command-line argument.
import csv
import random
import re
import sys
import time
import traceback

from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetDialogsRequest
from telethon.tl.types import InputPeerEmpty, InputPeerChannel, InputPeerUser
from telethon.errors.rpcerrorlist import PeerFloodError, UserPrivacyRestrictedError
from telethon.tl.functions.channels import InviteToChannelRequest

api_id = 000000  # YOUR API_ID
api_hash = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'  # YOUR API_HASH
phone = '+34000000000'  # YOUR PHONE NUMBER, INCLUDING COUNTRY CODE

# The session is named after the phone number; log in interactively if needed.
client = TelegramClient(phone, api_id, api_hash)
client.connect()
if not client.is_user_authorized():
    client.send_code_request(phone)
    client.sign_in(phone, input('Enter the code: '))


def add_users_to_group():
    """Invite the users listed in the CSV file ``sys.argv[1]`` to a chosen group.

    The CSV is expected to have a header row followed by rows of
    ``username, user id, access hash`` (the layout written by
    ``list_users_in_group``).  The target group and the lookup mode
    (by username or by id/access hash) are asked for interactively.

    Exits via ``sys.exit`` when an invalid mode is entered or when more
    than 10 unexpected errors have occurred.
    """
    input_file = sys.argv[1]
    users = []
    with open(input_file, encoding='UTF-8') as f:
        rows = csv.reader(f, delimiter=",", lineterminator="\n")
        next(rows, None)  # skip the header row
        for row in rows:
            if not row:
                # Blank line in the file: nothing to read.
                continue
            user = {'username': row[0]}
            try:
                user['id'] = int(row[1])
                user['access_hash'] = int(row[2])
            except IndexError:
                # Row has a username only; it can still be used in mode 1.
                print('users without id or access_hash')
            users.append(user)

    # random.shuffle(users)

    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=10,
        hash=0,
    ))
    # CONDITION TO ONLY LIST MEGA GROUPS.  Not every chat object has a
    # ``megagroup`` attribute, hence getattr with a default.
    groups = [chat for chat in result.chats if getattr(chat, 'megagroup', False)]

    print('Choose a group to add members:')
    for i, group in enumerate(groups):
        print(str(i) + '- ' + group.title)

    g_index = input("Enter a Number: ")
    target_group = groups[int(g_index)]
    print('\n\nGrupo elegido:\t' + target_group.title)

    target_group_entity = InputPeerChannel(target_group.id, target_group.access_hash)

    mode = int(input("Enter 1 to add by username or 2 to add by ID: "))
    # Validate up front: exiting from inside the loop's try block would be
    # intercepted by a catch-all handler.
    if mode not in (1, 2):
        sys.exit("Invalid Mode Selected. Please Try Again.")

    error_count = 0
    for user in users:
        try:
            print("Adding {}".format(user['username']))
            if mode == 1:
                if user['username'] == "":
                    continue
                user_to_add = client.get_input_entity(user['username'])
            else:
                user_to_add = InputPeerUser(user['id'], user['access_hash'])
            client(InviteToChannelRequest(target_group_entity, [user_to_add]))
            print("Waiting 60 Seconds...")
            time.sleep(60)
        except PeerFloodError:
            print("Getting Flood Error from telegram. Script is stopping now. Please try again after some time.")
            # Actually stop, as the message says, instead of trying the next user.
            break
        except UserPrivacyRestrictedError:
            print("The user's privacy settings do not allow you to do this. Skipping.")
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that SystemExit and
            # KeyboardInterrupt are not swallowed.
            traceback.print_exc()
            print("Unexpected Error")
            error_count += 1
            if error_count > 10:
                sys.exit('too many errors')


def list_users_in_group():
    """Write the members of an interactively chosen group to a CSV file.

    The file is named ``members-<slug>.csv`` where ``<slug>`` is the
    lower-cased group title with every run of characters outside ``a-z``
    replaced by a single ``-``.  Columns: username, user id, access hash,
    name, group, group id.
    """
    result = client(GetDialogsRequest(
        offset_date=None,
        offset_id=0,
        offset_peer=InputPeerEmpty(),
        limit=200,
        hash=0,
    ))
    groups = []
    for chat in result.chats:
        # All chats are listed here; the megagroup filter is intentionally off.
        # if chat.megagroup == True:
        print(chat)
        groups.append(chat)

    print('Choose a group to scrape members from:')
    for i, g in enumerate(groups):
        print(str(i) + '- ' + g.title)

    g_index = input("Enter a Number: ")
    target_group = groups[int(g_index)]
    print('\n\nGrupo elegido:\t' + target_group.title)

    print('Fetching Members...')
    all_participants = client.get_participants(target_group, aggressive=True)

    print('Saving In file...')
    slug = re.sub("-+", "-", re.sub("[^a-zA-Z]", "-", str.lower(target_group.title)))
    with open("members-" + slug + ".csv", "w", encoding='UTF-8') as f:
        writer = csv.writer(f, delimiter=",", lineterminator="\n")
        writer.writerow(['username', 'user id', 'access hash', 'name', 'group', 'group id'])
        for user in all_participants:
            # Missing fields are written as empty strings.
            username = user.username or ""
            first_name = user.first_name or ""
            last_name = user.last_name or ""
            name = (first_name + ' ' + last_name).strip()
            writer.writerow([username, user.id, user.access_hash, name,
                             target_group.title, target_group.id])
    print('Members scraped successfully.')


def printCSV():
    """Print every data row of the CSV file ``sys.argv[1]``, raw and parsed.

    Always terminates the process with ``sys.exit('FINITO')`` when done.
    """
    input_file = sys.argv[1]
    users = []
    with open(input_file, encoding='UTF-8') as f:
        rows = csv.reader(f, delimiter=",", lineterminator="\n")
        next(rows, None)  # skip the header row
        for row in rows:
            user = {}
            user['username'] = row[0]
            user['id'] = int(row[1])
            user['access_hash'] = int(row[2])
            users.append(user)
            print(row)
            print(user)
    sys.exit('FINITO')


# print('Fetching Members...')
# all_participants = []
# all_participants = client.get_participants(target_group, aggressive=True)

print('What do you want to do:')
mode = int(input("Enter \n1-List users in a group\n2-Add users from CSV to Group (CSV must be passed as a parameter to the script\n3-Show CSV\n\nYour option: "))

if mode == 1:
    list_users_in_group()
elif mode == 2:
    add_users_to_group()
elif mode == 3:
    printCSV()
After extracting the members, when I open the members--.csv
file I see that the Unicode characters are displayed incorrectly.
How can I fix this issue?
I am using Excel 2016.
Advertisement
Answer
The problem is not your code, it’s Excel. When Excel opens a file it uses the encoding that is default for your version of Windows, and that encoding is never UTF-8 – it’s one of the many code pages that they invented before Unicode came about.
If you use the text import wizard, there’s an option to select the text encoding, and you can choose UTF-8 there if you want. But that’s a pain to do every time you need to open a CSV.
There’s a way to make Excel recognize that the file is UTF-8 encoded and use it automatically; many Microsoft products use the same trick. If the file starts with a Unicode Byte Order Mark (BOM) U+FEFF encoded in UTF-8 (the 3-byte sequence 0xEF,0xBB,0xBF), Excel will recognize that the file is UTF-8 encoded and override its default. Python will automatically start your file with this BOM sequence if you use the special encoding 'utf_8_sig'.
with open("members-" + re.sub("-+","-",re.sub("[^a-zA-Z]","-",str.lower(target_group.title))) + ".csv","w",encoding='utf_8_sig') as f:
It’s not recommended that you put this special signature at the beginning of every file, only when you know it will be consumed by an application that requires it.