"""Gradio app that decompiles an APK and answers questions about its code.

The script loads three models at import time (BLOOM, a SentenceTransformer
and CodeBERT), installs baksmali/JADX if they are missing, and exposes two
Gradio tabs: one to upload/decompile an APK and one to query the result.
"""

import os
import subprocess
import tempfile
from androguard.misc import AnalyzeAPK
from transformers import BloomForCausalLM, BloomTokenizerFast, RobertaTokenizer, RobertaForCausalLM, pipeline
from sentence_transformers import SentenceTransformer, util
import torch
import gradio as gr

# Locations of the external decompilers; shared by install_tools() and
# decompile_apk() so the two can never disagree.
BAKSMALI_JAR = "/usr/local/bin/baksmali.jar"
JADX_DIR = "/usr/local/bin/jadx"
JADX_ZIP = "/usr/local/bin/jadx.zip"
JADX_BIN = "/usr/local/bin/jadx/bin/jadx"

# Model initialisation; any failure is reported and re-raised.
try:
    # BLOOM for natural-language processing of the user query
    bloom_tokenizer = BloomTokenizerFast.from_pretrained("bigscience/bloom-560m")
    bloom_model = BloomForCausalLM.from_pretrained("bigscience/bloom-560m")
    bloom_model.eval()

    # Embedding model used to rank source files against the query
    indexing_model = SentenceTransformer("all-MiniLM-L6-v2")

    # CodeBERT for code analysis
    tokenizer = RobertaTokenizer.from_pretrained("microsoft/codebert-base")
    codebert_model = RobertaForCausalLM.from_pretrained("microsoft/codebert-base")
    codebert_model.eval()
except Exception as e:
    print(f"Erro ao carregar modelos: {str(e)}")
    raise

# Global state: decompiled sources of the most recently uploaded APK.
apk_context = {"smali": {}, "java": {}, "info": ""}


def check_java():
    """Verify that a ``java`` executable is available.

    Raises:
        EnvironmentError: if ``java`` is missing or ``java -version`` fails.
    """
    try:
        result = subprocess.run(
            ["java", "-version"],
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
    except FileNotFoundError as e:
        raise EnvironmentError("Java não está instalado") from e
    except Exception as e:
        raise EnvironmentError(f"Erro ao verificar Java: {str(e)}") from e
    # `java -version` writes its banner to stderr, not stdout.
    print("Java está disponível:", result.stderr.decode())


def install_tools():
    """Download baksmali and JADX into /usr/local/bin if they are missing.

    Raises:
        EnvironmentError: if Java is not available.
        subprocess.CalledProcessError: if a download/unzip/chmod step fails.
    """
    check_java()

    if not os.path.exists(BAKSMALI_JAR):
        print("Instalando Baksmali...")
        subprocess.run([
            "curl", "-L", "-o", BAKSMALI_JAR,
            "https://bitbucket.org/JesusFreke/smali/downloads/baksmali-2.5.2.jar"
        ], check=True)

    if not os.path.exists(JADX_BIN):
        print("Instalando JADX...")
        subprocess.run([
            "curl", "-L", "-o", JADX_ZIP,
            "https://github.com/skylot/jadx/releases/download/v1.4.7/jadx-1.4.7.zip"
        ], check=True)
        subprocess.run(["unzip", "-o", JADX_ZIP, "-d", JADX_DIR], check=True)
        if os.path.exists(JADX_BIN):
            subprocess.run(["chmod", "+x", JADX_BIN], check=True)


def _collect_sources(root_dir, suffix):
    """Read every file under *root_dir* ending in *suffix*.

    Returns a dict mapping the path relative to *root_dir* to the file text.
    Relative paths are used as keys so that files sharing a basename in
    different packages do not overwrite each other.
    """
    sources = {}
    for current_dir, _, names in os.walk(root_dir):
        for name in names:
            if not name.endswith(suffix):
                continue
            full_path = os.path.join(current_dir, name)
            # errors="replace": one undecodable byte must not abort the run.
            with open(full_path, "r", encoding="utf-8", errors="replace") as handle:
                sources[os.path.relpath(full_path, root_dir)] = handle.read()
    return sources


def decompile_apk(apk_file):
    """Decompile an uploaded APK to Smali and Java and store it in apk_context.

    Args:
        apk_file: the value delivered by the Gradio ``File`` input; either an
            object exposing the path as ``.name`` or the path string itself.

    Returns:
        A human-readable status message (success summary or error text).
    """
    if not apk_file:
        return "Nenhum arquivo enviado"

    # Accept both a tempfile-like object and a plain path string.
    apk_path = getattr(apk_file, "name", apk_file)

    try:
        # The directory is only scratch space: everything needed is read into
        # memory below, so it is removed as soon as the block exits.
        with tempfile.TemporaryDirectory() as output_dir:
            # Smali
            smali_output = os.path.join(output_dir, "smali")
            subprocess.run([
                "java", "-jar", BAKSMALI_JAR,
                "d", apk_path, "-o", smali_output
            ], check=True)

            # JADX
            java_output = os.path.join(output_dir, "java")
            subprocess.run([
                JADX_BIN, "-d", java_output, apk_path
            ], check=True)

            # Collect the generated files
            smali_files = _collect_sources(smali_output, ".smali")
            java_files = _collect_sources(java_output, ".java")
    except Exception as e:
        return f"Erro na decompilação: {str(e)}"

    apk_context["smali"] = smali_files
    apk_context["java"] = java_files
    return f"Decompilação concluída: {len(smali_files)} arquivos Smali, {len(java_files)} arquivos Java"


def process_with_bloom(text):
    """Run *text* through BLOOM and return the decoded sequence.

    The decoded output contains the prompt followed by the continuation.
    On any failure the error is printed and *text* is returned unchanged.
    """
    try:
        # Prepare input (single sequence, so no padding is required)
        inputs = bloom_tokenizer(
            text,
            return_tensors="pt",
            max_length=512,
            truncation=True,
        )

        # Generate. max_new_tokens is used instead of max_length because
        # max_length also counts the prompt, which may be up to 512 tokens.
        with torch.no_grad():
            outputs = bloom_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=200,
                num_return_sequences=1,
                do_sample=True,  # temperature only takes effect when sampling
                temperature=0.7,
                pad_token_id=bloom_tokenizer.pad_token_id,
            )

        # Decode response
        return bloom_tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Erro no processamento BLOOM: {str(e)}")
        return text


def analyze_with_codebert(code_text, query):
    """Generate a CodeBERT continuation for a query/code prompt.

    Args:
        code_text: source code; only the first 500 characters are used.
        query: the (processed) user question.

    Returns:
        The decoded model output, or a fixed fallback message on failure.
    """
    # Prompt and generated tokens together must fit in a 512-token window.
    # NOTE(review): 512 is assumed to be the usable context of
    # microsoft/codebert-base, and the checkpoint is assumed to be usable
    # for generation via RobertaForCausalLM - confirm both.
    context_window = 512
    new_tokens = 128

    try:
        # Build prompt
        prompt = f"Query: {query}\nCódigo: {code_text[:500]}"

        # Tokenize without padding: padding to the full window would leave
        # no room for generation.
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            max_length=context_window - new_tokens,
            truncation=True,
        )

        # Generate analysis
        with torch.no_grad():
            outputs = codebert_model.generate(
                inputs["input_ids"],
                attention_mask=inputs["attention_mask"],
                max_new_tokens=new_tokens,
                num_return_sequences=1,
                pad_token_id=tokenizer.pad_token_id,
            )

        # Decode response
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Erro na análise CodeBERT: {str(e)}")
        return "Não foi possível analisar o código"


def query_apk_chat(user_message):
    """Answer a question about the decompiled APK.

    Ranks every stored Java/Smali file against the BLOOM-processed query by
    cosine similarity of sentence embeddings and reports the top three with
    a code excerpt and a CodeBERT analysis.

    Returns:
        The formatted report, or an error/status message.
    """
    if not apk_context["smali"] and not apk_context["java"]:
        return "Nenhum APK decompilado disponível"

    try:
        # Process the query with BLOOM
        processed_query = process_with_bloom(user_message)

        # Flatten the corpus: Java first, then Smali
        corpus = [
            (kind, file_name, code)
            for kind in ("java", "smali")
            for file_name, code in apk_context[kind].items()
        ]
        all_codes = [code for _, _, code in corpus]

        # Compute embeddings
        query_embedding = indexing_model.encode(processed_query, convert_to_tensor=True)
        code_embeddings = indexing_model.encode(all_codes, convert_to_tensor=True)

        # Find the best matches
        similarities = util.pytorch_cos_sim(query_embedding, code_embeddings)[0]
        top_k = min(3, len(all_codes))
        best_matches = torch.topk(similarities, k=top_k)

        response = []
        # Convert to plain Python numbers before indexing/formatting.
        for score, idx in zip(best_matches.values.tolist(), best_matches.indices.tolist()):
            file_type, file_name, code = corpus[idx]

            # Code analysis
            analysis = analyze_with_codebert(code, processed_query)

            response.append(f"\nArquivo ({file_type}): {file_name}")
            response.append(f"Relevância: {score:.2f}")
            response.append(f"Código:\n{code[:500]}...")
            response.append(f"Análise:\n{analysis}\n")
            response.append("-" * 80)

        return "\n".join(response)
    except Exception as e:
        return f"Erro na análise: {str(e)}"


# Gradio configuration
install_tools()

upload_interface = gr.Interface(
    fn=decompile_apk,
    inputs=gr.File(label="APK File", file_types=[".apk"]),
    outputs="text",
    title="APK Analyzer",
    description="Upload an APK file for analysis"
)

chat_interface = gr.Interface(
    fn=query_apk_chat,
    inputs=gr.Textbox(lines=3, placeholder="Ask about the APK code..."),
    outputs=gr.Textbox(lines=20),
    title="Code Analysis Chat",
    description="AI-powered code analysis"
)

# Combined interface
iface = gr.TabbedInterface(
    [upload_interface, chat_interface],
    ["Upload APK", "Analyze Code"]
)

if __name__ == "__main__":
    iface.launch()